You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tcp.h 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. #ifndef _IPXE_TCP_H
  2. #define _IPXE_TCP_H
  3. /** @file
  4. *
  5. * TCP protocol
  6. *
  7. * This file defines the iPXE TCP API.
  8. *
  9. */
  10. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  11. #include <ipxe/tcpip.h>
  /**
   * A TCP header
   *
   * The structure is packed; its members total 20 bytes with no
   * padding between them.
   */
  struct tcp_header {
      /** Source port */
      uint16_t src;
      /** Destination port */
      uint16_t dest;
      /** Sequence number */
      uint32_t seq;
      /** Acknowledgement number */
      uint32_t ack;
      /** Header length (upper 4 bits, see TCP_MASK_HLEN), reserved (lower 4 bits) */
      uint8_t hlen;
      /** Flags (all 8 bits are assigned: TCP_CWR down to TCP_FIN) */
      uint8_t flags;
      /** Advertised window */
      uint16_t win;
      /** Checksum */
      uint16_t csum;
      /** Urgent pointer */
      uint16_t urg;
  } __attribute__ (( packed ));
  /** @defgroup tcpopts TCP options
   * @{
   */

  /** End of TCP options list */
  #define TCP_OPTION_END 0

  /** TCP option pad */
  #define TCP_OPTION_NOP 1
  /** Generic TCP option
   *
   * Two-byte prefix shared by the specific option structures in this
   * file, each of which begins with the same @c kind and @c length
   * members.
   */
  struct tcp_option {
      /** Option kind */
      uint8_t kind;
      /** Option length */
      uint8_t length;
  } __attribute__ (( packed ));
  /** TCP MSS option */
  struct tcp_mss_option {
      /** Option kind (TCP_OPTION_MSS) */
      uint8_t kind;
      /** Option length */
      uint8_t length;
      /** Maximum segment size */
      uint16_t mss;
  } __attribute__ (( packed ));

  /** Code for the TCP MSS option */
  #define TCP_OPTION_MSS 2
  /** TCP window scale option */
  struct tcp_window_scale_option {
      /** Option kind (TCP_OPTION_WS) */
      uint8_t kind;
      /** Option length */
      uint8_t length;
      /** Window scale */
      uint8_t scale;
  } __attribute__ (( packed ));

  /** Padded TCP window scale option (used for sending)
   *
   * One pad byte followed by the three-byte option gives a total of
   * four bytes.
   */
  struct tcp_window_scale_padded_option {
      /** Padding byte */
      uint8_t nop;
      /** Window scale option */
      struct tcp_window_scale_option wsopt;
  } __attribute__ (( packed ));

  /** Code for the TCP window scale option */
  #define TCP_OPTION_WS 3
  /** Advertised TCP window scale
   *
   * Using a scale factor of 2**9 provides for a maximum window of 32MB,
   * which is sufficient to allow Gigabit-speed transfers with a 200ms
   * RTT.  The minimum advertised window is 512 bytes, which is still
   * less than a single packet.
   */
  #define TCP_RX_WINDOW_SCALE 9
  /** TCP selective acknowledgement permitted option */
  struct tcp_sack_permitted_option {
      /** Option kind (TCP_OPTION_SACK_PERMITTED) */
      uint8_t kind;
      /** Option length */
      uint8_t length;
  } __attribute__ (( packed ));

  /** Padded TCP selective acknowledgement permitted option (used for sending)
   *
   * Two pad bytes followed by the two-byte option gives a total of
   * four bytes.
   */
  struct tcp_sack_permitted_padded_option {
      /** Padding bytes */
      uint8_t nop[2];
      /** SACK permitted option */
      struct tcp_sack_permitted_option spopt;
  } __attribute__ (( packed ));

  /** Code for the TCP selective acknowledgement permitted option */
  #define TCP_OPTION_SACK_PERMITTED 4
  /** TCP selective acknowledgement option */
  struct tcp_sack_option {
      /** Option kind (TCP_OPTION_SACK) */
      uint8_t kind;
      /** Option length */
      uint8_t length;
  } __attribute__ (( packed ));

  /** TCP selective acknowledgement block */
  struct tcp_sack_block {
      /** Left edge of block */
      uint32_t left;
      /** Right edge of block */
      uint32_t right;
  } __attribute__ (( packed ));

  /** Maximum number of selective acknowledgement blocks
   *
   * This allows for the presence of the TCP timestamp option.
   */
  #define TCP_SACK_MAX 3

  /** Padded TCP selective acknowledgement option (used for sending)
   *
   * Two pad bytes followed by the two-byte option prefix gives a total
   * of four bytes.
   */
  struct tcp_sack_padded_option {
      /** Padding bytes */
      uint8_t nop[2];
      /** SACK option */
      struct tcp_sack_option sackopt;
  } __attribute__ (( packed ));

  /** Code for the TCP selective acknowledgement option */
  #define TCP_OPTION_SACK 5
  /** TCP timestamp option */
  struct tcp_timestamp_option {
      /** Option kind (TCP_OPTION_TS) */
      uint8_t kind;
      /** Option length */
      uint8_t length;
      /** Timestamp value */
      uint32_t tsval;
      /** Timestamp echo reply */
      uint32_t tsecr;
  } __attribute__ (( packed ));

  /** Padded TCP timestamp option (used for sending)
   *
   * Two pad bytes followed by the ten-byte option gives a total of
   * twelve bytes.
   */
  struct tcp_timestamp_padded_option {
      /** Padding bytes */
      uint8_t nop[2];
      /** Timestamp option */
      struct tcp_timestamp_option tsopt;
  } __attribute__ (( packed ));

  /** Code for the TCP timestamp option */
  #define TCP_OPTION_TS 8
  /** Parsed TCP options
   *
   * Holds one pointer per recognised option; the member comments
   * describe each as present only "if present".
   */
  struct tcp_options {
      /** Window scale option, if present */
      const struct tcp_window_scale_option *wsopt;
      /** SACK permitted option, if present */
      const struct tcp_sack_permitted_option *spopt;
      /** Timestamp option, if present */
      const struct tcp_timestamp_option *tsopt;
  };

  /** @} */
  /**
   * TCP flags
   *
   * Single-bit masks; the eight values together cover one byte.
   */
  #define TCP_CWR 0x80
  #define TCP_ECE 0x40
  #define TCP_URG 0x20
  #define TCP_ACK 0x10
  #define TCP_PSH 0x08
  #define TCP_RST 0x04
  #define TCP_SYN 0x02
  #define TCP_FIN 0x01
  /**
   * @defgroup tcpstates TCP states
   *
   * The TCP state is defined by a combination of the flags that have
   * been sent to the peer, the flags that have been acknowledged by the
   * peer, and the flags that have been received from the peer.
   *
   * Each flag set occupies its own byte of the state value: sent flags
   * in bits 0-7, acknowledged flags in bits 8-15 and received flags in
   * bits 16-23.
   *
   * @{
   */

  /** TCP flags that have been sent in outgoing packets */
  #define TCP_STATE_SENT(flags) ( (flags) << 0 )
  /** Extract the sent flags from a state value */
  #define TCP_FLAGS_SENT(state) ( ( (state) >> 0 ) & 0xff )

  /** TCP flags that have been acknowledged by the peer
   *
   * Note that this applies only to SYN and FIN.
   */
  #define TCP_STATE_ACKED(flags) ( (flags) << 8 )
  /** Extract the acknowledged flags from a state value */
  #define TCP_FLAGS_ACKED(state) ( ( (state) >> 8 ) & 0xff )

  /** TCP flags that have been received from the peer
   *
   * Note that this applies only to SYN and FIN, and that once SYN has
   * been received, we should always be sending ACK.
   */
  #define TCP_STATE_RCVD(flags) ( (flags) << 16 )
  /** Extract the received flags from a state value */
  #define TCP_FLAGS_RCVD(state) ( ( (state) >> 16 ) & 0xff )

  /** TCP flags that are currently being sent in outgoing packets
   *
   * These are the sent flags with any acknowledged flags masked out.
   */
  #define TCP_FLAGS_SENDING(state) \
      ( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
  /** CLOSED
   *
   * The connection has not yet been used for anything.
   */
  #define TCP_CLOSED TCP_RST

  /** LISTEN
   *
   * Not currently used as a state; we have no support for listening
   * connections.  Given a unique value to avoid compiler warnings.
   */
  #define TCP_LISTEN 0

  /** SYN_SENT
   *
   * SYN has been sent, nothing has yet been received or acknowledged.
   */
  #define TCP_SYN_SENT ( TCP_STATE_SENT ( TCP_SYN ) )

  /** SYN_RCVD
   *
   * SYN has been sent but not acknowledged, SYN has been received.
   */
  #define TCP_SYN_RCVD ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
                         TCP_STATE_RCVD ( TCP_SYN ) )

  /** ESTABLISHED
   *
   * SYN has been sent and acknowledged, SYN has been received.
   */
  #define TCP_ESTABLISHED ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
                            TCP_STATE_ACKED ( TCP_SYN ) | \
                            TCP_STATE_RCVD ( TCP_SYN ) )
  /** FIN_WAIT_1
   *
   * SYN has been sent and acknowledged, SYN has been received, FIN has
   * been sent but not acknowledged, FIN has not been received.
   *
   * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
   * acknowledged, i.e. if the application closes the connection after
   * sending and receiving SYN, but before having had SYN acknowledged.
   * However, we have to *pretend* that SYN has been acknowledged
   * anyway, otherwise we end up sending SYN and FIN in the same
   * sequence number slot.  Therefore, when we transition from SYN_RCVD
   * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
   * and increment our sequence number.
   */
  #define TCP_FIN_WAIT_1 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
                           TCP_STATE_ACKED ( TCP_SYN ) | \
                           TCP_STATE_RCVD ( TCP_SYN ) )

  /** FIN_WAIT_2
   *
   * SYN has been sent and acknowledged, SYN has been received, FIN has
   * been sent and acknowledged, FIN has not been received.
   */
  #define TCP_FIN_WAIT_2 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
                           TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
                           TCP_STATE_RCVD ( TCP_SYN ) )

  /** CLOSING / LAST_ACK
   *
   * SYN has been sent and acknowledged, SYN has been received, FIN has
   * been sent but not acknowledged, FIN has been received.
   *
   * This state actually encompasses both CLOSING and LAST_ACK; they are
   * identical with the definition of state that we use.  I don't
   * *believe* that they need to be distinguished.
   */
  #define TCP_CLOSING_OR_LAST_ACK \
      ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
        TCP_STATE_ACKED ( TCP_SYN ) | \
        TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

  /** TIME_WAIT
   *
   * SYN has been sent and acknowledged, SYN has been received, FIN has
   * been sent and acknowledged, FIN has been received.
   */
  #define TCP_TIME_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
                          TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
                          TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

  /** CLOSE_WAIT
   *
   * SYN has been sent and acknowledged, SYN has been received, FIN has
   * been received.
   */
  #define TCP_CLOSE_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
                           TCP_STATE_ACKED ( TCP_SYN ) | \
                           TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
  /** Can send data in current state
   *
   * We can send data if and only if we have had our SYN acked and we
   * have not yet sent our FIN.
   */
  #define TCP_CAN_SEND_DATA(state) \
      ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
                      TCP_STATE_SENT ( TCP_FIN ) ) ) \
        == TCP_STATE_ACKED ( TCP_SYN ) )

  /** Have ever been fully established
   *
   * We have been fully established if we have both received a SYN and
   * had our own SYN acked.
   */
  #define TCP_HAS_BEEN_ESTABLISHED(state) \
      ( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
                      TCP_STATE_RCVD ( TCP_SYN ) ) ) \
        == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )

  /** Have closed gracefully
   *
   * We have closed gracefully if we have both received a FIN and had
   * our own FIN acked.
   */
  #define TCP_CLOSED_GRACEFULLY(state) \
      ( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) | \
                      TCP_STATE_RCVD ( TCP_FIN ) ) ) \
        == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )

  /** @} */
  /** Mask for TCP header length field */
  #define TCP_MASK_HLEN 0xf0

  /** Smallest port number on which a TCP connection can listen */
  #define TCP_MIN_PORT 1
  /**
   * Maximum advertised TCP window size
   *
   * The maximum bandwidth on any link is limited by
   *
   *    max_bandwidth * round_trip_time = tcp_window
   *
   * Some rough expectations for achievable bandwidths over various
   * links are:
   *
   * a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
   *    minimum required window 64kB
   *
   * b) Home Internet connection: expected bandwidth 10MB/s, typical
   *    RTT 25ms, minimum required window 256kB
   *
   * c) WAN: expected bandwidth 2MB/s, typical RTT 100ms, minimum
   *    required window 200kB.
   *
   * The maximum possible value for the TCP window size is 1GB (using
   * the maximum window scale of 2**14).  However, it is advisable to
   * keep the window size as small as possible (without limiting
   * bandwidth), since in the event of a lost packet the window size
   * represents the maximum amount that will need to be retransmitted.
   *
   * We therefore choose a maximum window size of 256kB.
   */
  #define TCP_MAX_WINDOW_SIZE ( 256 * 1024 )
  /**
   * Path MTU
   *
   * IPv6 requires all data link layers to support a datagram size of
   * 1280 bytes.  We choose to use this as our maximum transmitted
   * datagram size, on the assumption that any practical link layer we
   * encounter will allow this size.  This is a very conservative
   * assumption in practice, but the impact of making such a
   * conservative assumption is insignificant since the amount of data
   * that we transmit (rather than receive) is negligible.
   *
   * We allow space within this 1280 bytes for an IPv6 header, a TCP
   * header, and a (padded) TCP timestamp option, leaving 1208 bytes.
   */
  #define TCP_PATH_MTU \
      ( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
  /** TCP maximum segment lifetime
   *
   * Currently set to 2 minutes, as per RFC 793.  Expressed as a
   * multiple of TICKS_PER_SEC.
   */
  #define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )

  /**
   * TCP keepalive period
   *
   * We send keepalive ACKs after this period of inactivity has elapsed
   * on an established connection.  Set to 15 seconds, expressed as a
   * multiple of TICKS_PER_SEC.
   */
  #define TCP_KEEPALIVE_DELAY ( 15 * TICKS_PER_SEC )
  /**
   * TCP maximum header length
   *
   * Sum of MAX_LL_NET_HEADER_LEN (defined elsewhere), the TCP header,
   * the MSS option, the padded window scale option and the padded
   * timestamp option.
   *
   * NOTE(review): the SACK-permitted and SACK padded options are not
   * part of this sum; confirm against the transmit path that they can
   * never exceed the space counted here.
   */
  #define TCP_MAX_HEADER_LEN \
      ( MAX_LL_NET_HEADER_LEN + \
        sizeof ( struct tcp_header ) + \
        sizeof ( struct tcp_mss_option ) + \
        sizeof ( struct tcp_window_scale_padded_option ) + \
        sizeof ( struct tcp_timestamp_padded_option ) )
  /**
   * Compare TCP sequence numbers
   *
   * @v seq1		Sequence number 1
   * @v seq2		Sequence number 2
   * @ret diff		Sequence difference
   *
   * Analogous to memcmp(), returns an integer less than, equal to, or
   * greater than zero if @c seq1 is found, respectively, to be before,
   * equal to, or after @c seq2.
   *
   * The subtraction is performed in unsigned 32-bit arithmetic and the
   * result reinterpreted as signed, so the comparison remains
   * meaningful when the sequence space wraps around.
   */
  static inline __attribute__ (( always_inline )) int32_t
  tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
      return ( ( int32_t ) ( seq1 - seq2 ) );
  }
  /**
   * Check if TCP sequence number lies within window
   *
   * @v seq		Sequence number
   * @v start		Start of window
   * @v len		Length of window
   * @ret in_window	Sequence number is within window
   *
   * The offset of @c seq from @c start is computed in unsigned 32-bit
   * arithmetic, so a window that straddles the wrap-around point is
   * handled correctly.  A zero-length window contains nothing.
   */
  static inline int tcp_in_window ( uint32_t seq, uint32_t start,
                                    uint32_t len ) {
      return ( ( seq - start ) < len );
  }
  /** TCP finish wait time
   *
   * Currently set to one second, since we should not allow a slowly
   * responding server to substantially delay a call to shutdown().
   * Expressed as a multiple of TICKS_PER_SEC.
   */
  #define TCP_FINISH_TIMEOUT ( 1 * TICKS_PER_SEC )
  378. extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
  379. #endif /* _IPXE_TCP_H */