You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tcp.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987
  1. #include <string.h>
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <assert.h>
  5. #include <errno.h>
  6. #include <byteswap.h>
  7. #include <timer.h>
  8. #include <gpxe/pkbuff.h>
  9. #include <gpxe/malloc.h>
  10. #include <gpxe/retry.h>
  11. #include <gpxe/tcpip.h>
  12. #include <gpxe/tcp.h>
  13. /** @file
  14. *
  15. * TCP protocol
  16. *
  17. */
  /* Prototypes for functions that are referenced before they are defined */
  static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send );
  static void tcp_expired ( struct retry_timer *timer, int over );
  20. /**
  21. * A TCP connection
  22. *
  23. * This data structure represents the internal state of a TCP
  24. * connection. It is kept separate from @c struct @c tcp_application
  25. * because the internal state is still required for some time after
  26. * the application closes the connection.
  27. */
  28. struct tcp_connection {
  29. /** List of TCP connections */
  30. struct list_head list;
  31. /** The associated TCP application, if any */
  32. struct tcp_application *app;
  33. /** Remote socket address */
  34. struct sockaddr_tcpip peer;
  35. /** Local port, in network byte order */
  36. uint16_t local_port;
  37. /** Current TCP state */
  38. unsigned int tcp_state;
  39. /** Previous TCP state
  40. *
  41. * Maintained only for debug messages
  42. */
  43. unsigned int prev_tcp_state;
  44. /** Current sequence number
  45. *
  46. * Equivalent to SND.UNA in RFC 793 terminology.
  47. */
  48. uint32_t snd_seq;
  49. /** Unacknowledged sequence count
  50. *
  51. * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
  52. */
  53. uint32_t snd_sent;
  54. /** Send window
  55. *
  56. * Equivalent to SND.WND in RFC 793 terminology
  57. */
  58. uint32_t snd_win;
  59. /** Current acknowledgement number
  60. *
  61. * Equivalent to RCV.NXT in RFC 793 terminology.
  62. */
  63. uint32_t rcv_ack;
  64. /** Transmit packet buffer
  65. *
  66. * This buffer is allocated prior to calling the application's
  67. * senddata() method, to provide temporary storage space.
  68. */
  69. struct pk_buff *tx_pkb;
  70. /** Retransmission timer */
  71. struct retry_timer timer;
  72. };
  /** Every TCP connection created by alloc_tcp() is linked into this list */
  static LIST_HEAD ( tcp_conns );
  77. /**
  78. * Name TCP state
  79. *
  80. * @v state TCP state
  81. * @ret name Name of TCP state
  82. */
  83. static inline __attribute__ (( always_inline )) const char *
  84. tcp_state ( int state ) {
  85. switch ( state ) {
  86. case TCP_CLOSED: return "CLOSED";
  87. case TCP_LISTEN: return "LISTEN";
  88. case TCP_SYN_SENT: return "SYN_SENT";
  89. case TCP_SYN_RCVD: return "SYN_RCVD";
  90. case TCP_ESTABLISHED: return "ESTABLISHED";
  91. case TCP_FIN_WAIT_1: return "FIN_WAIT_1";
  92. case TCP_FIN_WAIT_2: return "FIN_WAIT_2";
  93. case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK";
  94. case TCP_TIME_WAIT: return "TIME_WAIT";
  95. case TCP_CLOSE_WAIT: return "CLOSE_WAIT";
  96. default: return "INVALID";
  97. }
  98. }
  99. /**
  100. * Dump TCP state transition
  101. *
  102. * @v conn TCP connection
  103. */
  104. static inline __attribute__ (( always_inline )) void
  105. tcp_dump_state ( struct tcp_connection *conn ) {
  106. if ( conn->tcp_state != conn->prev_tcp_state ) {
  107. DBGC ( conn, "TCP %p transitioned from %s to %s\n", conn,
  108. tcp_state ( conn->prev_tcp_state ),
  109. tcp_state ( conn->tcp_state ) );
  110. }
  111. conn->prev_tcp_state = conn->tcp_state;
  112. }
  113. /**
  114. * Dump TCP flags
  115. *
  116. * @v flags TCP flags
  117. */
  118. static inline __attribute__ (( always_inline )) void
  119. tcp_dump_flags ( struct tcp_connection *conn, unsigned int flags ) {
  120. if ( flags & TCP_RST )
  121. DBGC ( conn, " RST" );
  122. if ( flags & TCP_SYN )
  123. DBGC ( conn, " SYN" );
  124. if ( flags & TCP_PSH )
  125. DBGC ( conn, " PSH" );
  126. if ( flags & TCP_FIN )
  127. DBGC ( conn, " FIN" );
  128. if ( flags & TCP_ACK )
  129. DBGC ( conn, " ACK" );
  130. }
  131. /**
  132. * Allocate TCP connection
  133. *
  134. * @ret conn TCP connection, or NULL
  135. *
  136. * Allocates TCP connection and adds it to the TCP connection list.
  137. */
  138. static struct tcp_connection * alloc_tcp ( void ) {
  139. struct tcp_connection *conn;
  140. conn = malloc ( sizeof ( *conn ) );
  141. if ( conn ) {
  142. DBGC ( conn, "TCP %p allocated\n", conn );
  143. memset ( conn, 0, sizeof ( *conn ) );
  144. conn->tcp_state = conn->prev_tcp_state = TCP_CLOSED;
  145. conn->snd_seq = random();
  146. conn->timer.expired = tcp_expired;
  147. list_add ( &conn->list, &tcp_conns );
  148. }
  149. return conn;
  150. }
  151. /**
  152. * Free TCP connection
  153. *
  154. * @v conn TCP connection
  155. *
  156. * Removes connection from TCP connection list and frees the data
  157. * structure.
  158. */
  159. static void free_tcp ( struct tcp_connection *conn ) {
  160. assert ( conn );
  161. assert ( conn->tcp_state == TCP_CLOSED );
  162. assert ( conn->app == NULL );
  163. stop_timer ( &conn->timer );
  164. list_del ( &conn->list );
  165. free ( conn );
  166. DBGC ( conn, "TCP %p freed\n", conn );
  167. }
  168. /**
  169. * Associate TCP connection with application
  170. *
  171. * @v conn TCP connection
  172. * @v app TCP application
  173. */
  174. static void tcp_associate ( struct tcp_connection *conn,
  175. struct tcp_application *app ) {
  176. assert ( conn->app == NULL );
  177. assert ( app->conn == NULL );
  178. conn->app = app;
  179. app->conn = conn;
  180. DBGC ( conn, "TCP %p associated with application %p\n", conn, app );
  181. }
  182. /**
  183. * Disassociate TCP connection from application
  184. *
  185. * @v conn TCP connection
  186. */
  187. static void tcp_disassociate ( struct tcp_connection *conn ) {
  188. struct tcp_application *app = conn->app;
  189. if ( app ) {
  190. assert ( app->conn == conn );
  191. conn->app = NULL;
  192. app->conn = NULL;
  193. DBGC ( conn, "TCP %p disassociated from application %p\n",
  194. conn, app );
  195. }
  196. }
  197. /**
  198. * Abort TCP connection
  199. *
  200. * @v conn TCP connection
  201. * @v send_rst Send a RST after closing
  202. * @v rc Reason code
  203. */
  204. static void tcp_abort ( struct tcp_connection *conn, int send_rst, int rc ) {
  205. struct tcp_application *app = conn->app;
  206. /* Transition to CLOSED */
  207. conn->tcp_state = TCP_CLOSED;
  208. tcp_dump_state ( conn );
  209. /* Send RST if requested to do so */
  210. if ( send_rst )
  211. tcp_senddata_conn ( conn, 1 );
  212. /* Break association between application and connection */
  213. tcp_disassociate ( conn );
  214. /* Free the connection */
  215. free_tcp ( conn );
  216. /* Notify application */
  217. if ( app && app->tcp_op->closed )
  218. app->tcp_op->closed ( app, rc );
  219. }
  220. /**
  221. * Transmit any outstanding data
  222. *
  223. * @v conn TCP connection
  224. * @v force_send Force sending of packet
  225. *
  226. * Transmits any outstanding data on the connection. If the
  227. * connection is in a connected state, the application's senddata()
  228. * method will be called to generate the data payload, if any.
  229. *
  230. * Note that even if an error is returned, the retransmission timer
  231. * will have been started if necessary, and so the stack will
  232. * eventually attempt to retransmit the failed packet.
  233. */
  234. static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
  235. struct tcp_application *app = conn->app;
  236. struct pk_buff *pkb;
  237. struct tcp_header *tcphdr;
  238. struct tcp_mss_option *mssopt;
  239. void *payload;
  240. unsigned int flags;
  241. size_t len;
  242. size_t seq_len;
  243. size_t window;
  244. int rc;
  245. /* Allocate space to the TX buffer */
  246. pkb = alloc_pkb ( MAX_PKB_LEN );
  247. if ( ! pkb ) {
  248. DBGC ( conn, "TCP %p could not allocate data buffer\n", conn );
  249. /* Start the retry timer so that we attempt to
  250. * retransmit this packet later. (Start it
  251. * unconditionally, since without a packet buffer we
  252. * can't call the senddata() callback, and so may not
  253. * be able to tell whether or not we have something
  254. * that actually needs to be retransmitted).
  255. */
  256. start_timer ( &conn->timer );
  257. return -ENOMEM;
  258. }
  259. pkb_reserve ( pkb, MAX_HDR_LEN );
  260. /* If we are connected, call the senddata() method, which may
  261. * call tcp_send() to queue up a data payload.
  262. */
  263. if ( TCP_CAN_SEND_DATA ( conn->tcp_state ) &&
  264. app && app->tcp_op->senddata ) {
  265. conn->tx_pkb = pkb;
  266. app->tcp_op->senddata ( app, pkb->data, pkb_tailroom ( pkb ) );
  267. conn->tx_pkb = NULL;
  268. }
  269. /* Truncate payload length to fit transmit window */
  270. len = pkb_len ( pkb );
  271. if ( len > conn->snd_win )
  272. len = conn->snd_win;
  273. /* Calculate amount of sequence space that this transmission
  274. * consumes. (SYN or FIN consume one byte, and we can never
  275. * send both at once).
  276. */
  277. seq_len = len;
  278. flags = TCP_FLAGS_SENDING ( conn->tcp_state );
  279. assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
  280. if ( flags & ( TCP_SYN | TCP_FIN ) )
  281. seq_len++;
  282. conn->snd_sent = seq_len;
  283. /* If we have nothing to transmit, drop the packet */
  284. if ( ( seq_len == 0 ) && ! force_send ) {
  285. free_pkb ( pkb );
  286. return 0;
  287. }
  288. /* If we are transmitting anything that requires
  289. * acknowledgement (i.e. consumes sequence space), start the
  290. * retransmission timer.
  291. */
  292. if ( seq_len )
  293. start_timer ( &conn->timer );
  294. /* Estimate window size */
  295. window = freemem;
  296. if ( window > TCP_MAX_WINDOW_SIZE )
  297. window = TCP_MAX_WINDOW_SIZE;
  298. window &= ~0x03; /* Keep everything dword-aligned */
  299. /* Fill up the TCP header */
  300. payload = pkb->data;
  301. if ( flags & TCP_SYN ) {
  302. mssopt = pkb_push ( pkb, sizeof ( *mssopt ) );
  303. mssopt->kind = TCP_OPTION_MSS;
  304. mssopt->length = sizeof ( *mssopt );
  305. mssopt->mss = htons ( TCP_MSS );
  306. }
  307. tcphdr = pkb_push ( pkb, sizeof ( *tcphdr ) );
  308. memset ( tcphdr, 0, sizeof ( *tcphdr ) );
  309. tcphdr->src = conn->local_port;
  310. tcphdr->dest = conn->peer.st_port;
  311. tcphdr->seq = htonl ( conn->snd_seq );
  312. tcphdr->ack = htonl ( conn->rcv_ack );
  313. tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
  314. tcphdr->flags = flags;
  315. tcphdr->win = htons ( window );
  316. tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
  317. /* Dump header */
  318. DBGC ( conn, "TCP %p TX %d->%d %08lx..%08lx %08lx %4zd",
  319. conn, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
  320. ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
  321. ntohl ( tcphdr->ack ), len );
  322. tcp_dump_flags ( conn, tcphdr->flags );
  323. DBGC ( conn, "\n" );
  324. /* Transmit packet */
  325. rc = tcpip_tx ( pkb, &tcp_protocol, &conn->peer, NULL, &tcphdr->csum );
  326. /* If we got -ENETUNREACH, kill the connection immediately
  327. * because there is no point retrying. This isn't strictly
  328. * necessary (since we will eventually time out anyway), but
  329. * it avoids irritating needless delays. Don't do this for
  330. * RST packets transmitted on connection abort, to avoid a
  331. * potential infinite loop.
  332. */
  333. if ( ( ! ( conn->tcp_state & TCP_STATE_SENT ( TCP_RST ) ) ) &&
  334. ( rc == -ENETUNREACH ) ) {
  335. DBGC ( conn, "TCP %p aborting after TX failed: %s\n",
  336. conn, strerror ( rc ) );
  337. tcp_abort ( conn, 0, rc );
  338. }
  339. return rc;
  340. }
  341. /**
  342. * Transmit any outstanding data
  343. *
  344. * @v conn TCP connection
  345. *
  346. * This function allocates space to the transmit buffer and invokes
  347. * the senddata() callback function, to allow the application to
  348. * transmit new data.
  349. */
  350. int tcp_senddata ( struct tcp_application *app ) {
  351. struct tcp_connection *conn = app->conn;
  352. /* Check connection actually exists */
  353. if ( ! conn ) {
  354. DBG ( "TCP app %p has no connection\n", app );
  355. return -ENOTCONN;
  356. }
  357. return tcp_senddata_conn ( conn, 0 );
  358. }
  359. /**
  360. * Transmit data
  361. *
  362. * @v app TCP application
  363. * @v data Data to be sent
  364. * @v len Length of the data
  365. * @ret rc Return status code
  366. *
  367. * This function queues data to be sent via the TCP connection. It
  368. * can be called only in the context of an application's senddata()
  369. * method.
  370. */
  371. int tcp_send ( struct tcp_application *app, const void *data, size_t len ) {
  372. struct tcp_connection *conn = app->conn;
  373. struct pk_buff *pkb;
  374. /* Check connection actually exists */
  375. if ( ! conn ) {
  376. DBG ( "TCP app %p has no connection\n", app );
  377. return -ENOTCONN;
  378. }
  379. /* Check that we have a packet buffer to fill */
  380. pkb = conn->tx_pkb;
  381. if ( ! pkb ) {
  382. DBG ( "TCP app %p tried to send data outside of the "
  383. "senddata() method\n", app );
  384. return -EINVAL;
  385. }
  386. /* Truncate length to fit packet buffer */
  387. if ( len > pkb_tailroom ( pkb ) )
  388. len = pkb_tailroom ( pkb );
  389. /* Copy payload */
  390. memmove ( pkb_put ( pkb, len ), data, len );
  391. return 0;
  392. }
  393. /**
  394. * Retransmission timer expired
  395. *
  396. * @v timer Retry timer
  397. * @v over Failure indicator
  398. */
  399. static void tcp_expired ( struct retry_timer *timer, int over ) {
  400. struct tcp_connection *conn =
  401. container_of ( timer, struct tcp_connection, timer );
  402. int graceful_close = TCP_CLOSED_GRACEFULLY ( conn->tcp_state );
  403. DBGC ( conn, "TCP %p timer %s in %s\n", conn,
  404. ( over ? "expired" : "fired" ), tcp_state ( conn->tcp_state ) );
  405. assert ( ( conn->tcp_state == TCP_SYN_SENT ) ||
  406. ( conn->tcp_state == TCP_SYN_RCVD ) ||
  407. ( conn->tcp_state == TCP_ESTABLISHED ) ||
  408. ( conn->tcp_state == TCP_FIN_WAIT_1 ) ||
  409. ( conn->tcp_state == TCP_TIME_WAIT ) ||
  410. ( conn->tcp_state == TCP_CLOSE_WAIT ) ||
  411. ( conn->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
  412. if ( over || graceful_close ) {
  413. /* If we have finally timed out and given up, or if
  414. * this is the result of a graceful close, terminate
  415. * the connection
  416. */
  417. tcp_abort ( conn, 1, -ETIMEDOUT );
  418. } else {
  419. /* Otherwise, retransmit the packet */
  420. tcp_senddata_conn ( conn, 0 );
  421. }
  422. }
  423. /**
  424. * Send RST response to incoming packet
  425. *
  426. * @v in_tcphdr TCP header of incoming packet
  427. * @ret rc Return status code
  428. */
  429. static int tcp_send_reset ( struct tcp_connection *conn,
  430. struct tcp_header *in_tcphdr ) {
  431. struct pk_buff *pkb;
  432. struct tcp_header *tcphdr;
  433. /* Allocate space for dataless TX buffer */
  434. pkb = alloc_pkb ( MAX_HDR_LEN );
  435. if ( ! pkb ) {
  436. DBGC ( conn, "TCP %p could not allocate data buffer\n", conn );
  437. return -ENOMEM;
  438. }
  439. pkb_reserve ( pkb, MAX_HDR_LEN );
  440. /* Construct RST response */
  441. tcphdr = pkb_push ( pkb, sizeof ( *tcphdr ) );
  442. memset ( tcphdr, 0, sizeof ( *tcphdr ) );
  443. tcphdr->src = in_tcphdr->dest;
  444. tcphdr->dest = in_tcphdr->src;
  445. tcphdr->seq = in_tcphdr->ack;
  446. tcphdr->ack = in_tcphdr->seq;
  447. tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
  448. tcphdr->flags = ( TCP_RST | TCP_ACK );
  449. tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
  450. tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
  451. /* Dump header */
  452. DBGC ( conn, "TCP %p TX %d->%d %08lx..%08lx %08lx %4zd",
  453. conn, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
  454. ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
  455. ntohl ( tcphdr->ack ), 0 );
  456. tcp_dump_flags ( conn, tcphdr->flags );
  457. DBGC ( conn, "\n" );
  458. /* Transmit packet */
  459. return tcpip_tx ( pkb, &tcp_protocol, &conn->peer,
  460. NULL, &tcphdr->csum );
  461. }
  462. /**
  463. * Identify TCP connection by local port number
  464. *
  465. * @v local_port Local port (in network-endian order)
  466. * @ret conn TCP connection, or NULL
  467. */
  468. static struct tcp_connection * tcp_demux ( uint16_t local_port ) {
  469. struct tcp_connection *conn;
  470. list_for_each_entry ( conn, &tcp_conns, list ) {
  471. if ( conn->local_port == local_port )
  472. return conn;
  473. }
  474. return NULL;
  475. }
  476. /**
  477. * Handle TCP received SYN
  478. *
  479. * @v conn TCP connection
  480. * @v seq SEQ value (in host-endian order)
  481. * @ret rc Return status code
  482. */
  483. static int tcp_rx_syn ( struct tcp_connection *conn, uint32_t seq ) {
  484. struct tcp_application *app = conn->app;
  485. /* Synchronise sequence numbers on first SYN */
  486. if ( ! ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) )
  487. conn->rcv_ack = seq;
  488. /* Ignore duplicate SYN */
  489. if ( ( conn->rcv_ack - seq ) > 0 )
  490. return 0;
  491. /* Mark SYN as received and start sending ACKs with each packet */
  492. conn->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
  493. TCP_STATE_RCVD ( TCP_SYN ) );
  494. /* Acknowledge SYN */
  495. conn->rcv_ack++;
  496. /* Notify application of established connection, if applicable */
  497. if ( ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) &&
  498. app && app->tcp_op->connected )
  499. app->tcp_op->connected ( app );
  500. return 0;
  501. }
  502. /**
  503. * Handle TCP received ACK
  504. *
  505. * @v conn TCP connection
  506. * @v ack ACK value (in host-endian order)
  507. * @v win WIN value (in host-endian order)
  508. * @ret rc Return status code
  509. */
  510. static int tcp_rx_ack ( struct tcp_connection *conn, uint32_t ack,
  511. uint32_t win ) {
  512. struct tcp_application *app = conn->app;
  513. size_t ack_len = ( ack - conn->snd_seq );
  514. size_t len;
  515. unsigned int acked_flags = 0;
  516. /* Ignore duplicate or out-of-range ACK */
  517. if ( ack_len > conn->snd_sent ) {
  518. DBGC ( conn, "TCP %p received ACK for [%08lx,%08lx), "
  519. "sent only [%08lx,%08lx)\n", conn, conn->snd_seq,
  520. ( conn->snd_seq + ack_len ), conn->snd_seq,
  521. ( conn->snd_seq + conn->snd_sent ) );
  522. return -EINVAL;
  523. }
  524. /* If we are sending flags and this ACK acknowledges all
  525. * outstanding sequence points, then it acknowledges the
  526. * flags. (This works since both SYN and FIN will always be
  527. * the last outstanding sequence point.)
  528. */
  529. len = ack_len;
  530. if ( ack_len == conn->snd_sent ) {
  531. acked_flags = ( TCP_FLAGS_SENDING ( conn->tcp_state ) &
  532. ( TCP_SYN | TCP_FIN ) );
  533. if ( acked_flags )
  534. len--;
  535. }
  536. /* Update SEQ and sent counters, and window size */
  537. conn->snd_seq = ack;
  538. conn->snd_sent = 0;
  539. conn->snd_win = win;
  540. /* Stop the retransmission timer */
  541. stop_timer ( &conn->timer );
  542. /* Notify application of acknowledged data, if any */
  543. if ( len && app && app->tcp_op->acked )
  544. app->tcp_op->acked ( app, len );
  545. /* Mark SYN/FIN as acknowledged if applicable. */
  546. if ( acked_flags )
  547. conn->tcp_state |= TCP_STATE_ACKED ( acked_flags );
  548. /* Notify application of established connection, if applicable */
  549. if ( ( acked_flags & TCP_SYN ) &&
  550. ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
  551. app && app->tcp_op->connected )
  552. app->tcp_op->connected ( app );
  553. return 0;
  554. }
  555. /**
  556. * Handle TCP received data
  557. *
  558. * @v conn TCP connection
  559. * @v seq SEQ value (in host-endian order)
  560. * @v data Data buffer
  561. * @v len Length of data buffer
  562. * @ret rc Return status code
  563. */
  564. static int tcp_rx_data ( struct tcp_connection *conn, uint32_t seq,
  565. void *data, size_t len ) {
  566. struct tcp_application *app = conn->app;
  567. size_t already_rcvd;
  568. /* Ignore duplicate data */
  569. already_rcvd = ( conn->rcv_ack - seq );
  570. if ( already_rcvd >= len )
  571. return 0;
  572. data += already_rcvd;
  573. len -= already_rcvd;
  574. /* Acknowledge new data */
  575. conn->rcv_ack += len;
  576. /* Notify application */
  577. if ( app && app->tcp_op->newdata )
  578. app->tcp_op->newdata ( app, data, len );
  579. return 0;
  580. }
  581. /**
  582. * Handle TCP received FIN
  583. *
  584. * @v conn TCP connection
  585. * @v seq SEQ value (in host-endian order)
  586. * @ret rc Return status code
  587. */
  588. static int tcp_rx_fin ( struct tcp_connection *conn, uint32_t seq ) {
  589. struct tcp_application *app = conn->app;
  590. /* Ignore duplicate FIN */
  591. if ( ( conn->rcv_ack - seq ) > 0 )
  592. return 0;
  593. /* Mark FIN as received, acknowledge it, and send our own FIN */
  594. conn->tcp_state |= ( TCP_STATE_RCVD ( TCP_FIN ) |
  595. TCP_STATE_SENT ( TCP_FIN ) );
  596. conn->rcv_ack++;
  597. /* Break association with application */
  598. tcp_disassociate ( conn );
  599. /* Notify application */
  600. if ( app && app->tcp_op->closed )
  601. app->tcp_op->closed ( app, 0 );
  602. return 0;
  603. }
  604. /**
  605. * Handle TCP received RST
  606. *
  607. * @v conn TCP connection
  608. * @v seq SEQ value (in host-endian order)
  609. * @ret rc Return status code
  610. */
  611. static int tcp_rx_rst ( struct tcp_connection *conn, uint32_t seq ) {
  612. /* Accept RST only if it falls within the window. If we have
  613. * not yet received a SYN, then we have no window to test
  614. * against, so fall back to checking that our SYN has been
  615. * ACKed.
  616. */
  617. if ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
  618. if ( ( conn->rcv_ack - seq ) > 0 )
  619. return 0;
  620. } else {
  621. if ( ! ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
  622. return 0;
  623. }
  624. /* Abort connection without sending a RST */
  625. tcp_abort ( conn, 0, -ECONNRESET );
  626. return -ECONNRESET;
  627. }
  628. /**
  629. * Process received packet
  630. *
  631. * @v pkb Packet buffer
  632. * @v st_src Partially-filled source address
  633. * @v st_dest Partially-filled destination address
  634. * @v pshdr_csum Pseudo-header checksum
  635. * @ret rc Return status code
  636. */
  637. static int tcp_rx ( struct pk_buff *pkb,
  638. struct sockaddr_tcpip *st_src __unused,
  639. struct sockaddr_tcpip *st_dest __unused,
  640. uint16_t pshdr_csum ) {
  641. struct tcp_header *tcphdr = pkb->data;
  642. struct tcp_connection *conn;
  643. unsigned int hlen;
  644. uint16_t csum;
  645. uint32_t start_seq;
  646. uint32_t seq;
  647. uint32_t ack;
  648. uint32_t win;
  649. unsigned int flags;
  650. void *data;
  651. size_t len;
  652. int rc;
  653. /* Sanity check packet */
  654. if ( pkb_len ( pkb ) < sizeof ( *tcphdr ) ) {
  655. DBG ( "TCP packet too short at %d bytes (min %d bytes)\n",
  656. pkb_len ( pkb ), sizeof ( *tcphdr ) );
  657. rc = -EINVAL;
  658. goto done;
  659. }
  660. hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
  661. if ( hlen < sizeof ( *tcphdr ) ) {
  662. DBG ( "TCP header too short at %d bytes (min %d bytes)\n",
  663. hlen, sizeof ( *tcphdr ) );
  664. rc = -EINVAL;
  665. goto done;
  666. }
  667. if ( hlen > pkb_len ( pkb ) ) {
  668. DBG ( "TCP header too long at %d bytes (max %d bytes)\n",
  669. hlen, pkb_len ( pkb ) );
  670. rc = -EINVAL;
  671. goto done;
  672. }
  673. csum = tcpip_continue_chksum ( pshdr_csum, pkb->data, pkb_len ( pkb ));
  674. if ( csum != 0 ) {
  675. DBG ( "TCP checksum incorrect (is %04x including checksum "
  676. "field, should be 0000)\n", csum );
  677. rc = -EINVAL;
  678. goto done;
  679. }
  680. /* Parse parameters from header and strip header */
  681. conn = tcp_demux ( tcphdr->dest );
  682. start_seq = seq = ntohl ( tcphdr->seq );
  683. ack = ntohl ( tcphdr->ack );
  684. win = ntohs ( tcphdr->win );
  685. flags = tcphdr->flags;
  686. data = pkb_pull ( pkb, hlen );
  687. len = pkb_len ( pkb );
  688. /* Dump header */
  689. DBGC ( conn, "TCP %p RX %d<-%d %08lx %08lx..%08lx %4zd",
  690. conn, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
  691. ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
  692. ( ntohl ( tcphdr->seq ) + len +
  693. ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 ) ), len);
  694. tcp_dump_flags ( conn, tcphdr->flags );
  695. DBGC ( conn, "\n" );
  696. /* If no connection was found, send RST */
  697. if ( ! conn ) {
  698. tcp_send_reset ( conn, tcphdr );
  699. rc = -ENOTCONN;
  700. goto done;
  701. }
  702. /* Handle ACK, if present */
  703. if ( flags & TCP_ACK ) {
  704. if ( ( rc = tcp_rx_ack ( conn, ack, win ) ) != 0 ) {
  705. tcp_send_reset ( conn, tcphdr );
  706. goto done;
  707. }
  708. }
  709. /* Handle SYN, if present */
  710. if ( flags & TCP_SYN ) {
  711. tcp_rx_syn ( conn, seq );
  712. seq++;
  713. }
  714. /* Handle RST, if present */
  715. if ( flags & TCP_RST ) {
  716. if ( ( rc = tcp_rx_rst ( conn, seq ) ) != 0 )
  717. goto done;
  718. }
  719. /* Handle new data, if any */
  720. tcp_rx_data ( conn, seq, data, len );
  721. seq += len;
  722. /* Handle FIN, if present */
  723. if ( flags & TCP_FIN ) {
  724. tcp_rx_fin ( conn, seq );
  725. seq++;
  726. }
  727. /* Dump out any state change as a result of the received packet */
  728. tcp_dump_state ( conn );
  729. /* Send out any pending data. If peer is expecting an ACK for
  730. * this packet then force sending a reply.
  731. */
  732. tcp_senddata_conn ( conn, ( start_seq != seq ) );
  733. /* If this packet was the last we expect to receive, set up
  734. * timer to expire and cause the connection to be freed.
  735. */
  736. if ( TCP_CLOSED_GRACEFULLY ( conn->tcp_state ) ) {
  737. conn->timer.timeout = ( 2 * TCP_MSL );
  738. start_timer ( &conn->timer );
  739. }
  740. rc = 0;
  741. done:
  742. /* Free received packet */
  743. free_pkb ( pkb );
  744. return rc;
  745. }
  746. /**
  747. * Bind TCP connection to local port
  748. *
  749. * @v conn TCP connection
  750. * @v local_port Local port (in network byte order), or 0
  751. * @ret rc Return status code
  752. *
  753. * This function adds the connection to the list of registered TCP
  754. * connections. If the local port is 0, the connection is assigned an
  755. * available port between 1024 and 65535.
  756. */
  757. static int tcp_bind ( struct tcp_connection *conn, uint16_t local_port ) {
  758. struct tcp_connection *existing;
  759. static uint16_t try_port = 1024;
  760. /* If no port specified, find the first available port */
  761. if ( ! local_port ) {
  762. for ( ; try_port ; try_port++ ) {
  763. if ( try_port < 1024 )
  764. continue;
  765. if ( tcp_bind ( conn, htons ( try_port ) ) == 0 )
  766. return 0;
  767. }
  768. DBGC ( conn, "TCP %p could not bind: no free ports\n", conn );
  769. return -EADDRINUSE;
  770. }
  771. /* Attempt bind to local port */
  772. list_for_each_entry ( existing, &tcp_conns, list ) {
  773. if ( existing->local_port == local_port ) {
  774. DBGC ( conn, "TCP %p could not bind: port %d in use\n",
  775. conn, ntohs ( local_port ) );
  776. return -EADDRINUSE;
  777. }
  778. }
  779. conn->local_port = local_port;
  780. DBGC ( conn, "TCP %p bound to port %d\n", conn, ntohs ( local_port ) );
  781. return 0;
  782. }
  783. /**
  784. * Connect to a remote server
  785. *
  786. * @v app TCP application
  787. * @v peer Remote socket address
  788. * @v local_port Local port number (in network byte order), or 0
  789. * @ret rc Return status code
  790. *
  791. * This function initiates a TCP connection to the socket address specified in
  792. * peer. It sends a SYN packet to peer. When the connection is established, the
  793. * TCP stack calls the connected() callback function.
  794. */
  795. int tcp_connect ( struct tcp_application *app, struct sockaddr_tcpip *peer,
  796. uint16_t local_port ) {
  797. struct tcp_connection *conn;
  798. int rc;
  799. /* Application must not already have an open connection */
  800. if ( app->conn ) {
  801. DBG ( "TCP app %p already open on %p\n", app, app->conn );
  802. return -EISCONN;
  803. }
  804. /* Allocate connection state storage and add to connection list */
  805. conn = alloc_tcp();
  806. if ( ! conn ) {
  807. DBG ( "TCP app %p could not allocate connection\n", app );
  808. return -ENOMEM;
  809. }
  810. /* Bind to peer and to local port */
  811. memcpy ( &conn->peer, peer, sizeof ( conn->peer ) );
  812. if ( ( rc = tcp_bind ( conn, local_port ) ) != 0 ) {
  813. free_tcp ( conn );
  814. return rc;
  815. }
  816. /* Associate with application */
  817. tcp_associate ( conn, app );
  818. /* Transition to TCP_SYN_SENT and send the SYN */
  819. conn->tcp_state = TCP_SYN_SENT;
  820. tcp_dump_state ( conn );
  821. tcp_senddata_conn ( conn, 0 );
  822. return 0;
  823. }
  824. /**
  825. * Close the connection
  826. *
  827. * @v app TCP application
  828. *
  829. * The association between the application and the TCP connection is
  830. * immediately severed, and the TCP application data structure can be
  831. * reused or freed immediately. The TCP connection will persist until
  832. * the state machine has returned to the TCP_CLOSED state.
  833. */
  834. void tcp_close ( struct tcp_application *app ) {
  835. struct tcp_connection *conn = app->conn;
  836. /* If no connection exists, do nothing */
  837. if ( ! conn )
  838. return;
  839. /* Break association between application and connection */
  840. tcp_disassociate ( conn );
  841. /* If we have not yet received a SYN (i.e. we are in CLOSED,
  842. * LISTEN or SYN_SENT), just delete the connection
  843. */
  844. if ( ! ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
  845. conn->tcp_state = TCP_CLOSED;
  846. tcp_dump_state ( conn );
  847. free_tcp ( conn );
  848. return;
  849. }
  850. /* If we have not had our SYN acknowledged (i.e. we are in
  851. * SYN_RCVD), pretend that it has been acknowledged so that we
  852. * can send a FIN without breaking things.
  853. */
  854. if ( ! ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
  855. tcp_rx_ack ( conn, ( conn->snd_seq + 1 ), 0 );
  856. /* Send a FIN to initiate the close */
  857. conn->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
  858. tcp_dump_state ( conn );
  859. tcp_senddata_conn ( conn, 0 );
  860. }
  861. /** TCP protocol */
  862. struct tcpip_protocol tcp_protocol __tcpip_protocol = {
  863. .name = "TCP",
  864. .rx = tcp_rx,
  865. .tcpip_proto = IP_TCP,
  866. };