/*
 * tcp.c
 *
 * (Web-viewer residue that preceded the source text -- a UI message, a
 * file-size label and a fused line-number gutter -- has been removed.)
 */
  1. #include <string.h>
  2. #include <stdlib.h>
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <byteswap.h>
  6. #include <timer.h>
  7. #include <vsprintf.h>
  8. #include <gpxe/pkbuff.h>
  9. #include <gpxe/retry.h>
  10. #include <gpxe/tcpip.h>
  11. #include <gpxe/tcp.h>
/** @file
 *
 * TCP protocol
 *
 */
  17. static void tcp_expired ( struct retry_timer *timer, int over );
  18. /**
  19. * A TCP connection
  20. *
  21. * This data structure represents the internal state of a TCP
  22. * connection. It is kept separate from @c struct @c tcp_application
  23. * because the internal state is still required for some time after
  24. * the application closes the connection.
  25. */
  26. struct tcp_connection {
  27. /** List of TCP connections */
  28. struct list_head list;
  29. /** The associated TCP application, if any */
  30. struct tcp_application *app;
  31. /** Remote socket address */
  32. struct sockaddr_tcpip peer;
  33. /** Local port, in network byte order */
  34. uint16_t local_port;
  35. /** Current TCP state */
  36. unsigned int tcp_state;
  37. /** Previous TCP state
  38. *
  39. * Maintained only for debug messages
  40. */
  41. unsigned int prev_tcp_state;
  42. /** Current sequence number
  43. *
  44. * Equivalent to SND.UNA in RFC 793 terminology.
  45. */
  46. uint32_t snd_seq;
  47. /** Unacknowledged sequence count
  48. *
  49. * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
  50. */
  51. uint32_t snd_sent;
  52. /** Send window
  53. *
  54. * Equivalent to SND.WND in RFC 793 terminology
  55. */
  56. uint32_t snd_win;
  57. /** Current acknowledgement number
  58. *
  59. * Equivalent to RCV.NXT in RFC 793 terminology.
  60. */
  61. uint32_t rcv_ack;
  62. /** Transmit packet buffer
  63. *
  64. * This buffer is allocated prior to calling the application's
  65. * senddata() method, to provide temporary storage space.
  66. */
  67. struct pk_buff *tx_pkb;
  68. /** Retransmission timer */
  69. struct retry_timer timer;
  70. };
  71. /**
  72. * List of registered TCP connections
  73. */
  74. static LIST_HEAD ( tcp_conns );
  75. /**
  76. * Name TCP state
  77. *
  78. * @v state TCP state
  79. * @ret name Name of TCP state
  80. */
  81. static inline __attribute__ (( always_inline )) const char *
  82. tcp_state ( int state ) {
  83. switch ( state ) {
  84. case TCP_CLOSED: return "CLOSED";
  85. case TCP_LISTEN: return "LISTEN";
  86. case TCP_SYN_SENT: return "SYN_SENT";
  87. case TCP_SYN_RCVD: return "SYN_RCVD";
  88. case TCP_ESTABLISHED: return "ESTABLISHED";
  89. case TCP_FIN_WAIT_1: return "FIN_WAIT_1";
  90. case TCP_FIN_WAIT_2: return "FIN_WAIT_2";
  91. case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK";
  92. case TCP_TIME_WAIT: return "TIME_WAIT";
  93. case TCP_CLOSE_WAIT: return "CLOSE_WAIT";
  94. default: return "INVALID";
  95. }
  96. }
  97. /**
  98. * Dump TCP state transition
  99. *
  100. * @v conn TCP connection
  101. */
  102. static inline __attribute__ (( always_inline )) void
  103. tcp_dump_state ( struct tcp_connection *conn ) {
  104. if ( conn->tcp_state != conn->prev_tcp_state ) {
  105. DBGC ( conn, "TCP %p transitioned from %s to %s\n", conn,
  106. tcp_state ( conn->prev_tcp_state ),
  107. tcp_state ( conn->tcp_state ) );
  108. }
  109. conn->prev_tcp_state = conn->tcp_state;
  110. }
  111. /**
  112. * Dump TCP flags
  113. *
  114. * @v flags TCP flags
  115. */
  116. static inline __attribute__ (( always_inline )) void
  117. tcp_dump_flags ( struct tcp_connection *conn, unsigned int flags ) {
  118. if ( flags & TCP_RST )
  119. DBGC ( conn, " RST" );
  120. if ( flags & TCP_SYN )
  121. DBGC ( conn, " SYN" );
  122. if ( flags & TCP_PSH )
  123. DBGC ( conn, " PSH" );
  124. if ( flags & TCP_FIN )
  125. DBGC ( conn, " FIN" );
  126. if ( flags & TCP_ACK )
  127. DBGC ( conn, " ACK" );
  128. }
  129. /**
  130. * Allocate TCP connection
  131. *
  132. * @ret conn TCP connection, or NULL
  133. *
  134. * Allocates TCP connection and adds it to the TCP connection list.
  135. */
  136. static struct tcp_connection * alloc_tcp ( void ) {
  137. struct tcp_connection *conn;
  138. conn = calloc ( 1, sizeof ( *conn ) );
  139. if ( conn ) {
  140. DBGC ( conn, "TCP %p allocated\n", conn );
  141. conn->tcp_state = conn->prev_tcp_state = TCP_CLOSED;
  142. conn->snd_seq = random();
  143. conn->timer.expired = tcp_expired;
  144. list_add ( &conn->list, &tcp_conns );
  145. }
  146. return conn;
  147. }
  148. /**
  149. * Free TCP connection
  150. *
  151. * @v conn TCP connection
  152. *
  153. * Removes connection from TCP connection list and frees the data
  154. * structure.
  155. */
  156. static void free_tcp ( struct tcp_connection *conn ) {
  157. assert ( conn );
  158. assert ( conn->tcp_state == TCP_CLOSED );
  159. assert ( conn->app == NULL );
  160. stop_timer ( &conn->timer );
  161. list_del ( &conn->list );
  162. free ( conn );
  163. DBGC ( conn, "TCP %p freed\n", conn );
  164. }
  165. /**
  166. * Associate TCP connection with application
  167. *
  168. * @v conn TCP connection
  169. * @v app TCP application
  170. */
  171. static void tcp_associate ( struct tcp_connection *conn,
  172. struct tcp_application *app ) {
  173. assert ( conn->app == NULL );
  174. assert ( app->conn == NULL );
  175. conn->app = app;
  176. app->conn = conn;
  177. DBGC ( conn, "TCP %p associated with application %p\n", conn, app );
  178. }
  179. /**
  180. * Disassociate TCP connection from application
  181. *
  182. * @v conn TCP connection
  183. */
  184. static void tcp_disassociate ( struct tcp_connection *conn ) {
  185. struct tcp_application *app = conn->app;
  186. if ( app ) {
  187. assert ( app->conn == conn );
  188. conn->app = NULL;
  189. app->conn = NULL;
  190. DBGC ( conn, "TCP %p disassociated from application %p\n",
  191. conn, app );
  192. }
  193. }
  194. /**
  195. * Transmit any outstanding data
  196. *
  197. * @v conn TCP connection
  198. * @v force_send Force sending of packet
  199. *
  200. * Transmits any outstanding data on the connection. If the
  201. * connection is in a connected state, the application's senddata()
  202. * method will be called to generate the data payload, if any.
  203. *
  204. * Note that even if an error is returned, the retransmission timer
  205. * will have been started if necessary, and so the stack will
  206. * eventually attempt to retransmit the failed packet.
  207. */
  208. static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
  209. struct tcp_application *app = conn->app;
  210. struct pk_buff *pkb;
  211. struct tcp_header *tcphdr;
  212. struct tcp_mss_option *mssopt;
  213. void *payload;
  214. unsigned int flags;
  215. size_t len;
  216. size_t seq_len;
  217. /* Allocate space to the TX buffer */
  218. pkb = alloc_pkb ( MAX_PKB_LEN );
  219. if ( ! pkb ) {
  220. DBGC ( conn, "TCP %p could not allocate data buffer\n", conn );
  221. /* Start the retry timer so that we attempt to
  222. * retransmit this packet later. (Start it
  223. * unconditionally, since without a packet buffer we
  224. * can't call the senddata() callback, and so may not
  225. * be able to tell whether or not we have something
  226. * that actually needs to be retransmitted).
  227. */
  228. start_timer ( &conn->timer );
  229. return -ENOMEM;
  230. }
  231. pkb_reserve ( pkb, MAX_HDR_LEN );
  232. /* If we are connected, call the senddata() method, which may
  233. * call tcp_send() to queue up a data payload.
  234. */
  235. if ( TCP_CAN_SEND_DATA ( conn->tcp_state ) &&
  236. app && app->tcp_op->senddata ) {
  237. conn->tx_pkb = pkb;
  238. app->tcp_op->senddata ( app, pkb->data, pkb_tailroom ( pkb ) );
  239. conn->tx_pkb = NULL;
  240. }
  241. /* Truncate payload length to fit transmit window */
  242. len = pkb_len ( pkb );
  243. if ( len > conn->snd_win )
  244. len = conn->snd_win;
  245. /* Calculate amount of sequence space that this transmission
  246. * consumes. (SYN or FIN consume one byte, and we can never
  247. * send both at once).
  248. */
  249. seq_len = len;
  250. flags = TCP_FLAGS_SENDING ( conn->tcp_state );
  251. assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
  252. if ( flags & ( TCP_SYN | TCP_FIN ) )
  253. seq_len++;
  254. conn->snd_sent = seq_len;
  255. /* If we have nothing to transmit, drop the packet */
  256. if ( ( seq_len == 0 ) && ! force_send ) {
  257. free_pkb ( pkb );
  258. return 0;
  259. }
  260. /* If we are transmitting anything that requires
  261. * acknowledgement (i.e. consumes sequence space), start the
  262. * retransmission timer.
  263. */
  264. if ( seq_len )
  265. start_timer ( &conn->timer );
  266. /* Fill up the TCP header */
  267. payload = pkb->data;
  268. if ( flags & TCP_SYN ) {
  269. mssopt = pkb_push ( pkb, sizeof ( *mssopt ) );
  270. mssopt->kind = TCP_OPTION_MSS;
  271. mssopt->length = sizeof ( *mssopt );
  272. mssopt->mss = htons ( TCP_MSS );
  273. }
  274. tcphdr = pkb_push ( pkb, sizeof ( *tcphdr ) );
  275. memset ( tcphdr, 0, sizeof ( *tcphdr ) );
  276. tcphdr->src = conn->local_port;
  277. tcphdr->dest = conn->peer.st_port;
  278. tcphdr->seq = htonl ( conn->snd_seq );
  279. tcphdr->ack = htonl ( conn->rcv_ack );
  280. tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
  281. tcphdr->flags = flags;
  282. tcphdr->win = htons ( TCP_WINDOW_SIZE );
  283. tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
  284. /* Dump header */
  285. DBGC ( conn, "TCP %p TX %d->%d %08lx..%08lx %08lx %4zd",
  286. conn, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
  287. ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
  288. ntohl ( tcphdr->ack ), len );
  289. tcp_dump_flags ( conn, tcphdr->flags );
  290. DBGC ( conn, "\n" );
  291. /* Transmit packet */
  292. return tcpip_tx ( pkb, &tcp_protocol, &conn->peer,
  293. NULL, &tcphdr->csum );
  294. }
  295. /**
  296. * Transmit any outstanding data
  297. *
  298. * @v conn TCP connection
  299. *
  300. * This function allocates space to the transmit buffer and invokes
  301. * the senddata() callback function, to allow the application to
  302. * transmit new data.
  303. */
  304. int tcp_senddata ( struct tcp_application *app ) {
  305. struct tcp_connection *conn = app->conn;
  306. /* Check connection actually exists */
  307. if ( ! conn ) {
  308. DBG ( "TCP app %p has no connection\n", app );
  309. return -ENOTCONN;
  310. }
  311. return tcp_senddata_conn ( conn, 0 );
  312. }
  313. /**
  314. * Transmit data
  315. *
  316. * @v app TCP application
  317. * @v data Data to be sent
  318. * @v len Length of the data
  319. * @ret rc Return status code
  320. *
  321. * This function queues data to be sent via the TCP connection. It
  322. * can be called only in the context of an application's senddata()
  323. * method.
  324. */
  325. int tcp_send ( struct tcp_application *app, const void *data, size_t len ) {
  326. struct tcp_connection *conn = app->conn;
  327. struct pk_buff *pkb;
  328. /* Check connection actually exists */
  329. if ( ! conn ) {
  330. DBG ( "TCP app %p has no connection\n", app );
  331. return -ENOTCONN;
  332. }
  333. /* Check that we have a packet buffer to fill */
  334. pkb = conn->tx_pkb;
  335. if ( ! pkb ) {
  336. DBG ( "TCP app %p tried to send data outside of the "
  337. "senddata() method\n", app );
  338. return -EINVAL;
  339. }
  340. /* Truncate length to fit packet buffer */
  341. if ( len > pkb_tailroom ( pkb ) )
  342. len = pkb_tailroom ( pkb );
  343. /* Copy payload */
  344. memmove ( pkb_put ( pkb, len ), data, len );
  345. return 0;
  346. }
  347. /**
  348. * Retransmission timer expired
  349. *
  350. * @v timer Retry timer
  351. * @v over Failure indicator
  352. */
  353. static void tcp_expired ( struct retry_timer *timer, int over ) {
  354. struct tcp_connection *conn =
  355. container_of ( timer, struct tcp_connection, timer );
  356. struct tcp_application *app = conn->app;
  357. int graceful_close = TCP_CLOSED_GRACEFULLY ( conn->tcp_state );
  358. DBGC ( conn, "TCP %p timer %s in %s\n", conn,
  359. ( over ? "expired" : "fired" ), tcp_state ( conn->tcp_state ) );
  360. assert ( ( conn->tcp_state == TCP_SYN_SENT ) ||
  361. ( conn->tcp_state == TCP_SYN_RCVD ) ||
  362. ( conn->tcp_state == TCP_ESTABLISHED ) ||
  363. ( conn->tcp_state == TCP_FIN_WAIT_1 ) ||
  364. ( conn->tcp_state == TCP_TIME_WAIT ) ||
  365. ( conn->tcp_state == TCP_CLOSE_WAIT ) ||
  366. ( conn->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
  367. /* If we have finally timed out and given up, or if this is
  368. * the result of a graceful close, terminate the connection
  369. */
  370. if ( over || graceful_close ) {
  371. /* Transition to CLOSED */
  372. conn->tcp_state = TCP_CLOSED;
  373. tcp_dump_state ( conn );
  374. /* If we haven't closed gracefully, send a RST */
  375. if ( ! graceful_close )
  376. tcp_senddata_conn ( conn, 1 );
  377. /* Break association between application and connection */
  378. tcp_disassociate ( conn );
  379. /* Free the connection */
  380. free_tcp ( conn );
  381. /* Notify application */
  382. if ( app && app->tcp_op->closed )
  383. app->tcp_op->closed ( app, -ETIMEDOUT );
  384. } else {
  385. /* Otherwise, retransmit the packet */
  386. tcp_senddata_conn ( conn, 0 );
  387. }
  388. }
  389. /**
  390. * Send RST response to incoming packet
  391. *
  392. * @v in_tcphdr TCP header of incoming packet
  393. * @ret rc Return status code
  394. */
  395. static int tcp_send_reset ( struct tcp_connection *conn,
  396. struct tcp_header *in_tcphdr ) {
  397. struct pk_buff *pkb;
  398. struct tcp_header *tcphdr;
  399. /* Allocate space for dataless TX buffer */
  400. pkb = alloc_pkb ( MAX_HDR_LEN );
  401. if ( ! pkb ) {
  402. DBGC ( conn, "TCP %p could not allocate data buffer\n", conn );
  403. return -ENOMEM;
  404. }
  405. pkb_reserve ( pkb, MAX_HDR_LEN );
  406. /* Construct RST response */
  407. tcphdr = pkb_push ( pkb, sizeof ( *tcphdr ) );
  408. memset ( tcphdr, 0, sizeof ( *tcphdr ) );
  409. tcphdr->src = in_tcphdr->dest;
  410. tcphdr->dest = in_tcphdr->src;
  411. tcphdr->seq = in_tcphdr->ack;
  412. tcphdr->ack = in_tcphdr->seq;
  413. tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
  414. tcphdr->flags = ( TCP_RST | TCP_ACK );
  415. tcphdr->win = htons ( TCP_WINDOW_SIZE );
  416. tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
  417. /* Dump header */
  418. DBGC ( conn, "TCP %p TX %d->%d %08lx..%08lx %08lx %4zd",
  419. conn, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
  420. ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
  421. ntohl ( tcphdr->ack ), 0 );
  422. tcp_dump_flags ( conn, tcphdr->flags );
  423. DBGC ( conn, "\n" );
  424. /* Transmit packet */
  425. return tcpip_tx ( pkb, &tcp_protocol, &conn->peer,
  426. NULL, &tcphdr->csum );
  427. }
  428. /**
  429. * Identify TCP connection by local port number
  430. *
  431. * @v local_port Local port (in network-endian order)
  432. * @ret conn TCP connection, or NULL
  433. */
  434. static struct tcp_connection * tcp_demux ( uint16_t local_port ) {
  435. struct tcp_connection *conn;
  436. list_for_each_entry ( conn, &tcp_conns, list ) {
  437. if ( conn->local_port == local_port )
  438. return conn;
  439. }
  440. return NULL;
  441. }
  442. /**
  443. * Handle TCP received SYN
  444. *
  445. * @v conn TCP connection
  446. * @v seq SEQ value (in host-endian order)
  447. * @ret rc Return status code
  448. */
  449. static int tcp_rx_syn ( struct tcp_connection *conn, uint32_t seq ) {
  450. struct tcp_application *app = conn->app;
  451. /* Synchronise sequence numbers on first SYN */
  452. if ( ! ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) )
  453. conn->rcv_ack = seq;
  454. /* Ignore duplicate SYN */
  455. if ( ( conn->rcv_ack - seq ) > 0 )
  456. return 0;
  457. /* Mark SYN as received and start sending ACKs with each packet */
  458. conn->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
  459. TCP_STATE_RCVD ( TCP_SYN ) );
  460. /* Acknowledge SYN */
  461. conn->rcv_ack++;
  462. /* Notify application of established connection, if applicable */
  463. if ( ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) &&
  464. app && app->tcp_op->connected )
  465. app->tcp_op->connected ( app );
  466. return 0;
  467. }
  468. /**
  469. * Handle TCP received ACK
  470. *
  471. * @v conn TCP connection
  472. * @v ack ACK value (in host-endian order)
  473. * @v win WIN value (in host-endian order)
  474. * @ret rc Return status code
  475. */
  476. static int tcp_rx_ack ( struct tcp_connection *conn, uint32_t ack,
  477. uint32_t win ) {
  478. struct tcp_application *app = conn->app;
  479. size_t ack_len = ( ack - conn->snd_seq );
  480. size_t len;
  481. unsigned int acked_flags = 0;
  482. /* Ignore duplicate or out-of-range ACK */
  483. if ( ack_len > conn->snd_sent ) {
  484. DBGC ( conn, "TCP %p received ACK for [%08lx,%08lx), "
  485. "sent only [%08lx,%08lx)\n", conn, conn->snd_seq,
  486. ( conn->snd_seq + ack_len ), conn->snd_seq,
  487. ( conn->snd_seq + conn->snd_sent ) );
  488. return -EINVAL;
  489. }
  490. /* If we are sending flags and this ACK acknowledges all
  491. * outstanding sequence points, then it acknowledges the
  492. * flags. (This works since both SYN and FIN will always be
  493. * the last outstanding sequence point.)
  494. */
  495. len = ack_len;
  496. if ( ack_len == conn->snd_sent ) {
  497. acked_flags = ( TCP_FLAGS_SENDING ( conn->tcp_state ) &
  498. ( TCP_SYN | TCP_FIN ) );
  499. if ( acked_flags )
  500. len--;
  501. }
  502. /* Update SEQ and sent counters, and window size */
  503. conn->snd_seq = ack;
  504. conn->snd_sent = 0;
  505. conn->snd_win = win;
  506. /* Stop the retransmission timer */
  507. stop_timer ( &conn->timer );
  508. /* Notify application of acknowledged data, if any */
  509. if ( len && app && app->tcp_op->acked )
  510. app->tcp_op->acked ( app, len );
  511. /* Mark SYN/FIN as acknowledged if applicable. */
  512. if ( acked_flags )
  513. conn->tcp_state |= TCP_STATE_ACKED ( acked_flags );
  514. /* Notify application of established connection, if applicable */
  515. if ( ( acked_flags & TCP_SYN ) &&
  516. ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
  517. app && app->tcp_op->connected )
  518. app->tcp_op->connected ( app );
  519. return 0;
  520. }
  521. /**
  522. * Handle TCP received data
  523. *
  524. * @v conn TCP connection
  525. * @v seq SEQ value (in host-endian order)
  526. * @v data Data buffer
  527. * @v len Length of data buffer
  528. * @ret rc Return status code
  529. */
  530. static int tcp_rx_data ( struct tcp_connection *conn, uint32_t seq,
  531. void *data, size_t len ) {
  532. struct tcp_application *app = conn->app;
  533. size_t already_rcvd;
  534. /* Ignore duplicate data */
  535. already_rcvd = ( conn->rcv_ack - seq );
  536. if ( already_rcvd >= len )
  537. return 0;
  538. data += already_rcvd;
  539. len -= already_rcvd;
  540. /* Acknowledge new data */
  541. conn->rcv_ack += len;
  542. /* Notify application */
  543. if ( app && app->tcp_op->newdata )
  544. app->tcp_op->newdata ( app, data, len );
  545. return 0;
  546. }
  547. /**
  548. * Handle TCP received FIN
  549. *
  550. * @v conn TCP connection
  551. * @v seq SEQ value (in host-endian order)
  552. * @ret rc Return status code
  553. */
  554. static int tcp_rx_fin ( struct tcp_connection *conn, uint32_t seq ) {
  555. struct tcp_application *app = conn->app;
  556. /* Ignore duplicate FIN */
  557. if ( ( conn->rcv_ack - seq ) > 0 )
  558. return 0;
  559. /* Mark FIN as received, acknowledge it, and send our own FIN */
  560. conn->tcp_state |= ( TCP_STATE_RCVD ( TCP_FIN ) |
  561. TCP_STATE_SENT ( TCP_FIN ) );
  562. conn->rcv_ack++;
  563. /* Break association with application */
  564. tcp_disassociate ( conn );
  565. /* Notify application */
  566. if ( app && app->tcp_op->closed )
  567. app->tcp_op->closed ( app, 0 );
  568. return 0;
  569. }
  570. /**
  571. * Handle TCP received RST
  572. *
  573. * @v conn TCP connection
  574. * @v seq SEQ value (in host-endian order)
  575. * @ret rc Return status code
  576. */
  577. static int tcp_rx_rst ( struct tcp_connection *conn, uint32_t seq ) {
  578. struct tcp_application *app = conn->app;
  579. /* Accept RST only if it falls within the window. If we have
  580. * not yet received a SYN, then we have no window to test
  581. * against, so fall back to checking that our SYN has been
  582. * ACKed.
  583. */
  584. if ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
  585. if ( ( conn->rcv_ack - seq ) > 0 )
  586. return 0;
  587. } else {
  588. if ( ! ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
  589. return 0;
  590. }
  591. /* Transition to CLOSED */
  592. conn->tcp_state = TCP_CLOSED;
  593. tcp_dump_state ( conn );
  594. /* Break association between application and connection */
  595. tcp_disassociate ( conn );
  596. /* Free the connection */
  597. free_tcp ( conn );
  598. /* Notify application */
  599. if ( app && app->tcp_op->closed )
  600. app->tcp_op->closed ( app, -ECONNRESET );
  601. return -ECONNRESET;
  602. }
  603. /**
  604. * Process received packet
  605. *
  606. * @v pkb Packet buffer
  607. * @v st_src Partially-filled source address
  608. * @v st_dest Partially-filled destination address
  609. * @v pshdr_csum Pseudo-header checksum
  610. * @ret rc Return status code
  611. */
  612. static int tcp_rx ( struct pk_buff *pkb,
  613. struct sockaddr_tcpip *st_src __unused,
  614. struct sockaddr_tcpip *st_dest __unused,
  615. uint16_t pshdr_csum ) {
  616. struct tcp_header *tcphdr = pkb->data;
  617. struct tcp_connection *conn;
  618. unsigned int hlen;
  619. uint16_t csum;
  620. uint32_t start_seq;
  621. uint32_t seq;
  622. uint32_t ack;
  623. uint32_t win;
  624. unsigned int flags;
  625. void *data;
  626. size_t len;
  627. int rc;
  628. /* Sanity check packet */
  629. if ( pkb_len ( pkb ) < sizeof ( *tcphdr ) ) {
  630. DBG ( "TCP packet too short at %d bytes (min %d bytes)\n",
  631. pkb_len ( pkb ), sizeof ( *tcphdr ) );
  632. rc = -EINVAL;
  633. goto done;
  634. }
  635. hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
  636. if ( hlen < sizeof ( *tcphdr ) ) {
  637. DBG ( "TCP header too short at %d bytes (min %d bytes)\n",
  638. hlen, sizeof ( *tcphdr ) );
  639. rc = -EINVAL;
  640. goto done;
  641. }
  642. if ( hlen > pkb_len ( pkb ) ) {
  643. DBG ( "TCP header too long at %d bytes (max %d bytes)\n",
  644. hlen, pkb_len ( pkb ) );
  645. rc = -EINVAL;
  646. goto done;
  647. }
  648. csum = tcpip_continue_chksum ( pshdr_csum, pkb->data, pkb_len ( pkb ));
  649. if ( csum != 0 ) {
  650. DBG ( "TCP checksum incorrect (is %04x including checksum "
  651. "field, should be 0000)\n", csum );
  652. rc = -EINVAL;
  653. goto done;
  654. }
  655. /* Parse parameters from header and strip header */
  656. conn = tcp_demux ( tcphdr->dest );
  657. start_seq = seq = ntohl ( tcphdr->seq );
  658. ack = ntohl ( tcphdr->ack );
  659. win = ntohs ( tcphdr->win );
  660. flags = tcphdr->flags;
  661. data = pkb_pull ( pkb, hlen );
  662. len = pkb_len ( pkb );
  663. /* Dump header */
  664. DBGC ( conn, "TCP %p RX %d<-%d %08lx %08lx..%08lx %4zd",
  665. conn, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
  666. ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
  667. ( ntohl ( tcphdr->seq ) + len +
  668. ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 ) ), len);
  669. tcp_dump_flags ( conn, tcphdr->flags );
  670. DBGC ( conn, "\n" );
  671. /* If no connection was found, send RST */
  672. if ( ! conn ) {
  673. tcp_send_reset ( conn, tcphdr );
  674. rc = -ENOTCONN;
  675. goto done;
  676. }
  677. /* Handle ACK, if present */
  678. if ( flags & TCP_ACK ) {
  679. if ( ( rc = tcp_rx_ack ( conn, ack, win ) ) != 0 ) {
  680. tcp_send_reset ( conn, tcphdr );
  681. goto done;
  682. }
  683. }
  684. /* Handle SYN, if present */
  685. if ( flags & TCP_SYN ) {
  686. tcp_rx_syn ( conn, seq );
  687. seq++;
  688. }
  689. /* Handle RST, if present */
  690. if ( flags & TCP_RST ) {
  691. if ( ( rc = tcp_rx_rst ( conn, seq ) ) != 0 )
  692. goto done;
  693. }
  694. /* Handle new data, if any */
  695. tcp_rx_data ( conn, seq, data, len );
  696. seq += len;
  697. /* Handle FIN, if present */
  698. if ( flags & TCP_FIN ) {
  699. tcp_rx_fin ( conn, seq );
  700. seq++;
  701. }
  702. /* Dump out any state change as a result of the received packet */
  703. tcp_dump_state ( conn );
  704. /* Send out any pending data. If peer is expecting an ACK for
  705. * this packet then force sending a reply.
  706. */
  707. tcp_senddata_conn ( conn, ( start_seq != seq ) );
  708. /* If this packet was the last we expect to receive, set up
  709. * timer to expire and cause the connection to be freed.
  710. */
  711. if ( TCP_CLOSED_GRACEFULLY ( conn->tcp_state ) ) {
  712. conn->timer.timeout = ( 2 * TCP_MSL );
  713. start_timer ( &conn->timer );
  714. }
  715. rc = 0;
  716. done:
  717. /* Free received packet */
  718. free_pkb ( pkb );
  719. return rc;
  720. }
  721. /**
  722. * Bind TCP connection to local port
  723. *
  724. * @v conn TCP connection
  725. * @v local_port Local port (in network byte order), or 0
  726. * @ret rc Return status code
  727. *
  728. * This function adds the connection to the list of registered TCP
  729. * connections. If the local port is 0, the connection is assigned an
  730. * available port between 1024 and 65535.
  731. */
  732. static int tcp_bind ( struct tcp_connection *conn, uint16_t local_port ) {
  733. struct tcp_connection *existing;
  734. static uint16_t try_port = 1024;
  735. /* If no port specified, find the first available port */
  736. if ( ! local_port ) {
  737. for ( ; try_port ; try_port++ ) {
  738. if ( try_port < 1024 )
  739. continue;
  740. if ( tcp_bind ( conn, htons ( try_port ) ) == 0 )
  741. return 0;
  742. }
  743. DBGC ( conn, "TCP %p could not bind: no free ports\n", conn );
  744. return -EADDRINUSE;
  745. }
  746. /* Attempt bind to local port */
  747. list_for_each_entry ( existing, &tcp_conns, list ) {
  748. if ( existing->local_port == local_port ) {
  749. DBGC ( conn, "TCP %p could not bind: port %d in use\n",
  750. conn, ntohs ( local_port ) );
  751. return -EADDRINUSE;
  752. }
  753. }
  754. conn->local_port = local_port;
  755. DBGC ( conn, "TCP %p bound to port %d\n", conn, ntohs ( local_port ) );
  756. return 0;
  757. }
  758. /**
  759. * Connect to a remote server
  760. *
  761. * @v app TCP application
  762. * @v peer Remote socket address
  763. * @v local_port Local port number (in network byte order), or 0
  764. * @ret rc Return status code
  765. *
  766. * This function initiates a TCP connection to the socket address specified in
  767. * peer. It sends a SYN packet to peer. When the connection is established, the
  768. * TCP stack calls the connected() callback function.
  769. */
  770. int tcp_connect ( struct tcp_application *app, struct sockaddr_tcpip *peer,
  771. uint16_t local_port ) {
  772. struct tcp_connection *conn;
  773. int rc;
  774. /* Application must not already have an open connection */
  775. if ( app->conn ) {
  776. DBG ( "TCP app %p already open on %p\n", app, app->conn );
  777. return -EISCONN;
  778. }
  779. /* Allocate connection state storage and add to connection list */
  780. conn = alloc_tcp();
  781. if ( ! conn ) {
  782. DBG ( "TCP app %p could not allocate connection\n", app );
  783. return -ENOMEM;
  784. }
  785. /* Bind to peer and to local port */
  786. memcpy ( &conn->peer, peer, sizeof ( conn->peer ) );
  787. if ( ( rc = tcp_bind ( conn, local_port ) ) != 0 ) {
  788. free_tcp ( conn );
  789. return rc;
  790. }
  791. /* Associate with application */
  792. tcp_associate ( conn, app );
  793. /* Transition to TCP_SYN_SENT and send the SYN */
  794. conn->tcp_state = TCP_SYN_SENT;
  795. tcp_dump_state ( conn );
  796. tcp_senddata_conn ( conn, 0 );
  797. return 0;
  798. }
  799. /**
  800. * Close the connection
  801. *
  802. * @v app TCP application
  803. *
  804. * The association between the application and the TCP connection is
  805. * immediately severed, and the TCP application data structure can be
  806. * reused or freed immediately. The TCP connection will persist until
  807. * the state machine has returned to the TCP_CLOSED state.
  808. */
  809. void tcp_close ( struct tcp_application *app ) {
  810. struct tcp_connection *conn = app->conn;
  811. /* If no connection exists, do nothing */
  812. if ( ! conn )
  813. return;
  814. /* Break association between application and connection */
  815. tcp_disassociate ( conn );
  816. /* If we have not yet received a SYN (i.e. we are in CLOSED,
  817. * LISTEN or SYN_SENT), just delete the connection
  818. */
  819. if ( ! ( conn->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
  820. conn->tcp_state = TCP_CLOSED;
  821. tcp_dump_state ( conn );
  822. free_tcp ( conn );
  823. return;
  824. }
  825. /* If we have not had our SYN acknowledged (i.e. we are in
  826. * SYN_RCVD), pretend that it has been acknowledged so that we
  827. * can send a FIN without breaking things.
  828. */
  829. if ( ! ( conn->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
  830. tcp_rx_ack ( conn, ( conn->snd_seq + 1 ), 0 );
  831. /* Send a FIN to initiate the close */
  832. conn->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
  833. tcp_dump_state ( conn );
  834. tcp_senddata_conn ( conn, 0 );
  835. }
  836. /** TCP protocol */
  837. struct tcpip_protocol tcp_protocol __tcpip_protocol = {
  838. .name = "TCP",
  839. .rx = tcp_rx,
  840. .tcpip_proto = IP_TCP,
  841. };