diff --git a/README.md b/README.md index 2e1a95d..2cc775a 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ A single network interface can be associated with the device. | **Transport** | TCP | Connection management, reliable delivery | [RFC 793](https://datatracker.ietf.org/doc/html/rfc793), [RFC 9293](https://datatracker.ietf.org/doc/html/rfc9293) | | **Transport** | TCP | Maximum Segment Size negotiation | [RFC 793](https://datatracker.ietf.org/doc/html/rfc793) | | **Transport** | TCP | TCP Timestamps, RTT measurement, PAWS, Window Scaling | [RFC 7323](https://datatracker.ietf.org/doc/html/rfc7323) | +| **Transport** | TCP | Retransmission timeout (RTO) computation | [RFC 6298](https://datatracker.ietf.org/doc/html/rfc6298), [RFC 5681](https://datatracker.ietf.org/doc/html/rfc5681) | | **Transport** | TCP | TCP SACK | [RFC 2018](https://datatracker.ietf.org/doc/html/rfc2018), [RFC 2883](https://datatracker.ietf.org/doc/html/rfc2883), [RFC 6675](https://datatracker.ietf.org/doc/html/rfc6675) | | **Transport** | TCP | Congestion Control: Slow start, congestion avoidance | [RFC 5681](https://datatracker.ietf.org/doc/html/rfc5681) | | **Transport** | TCP | Fast Retransmit, triple duplicate ACK detection | [RFC 5681](https://datatracker.ietf.org/doc/html/rfc5681) | diff --git a/src/test/unit/unit.c b/src/test/unit/unit.c index 11aa14d..72eafd2 100644 --- a/src/test/unit/unit.c +++ b/src/test/unit/unit.c @@ -5652,7 +5652,7 @@ START_TEST(test_tcp_rto_cb_non_established_noop) memset(ts, 0, sizeof(*ts)); ts->proto = WI_IPPROTO_TCP; ts->S = &s; - ts->sock.tcp.state = TCP_SYN_SENT; + ts->sock.tcp.state = TCP_CLOSED; ts->sock.tcp.rto_backoff = 2; tcp_rto_cb(ts); @@ -5660,6 +5660,183 @@ START_TEST(test_tcp_rto_cb_non_established_noop) } END_TEST +START_TEST(test_tcp_rto_cb_syn_sent_requeues_syn_and_arms_timer) +{ + struct wolfIP s; + struct tsocket *ts; + struct pkt_desc *desc; + struct wolfIP_tcp_seg *seg; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, 
sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_SYN_SENT; + ts->sock.tcp.rto = 100; + ts->src_port = 12345; + ts->dst_port = 5001; + ts->local_ip = 0x0A000001U; + ts->remote_ip = 0x0A000002U; + fifo_init(&ts->sock.tcp.txbuf, ts->txmem, TXBUF_SIZE); + + s.last_tick = 1000; + tcp_rto_cb(ts); + + desc = fifo_peek(&ts->sock.tcp.txbuf); + ck_assert_ptr_nonnull(desc); + seg = (struct wolfIP_tcp_seg *)(ts->txmem + desc->pos + sizeof(*desc)); + ck_assert_uint_eq(seg->flags, 0x02); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_retries, 1); + ck_assert_int_ne(ts->sock.tcp.tmr_rto, NO_TIMER); +} +END_TEST + +START_TEST(test_tcp_input_synack_cancels_control_rto) +{ + struct wolfIP s; + struct tsocket *ts; + struct wolfIP_tcp_seg synack; + struct wolfIP_timer tmr; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_SYN_SENT; + ts->sock.tcp.seq = 101; + ts->sock.tcp.ctrl_rto_retries = 3; + ts->sock.tcp.ctrl_rto_active = 1; + ts->src_port = 2222; + ts->dst_port = 5001; + ts->local_ip = 0x0A000001U; + ts->remote_ip = 0x0A000002U; + + memset(&tmr, 0, sizeof(tmr)); + tmr.cb = test_timer_cb; + tmr.expires = 100; + tmr.arg = ts; + ts->sock.tcp.tmr_rto = timers_binheap_insert(&s.timers, tmr); + ck_assert_int_ne(ts->sock.tcp.tmr_rto, NO_TIMER); + + memset(&synack, 0, sizeof(synack)); + synack.ip.ttl = 64; + synack.ip.src = ee32(0x0A000002U); + synack.ip.dst = ee32(0x0A000001U); + synack.ip.len = ee16(IP_HEADER_LEN + TCP_HEADER_LEN); + synack.src_port = ee16(5001); + synack.dst_port = ee16(ts->src_port); + synack.seq = ee32(1000); + synack.ack = ee32(ts->sock.tcp.seq + 1); + synack.hlen = TCP_HEADER_LEN << 2; + synack.flags = 0x12; + synack.win = ee16(65535); + + tcp_input(&s, TEST_PRIMARY_IF, &synack, + (uint32_t)(ETH_HEADER_LEN + IP_HEADER_LEN + TCP_HEADER_LEN)); + + ck_assert_int_eq(ts->sock.tcp.state, TCP_ESTABLISHED); + 
ck_assert_int_eq(ts->sock.tcp.tmr_rto, NO_TIMER); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_retries, 0); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_active, 0); +} +END_TEST + +START_TEST(test_tcp_rto_cb_last_ack_requeues_finack_and_arms_timer) +{ + struct wolfIP s; + struct tsocket *ts; + struct pkt_desc *desc; + struct wolfIP_tcp_seg *seg; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_LAST_ACK; + ts->sock.tcp.rto = 100; + ts->src_port = 12345; + ts->dst_port = 5001; + ts->local_ip = 0x0A000001U; + ts->remote_ip = 0x0A000002U; + fifo_init(&ts->sock.tcp.txbuf, ts->txmem, TXBUF_SIZE); + + s.last_tick = 1000; + tcp_rto_cb(ts); + + desc = fifo_peek(&ts->sock.tcp.txbuf); + ck_assert_ptr_nonnull(desc); + seg = (struct wolfIP_tcp_seg *)(ts->txmem + desc->pos + sizeof(*desc)); + ck_assert_uint_eq(seg->flags, 0x11); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_retries, 1); + ck_assert_int_ne(ts->sock.tcp.tmr_rto, NO_TIMER); +} +END_TEST + +START_TEST(test_tcp_ack_fin_wait_1_ack_of_fin_moves_to_fin_wait_2_and_stops_timer) +{ + struct wolfIP s; + struct tsocket *ts; + struct wolfIP_tcp_seg ackseg; + struct wolfIP_timer tmr; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_FIN_WAIT_1; + ts->sock.tcp.last = 100; + ts->sock.tcp.snd_una = 100; + ts->sock.tcp.seq = 1000; + ts->sock.tcp.rto = 100; + ts->sock.tcp.ctrl_rto_active = 1; + ts->sock.tcp.ctrl_rto_retries = 2; + + memset(&tmr, 0, sizeof(tmr)); + tmr.cb = test_timer_cb; + tmr.expires = 200; + tmr.arg = ts; + ts->sock.tcp.tmr_rto = timers_binheap_insert(&s.timers, tmr); + ck_assert_int_ne(ts->sock.tcp.tmr_rto, NO_TIMER); + + memset(&ackseg, 0, sizeof(ackseg)); + ackseg.hlen = TCP_HEADER_LEN << 2; + ackseg.flags = 0x10; + ackseg.ack = ee32(101); + ackseg.ip.len = ee16(IP_HEADER_LEN + TCP_HEADER_LEN); + + tcp_ack(ts, &ackseg); + + 
ck_assert_int_eq(ts->sock.tcp.state, TCP_FIN_WAIT_2); + ck_assert_int_eq(ts->sock.tcp.tmr_rto, NO_TIMER); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_active, 0); + ck_assert_uint_eq(ts->sock.tcp.ctrl_rto_retries, 0); +} +END_TEST + +START_TEST(test_tcp_rto_cb_control_retry_cap_closes_socket) +{ + struct wolfIP s; + struct tsocket *ts; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_SYN_SENT; + ts->sock.tcp.rto = 100; + ts->sock.tcp.ctrl_rto_active = 1; + ts->sock.tcp.ctrl_rto_retries = TCP_CTRL_RTO_MAXRTX; + + tcp_rto_cb(ts); + ck_assert_int_eq(ts->proto, 0); +} +END_TEST + START_TEST(test_tcp_rto_cb_cancels_existing_timer) { struct wolfIP s; @@ -5723,7 +5900,39 @@ START_TEST(test_tcp_rto_cb_clears_sack_and_marks_lowest_only) ck_assert_int_ne(desc2->flags & PKT_FLAG_SENT, 0); ck_assert_int_eq(desc2->flags & PKT_FLAG_RETRANS, 0); ck_assert_uint_eq(ts->sock.tcp.cwnd, TCP_MSS); - ck_assert_uint_eq(ts->sock.tcp.ssthresh, TCP_MSS * 4); + ck_assert_uint_eq(ts->sock.tcp.ssthresh, TCP_MSS * 2); +} +END_TEST + +START_TEST(test_tcp_rto_cb_ssthresh_uses_inflight_not_cwnd) +{ + struct wolfIP s; + struct tsocket *ts; + struct pkt_desc *desc; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_ESTABLISHED; + ts->sock.tcp.rto = 100; + ts->sock.tcp.cwnd = TCP_MSS * 4; + ts->sock.tcp.snd_una = 101; + ts->sock.tcp.seq = 101; + ts->sock.tcp.bytes_in_flight = TCP_MSS * 10; + fifo_init(&ts->sock.tcp.txbuf, ts->txmem, TXBUF_SIZE); + + ck_assert_int_eq(enqueue_tcp_tx(ts, 1, 0x18), 0); + desc = fifo_peek(&ts->sock.tcp.txbuf); + ck_assert_ptr_nonnull(desc); + desc->flags |= PKT_FLAG_SENT; + + s.last_tick = 1000; + tcp_rto_cb(ts); + + ck_assert_uint_eq(ts->sock.tcp.cwnd, TCP_MSS); + ck_assert_uint_eq(ts->sock.tcp.ssthresh, TCP_MSS * 5); } END_TEST @@ -8416,6 +8625,79 @@ 
START_TEST(test_tcp_process_ts_uses_ecr) s.last_tick = 1000; ck_assert_int_eq(tcp_process_ts(ts, tcp, sizeof(buf)), 0); ck_assert_uint_eq(ts->sock.tcp.rtt, 100); + ck_assert_uint_eq(ts->sock.tcp.rto, TCP_RTO_MIN_MS); + ck_assert_uint_eq(ts->sock.tcp.rto_initialized, 1); +} +END_TEST + +START_TEST(test_tcp_rto_update_second_sample_rfc6298) +{ + struct wolfIP s; + struct tsocket *ts; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + + tcp_rto_update_from_sample(ts, 2000); + ck_assert_uint_eq(ts->sock.tcp.rtt, 2000); + ck_assert_uint_eq(ts->sock.tcp.rto, 6000); + + tcp_rto_update_from_sample(ts, 1000); + ck_assert_uint_eq(ts->sock.tcp.rtt, 1875); + ck_assert_uint_eq(ts->sock.tcp.rto, 5875); +} +END_TEST + +START_TEST(test_tcp_rto_update_sequence_known_deviation) +{ + struct wolfIP s; + struct tsocket *ts; + const uint32_t samples[] = {4000U, 2000U, 2000U, 2000U}; + const uint32_t exp_srtt[] = {32000U, 30000U, 28250U, 26718U}; + const uint32_t exp_rttvar[] = {8000U, 8000U, 7750U, 7343U}; + const uint32_t exp_rto[] = {12000U, 11750U, 11281U, 10682U}; + size_t i; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + + for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) { + tcp_rto_update_from_sample(ts, samples[i]); + ck_assert_uint_eq(ts->sock.tcp.srtt, exp_srtt[i]); + ck_assert_uint_eq(ts->sock.tcp.rttvar, exp_rttvar[i]); + ck_assert_uint_eq(ts->sock.tcp.rto, exp_rto[i]); + } +} +END_TEST + +START_TEST(test_tcp_rto_update_slow_convergence_intermediate_values) +{ + struct wolfIP s; + struct tsocket *ts; + const uint32_t samples[] = {8000U, 2000U, 2000U, 2000U, 2000U, 2000U, 2000U, 2000U, 2000U}; + const uint32_t exp_srtt_ms[] = {8000U, 7250U, 6593U, 6019U, 5517U, 5077U, 4692U, 4356U, 4061U}; + size_t i; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + + 
for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) { + tcp_rto_update_from_sample(ts, samples[i]); + ck_assert_uint_eq(ts->sock.tcp.rtt, exp_srtt_ms[i]); + if (i > 0) + ck_assert_uint_lt(ts->sock.tcp.rtt, exp_srtt_ms[i - 1]); + } + /* Convergence is intentionally smooth, not immediate. */ + ck_assert_uint_gt(ts->sock.tcp.rtt, 2000U); } END_TEST @@ -9949,7 +10231,7 @@ START_TEST(test_tcp_ack_closes_last_ack_socket) desc->flags |= PKT_FLAG_SENT; memset(&ackseg, 0, sizeof(ackseg)); - ackseg.ack = ee32(seq); + ackseg.ack = ee32(seq + 1); ackseg.hlen = TCP_HEADER_LEN << 2; ackseg.flags = 0x10; @@ -10042,6 +10324,58 @@ START_TEST(test_tcp_ack_fresh_desc_updates_rtt_existing) } END_TEST +START_TEST(test_tcp_ack_retransmitted_desc_skips_rtt_update) +{ + struct wolfIP s; + struct tsocket *ts; + struct tcp_seg_buf segbuf; + struct wolfIP_tcp_seg *seg; + struct wolfIP_tcp_seg ackseg; + struct pkt_desc *desc; + uint32_t seq = 300; + uint32_t old_rtt; + uint32_t old_rto; + + wolfIP_init(&s); + ts = &s.tcpsockets[0]; + memset(ts, 0, sizeof(*ts)); + ts->proto = WI_IPPROTO_TCP; + ts->S = &s; + ts->sock.tcp.state = TCP_ESTABLISHED; + fifo_init(&ts->sock.tcp.txbuf, ts->txmem, TXBUF_SIZE); + s.last_tick = 1000; + + tcp_rto_update_from_sample(ts, 200); + old_rtt = ts->sock.tcp.rtt; + old_rto = ts->sock.tcp.rto; + + memset(&segbuf, 0, sizeof(segbuf)); + seg = &segbuf.seg; + seg->ip.len = ee16(IP_HEADER_LEN + TCP_HEADER_LEN + 1); + seg->hlen = TCP_HEADER_LEN << 2; + seg->seq = ee32(seq); + seg->data[0] = TCP_OPTION_EOO; + ck_assert_int_eq(fifo_push(&ts->sock.tcp.txbuf, &segbuf, sizeof(segbuf)), 0); + desc = fifo_peek(&ts->sock.tcp.txbuf); + ck_assert_ptr_nonnull(desc); + desc->flags |= PKT_FLAG_SENT; + desc->flags |= PKT_FLAG_WAS_RETRANS; + desc->time_sent = 800; + ts->sock.tcp.bytes_in_flight = 1; + ts->sock.tcp.snd_una = seq; + ts->sock.tcp.seq = seq + 1; + + memset(&ackseg, 0, sizeof(ackseg)); + ackseg.ack = ee32(seq + 1); + ackseg.hlen = TCP_HEADER_LEN << 2; + ackseg.flags = 
0x10; + + tcp_ack(ts, &ackseg); + ck_assert_uint_eq(ts->sock.tcp.rtt, old_rtt); + ck_assert_uint_eq(ts->sock.tcp.rto, old_rto); +} +END_TEST + START_TEST(test_tcp_ack_duplicate_zero_len_segment_large_ack) { struct wolfIP s; @@ -14230,8 +14564,14 @@ Suite *wolf_suite(void) tcase_add_test(tc_utils, test_tcp_rto_cb_skips_unsent_desc); tcase_add_test(tc_utils, test_tcp_rto_cb_non_tcp_noop); tcase_add_test(tc_utils, test_tcp_rto_cb_non_established_noop); + tcase_add_test(tc_utils, test_tcp_rto_cb_syn_sent_requeues_syn_and_arms_timer); + tcase_add_test(tc_utils, test_tcp_input_synack_cancels_control_rto); + tcase_add_test(tc_utils, test_tcp_rto_cb_last_ack_requeues_finack_and_arms_timer); + tcase_add_test(tc_utils, test_tcp_ack_fin_wait_1_ack_of_fin_moves_to_fin_wait_2_and_stops_timer); + tcase_add_test(tc_utils, test_tcp_rto_cb_control_retry_cap_closes_socket); tcase_add_test(tc_utils, test_tcp_rto_cb_cancels_existing_timer); tcase_add_test(tc_utils, test_tcp_rto_cb_clears_sack_and_marks_lowest_only); + tcase_add_test(tc_utils, test_tcp_rto_cb_ssthresh_uses_inflight_not_cwnd); tcase_add_test(tc_utils, test_tcp_rto_cb_ssthresh_floor_two_mss); tcase_add_test(tc_utils, test_tcp_rto_cb_fallback_marks_lowest_sent_when_no_snd_una_cover); tcase_add_test(tc_utils, test_sock_close_udp_icmp); @@ -14264,6 +14604,7 @@ Suite *wolf_suite(void) tcase_add_test(tc_utils, test_tcp_ack_closes_last_ack_socket); tcase_add_test(tc_utils, test_tcp_ack_last_seq_match_no_close); tcase_add_test(tc_utils, test_tcp_ack_fresh_desc_updates_rtt_existing); + tcase_add_test(tc_utils, test_tcp_ack_retransmitted_desc_skips_rtt_update); tcase_add_test(tc_utils, test_tcp_ack_duplicate_zero_len_segment_large_ack); tcase_add_test(tc_utils, test_tcp_ack_duplicate_seq_match_large_seg_len); tcase_add_test(tc_utils, test_tcp_ack_duplicate_clears_sent_flag); @@ -14290,6 +14631,9 @@ Suite *wolf_suite(void) tcase_add_test(tc_utils, test_tcp_recv_close_wait_ack_match); tcase_add_test(tc_utils, 
test_tcp_recv_queue_full_sends_ack); tcase_add_test(tc_utils, test_tcp_process_ts_uses_ecr); + tcase_add_test(tc_utils, test_tcp_rto_update_second_sample_rfc6298); + tcase_add_test(tc_utils, test_tcp_rto_update_sequence_known_deviation); + tcase_add_test(tc_utils, test_tcp_rto_update_slow_convergence_intermediate_values); tcase_add_test(tc_utils, test_tcp_process_ts_nop_then_ts); tcase_add_test(tc_utils, test_tcp_process_ts_skips_unknown_option); tcase_add_test(tc_utils, test_tcp_process_ts_no_ecr); diff --git a/src/wolfip.c b/src/wolfip.c index fd26aa0..6791e94 100644 --- a/src/wolfip.c +++ b/src/wolfip.c @@ -113,6 +113,12 @@ struct wolfIP_icmp_packet; #define WI_IP_MTU 1500 #define TCP_MSS (WI_IP_MTU - (IP_HEADER_LEN + TCP_HEADER_LEN)) #define TCP_DEFAULT_MSS 536U +#define TCP_CTRL_RTO_MAXRTX 6U + +#define TCP_RTO_MIN_MS 1000U +#define TCP_RTO_MAX_MS 60000U +#define TCP_RTO_G_MS 1U + /* Arbitrary upper limit to avoid monopolizing the CPU during poll loops. */ #define WOLFIP_POLL_BUDGET 128 @@ -123,6 +129,8 @@ struct wolfIP_icmp_packet; #define PKT_FLAG_ACKED 0x02U #define PKT_FLAG_FIN 0x04U #define PKT_FLAG_RETRANS 0x08U +#define PKT_FLAG_WAS_RETRANS 0x10U + #define TX_WRITABLE_THRESHOLD 1 #define TCP_SACK_MAX_BLOCKS 4 @@ -987,9 +995,13 @@ struct tcpsocket { enum tcp_state state; uint32_t last_ts, rtt, rto, cwnd, cwnd_count, ssthresh, tmr_rto, rto_backoff, seq, ack, last_ack, last, bytes_in_flight, snd_una; + uint32_t srtt, rttvar; uint32_t last_early_rexmit_ack; + uint8_t rto_initialized; uint8_t dup_acks; uint8_t early_rexmit_done; + uint8_t ctrl_rto_retries; + uint8_t ctrl_rto_active; ip4 local_ip, remote_ip; uint32_t peer_rwnd; uint16_t peer_mss; @@ -1033,6 +1045,11 @@ static void close_socket(struct tsocket *ts); static inline uint32_t tcp_seq_inc(uint32_t seq, uint32_t n); static inline int tcp_seq_leq(uint32_t a, uint32_t b); static inline int tcp_seq_lt(uint32_t a, uint32_t b); +static void tcp_rto_update_from_sample(struct tsocket *t, uint32_t 
sample_ms); +static void tcp_rto_cb(void *arg); +static void tcp_ctrl_rto_start(struct tsocket *t, uint64_t now); +static void tcp_ctrl_rto_stop(struct tsocket *t); +static int tcp_ctrl_state_needs_rto(const struct tsocket *t); #ifdef ETHERNET struct PACKED arp_packet { @@ -1654,13 +1671,18 @@ static struct tsocket *tcp_new_socket(struct wolfIP *s) t->S = s; t->if_idx = 0; t->sock.tcp.state = TCP_CLOSED; - t->sock.tcp.rto = 1000; + t->sock.tcp.rto = TCP_RTO_MIN_MS; t->sock.tcp.rtt = 0; + t->sock.tcp.srtt = 0; + t->sock.tcp.rttvar = 0; + t->sock.tcp.rto_initialized = 0; t->sock.tcp.rto_backoff = 0; t->sock.tcp.bytes_in_flight = 0; t->sock.tcp.snd_una = t->sock.tcp.seq; t->sock.tcp.dup_acks = 0; t->sock.tcp.early_rexmit_done = 0; + t->sock.tcp.ctrl_rto_retries = 0; + t->sock.tcp.ctrl_rto_active = 0; t->sock.tcp.last_early_rexmit_ack = 0; t->sock.tcp.peer_rwnd = 0xFFFF; t->sock.tcp.cwnd = tcp_initial_cwnd(t->sock.tcp.peer_rwnd); @@ -2108,6 +2130,46 @@ static void tcp_send_syn(struct tsocket *t, uint8_t flags) fifo_push(&t->sock.tcp.txbuf, tcp, sizeof(struct wolfIP_tcp_seg) + opt_len); } +static int tcp_ctrl_state_needs_rto(const struct tsocket *t) +{ + if (!t || t->proto != WI_IPPROTO_TCP) + return 0; + return (t->sock.tcp.state == TCP_SYN_SENT) || + (t->sock.tcp.state == TCP_SYN_RCVD) || + (t->sock.tcp.state == TCP_FIN_WAIT_1) || + (t->sock.tcp.state == TCP_LAST_ACK); +} + +static void tcp_ctrl_rto_stop(struct tsocket *t) +{ + if (!t || t->proto != WI_IPPROTO_TCP) + return; + if (t->sock.tcp.tmr_rto != NO_TIMER) { + timer_binheap_cancel(&t->S->timers, t->sock.tcp.tmr_rto); + t->sock.tcp.tmr_rto = NO_TIMER; + } + t->sock.tcp.ctrl_rto_active = 0; + t->sock.tcp.ctrl_rto_retries = 0; +} + +static void tcp_ctrl_rto_start(struct tsocket *t, uint64_t now) +{ + struct wolfIP_timer tmr = {0}; + uint64_t shift_rto; + if (!t || t->proto != WI_IPPROTO_TCP) + return; + if (t->sock.tcp.tmr_rto != NO_TIMER) { + timer_binheap_cancel(&t->S->timers, t->sock.tcp.tmr_rto); + 
t->sock.tcp.tmr_rto = NO_TIMER; + } + shift_rto = (uint64_t)t->sock.tcp.rto << t->sock.tcp.ctrl_rto_retries; + tmr.expires = now + shift_rto; + tmr.arg = t; + tmr.cb = tcp_rto_cb; + t->sock.tcp.tmr_rto = timers_binheap_insert(&t->S->timers, tmr); + t->sock.tcp.ctrl_rto_active = 1; +} + /* Increment a TCP sequence number (wraps at 2^32) */ static inline uint32_t tcp_seq_inc(uint32_t seq, uint32_t n) { @@ -2370,6 +2432,7 @@ static int tcp_process_ts(struct tsocket *t, const struct wolfIP_tcp_seg *tcp, uint32_t frame_len) { struct tcp_parsed_opts po; + uint32_t sample; tcp_parse_options(tcp, frame_len, &po); if (!po.ts_found) @@ -2379,16 +2442,58 @@ static int tcp_process_ts(struct tsocket *t, const struct wolfIP_tcp_seg *tcp, return -1; /* No echoed timestamp; fall back to coarse RTT. */ if (po.ts_ecr > t->S->last_tick) return -1; /* Echoed timestamp in the future; ignore. */ - if (t->sock.tcp.rtt == 0) - t->sock.tcp.rtt = (uint32_t)(t->S->last_tick - po.ts_ecr); - else { - uint64_t rtt_scaled = (uint64_t)t->sock.tcp.rtt << 3; - uint64_t sample_scaled = (t->S->last_tick - po.ts_ecr) << 3; - t->sock.tcp.rtt = (uint32_t)(7 * rtt_scaled + sample_scaled); - } + sample = (uint32_t)(t->S->last_tick - po.ts_ecr); + tcp_rto_update_from_sample(t, sample); return 0; } +static uint32_t tcp_rto_clamp(uint32_t rto_ms) +{ + if (rto_ms < TCP_RTO_MIN_MS) + return TCP_RTO_MIN_MS; + if (rto_ms > TCP_RTO_MAX_MS) + return TCP_RTO_MAX_MS; + return rto_ms; +} + +/* Fold one RTT sample (in ms) into the smoothed estimators and recompute the RTO, + * following the RFC 6298 update rules. srtt is stored scaled by 8 and rttvar by 4. + * The sample is clamped to [1, TCP_RTO_MAX_MS] so the 32-bit fixed-point math + * below (sample_ms << 3, 7U * srtt, 3U * rttvar) cannot wrap on a huge or bogus sample. + */ +static void tcp_rto_update_from_sample(struct tsocket *t, uint32_t sample_ms) +{ + uint32_t srtt_ms; + uint32_t rto_ms; + uint32_t err_ms; + + if (!t || t->proto != WI_IPPROTO_TCP) + return; + if (sample_ms == 0) + sample_ms = 1; + if (sample_ms > TCP_RTO_MAX_MS) + sample_ms = TCP_RTO_MAX_MS; /* RTO is capped at this value anyway */ + + if (!t->sock.tcp.rto_initialized) { + t->sock.tcp.srtt = sample_ms << 3; /* SRTT in ms*8 */ + t->sock.tcp.rttvar = sample_ms << 1; /* RTTVAR in ms*4, initialized to R/2 */ + t->sock.tcp.rto_initialized = 1; + } else { + srtt_ms = t->sock.tcp.srtt >> 3; + if (srtt_ms > sample_ms) + err_ms = srtt_ms -
sample_ms; + else + err_ms = sample_ms - srtt_ms; + t->sock.tcp.rttvar = (3U * t->sock.tcp.rttvar + (err_ms << 2)) >> 2; + t->sock.tcp.srtt = (7U * t->sock.tcp.srtt + (sample_ms << 3)) >> 3; + } + + srtt_ms = t->sock.tcp.srtt >> 3; + rto_ms = srtt_ms + ((t->sock.tcp.rttvar > TCP_RTO_G_MS) ? t->sock.tcp.rttvar : TCP_RTO_G_MS); + t->sock.tcp.rtt = srtt_ms; + t->sock.tcp.rto = tcp_rto_clamp(rto_ms); +} + #define SEQ_DIFF(a,b) ((a - b) > 0x7FFFFFFF) ? (b - a) : (a - b) /* Return true if a <= b @@ -2564,11 +2662,23 @@ static int tcp_mark_unsacked_for_retransmit(struct tsocket *t, uint32_t ack) static void tcp_ack(struct tsocket *t, const struct wolfIP_tcp_seg *tcp) { uint32_t ack = ee32(tcp->ack); + uint32_t fin_acked = tcp_seq_inc(t->sock.tcp.last, 1); struct pkt_desc *desc; int ack_count = 0; int ack_advanced = 0; uint32_t inflight_pre = t->sock.tcp.bytes_in_flight; + if (t->sock.tcp.state == TCP_LAST_ACK && tcp_seq_leq(fin_acked, ack)) { + tcp_ctrl_rto_stop(t); + t->sock.tcp.state = TCP_CLOSED; + close_socket(t); + return; + } + if (t->sock.tcp.state == TCP_FIN_WAIT_1 && tcp_seq_leq(fin_acked, ack)) { + t->sock.tcp.state = TCP_FIN_WAIT_2; + tcp_ctrl_rto_stop(t); + } + tcp_process_sack(t, tcp, (uint32_t)(ETH_HEADER_LEN + IP_HEADER_LEN + (tcp->hlen >> 2))); desc = fifo_peek(&t->sock.tcp.txbuf); @@ -2582,13 +2692,6 @@ static void tcp_ack(struct tsocket *t, const struct wolfIP_tcp_seg *tcp) desc = fifo_peek(&t->sock.tcp.txbuf); continue; } - if (ee32(seg->seq) == t->sock.tcp.last && ee32(seg->seq) == ack) { - if (t->sock.tcp.state == TCP_LAST_ACK) { - t->sock.tcp.state = TCP_CLOSED; - close_socket(t); - return; - } - } if (tcp_seq_leq(ee32(seg->seq) + seg_len, ack)) { desc->flags |= PKT_FLAG_ACKED; desc->flags &= ~PKT_FLAG_SENT; @@ -2637,7 +2740,9 @@ static void tcp_ack(struct tsocket *t, const struct wolfIP_tcp_seg *tcp) } if (ack_count > 0) { struct pkt_desc *fresh_desc = NULL; - struct wolfIP_tcp_seg *seg; + uint32_t ack_ip_len = ee16(tcp->ip.len); + uint32_t
ack_hdr_len = IP_HEADER_LEN + (uint32_t)(tcp->hlen >> 2); + uint32_t ack_frame_len = 0; /* This ACK ackwnowledged some data. */ desc = fifo_peek(&t->sock.tcp.txbuf); while (desc && (desc->flags & PKT_FLAG_ACKED)) { @@ -2645,17 +2750,16 @@ static void tcp_ack(struct tsocket *t, const struct wolfIP_tcp_seg *tcp) desc = fifo_peek(&t->sock.tcp.txbuf); } if (fresh_desc) { - seg = (struct wolfIP_tcp_seg *)(t->txmem + fresh_desc->pos + sizeof(*fresh_desc)); - /* Update rtt */ - if (tcp_process_ts(t, seg, fresh_desc->len) < 0) { - /* No timestamp option, use coarse RTT estimation */ - if (t->S->last_tick >= fresh_desc->time_sent) { - uint32_t rtt = (uint32_t)(t->S->last_tick - fresh_desc->time_sent); - if (t->sock.tcp.rtt == 0) { - t->sock.tcp.rtt = rtt; - } else { - uint64_t rtt_scaled = (uint64_t)t->sock.tcp.rtt << 3; - t->sock.tcp.rtt = (uint32_t)(7 * rtt_scaled + ((uint64_t)rtt << 3)); + /* Karn rule: ignore RTT samples for retransmitted segments. */ + if (!(fresh_desc->flags & PKT_FLAG_WAS_RETRANS)) { + if (ack_ip_len >= ack_hdr_len) + ack_frame_len = ETH_HEADER_LEN + ack_ip_len; + /* Prefer timestamp-based RTT sample from the incoming ACK. */ + if (ack_frame_len == 0 || tcp_process_ts(t, tcp, ack_frame_len) < 0) { + /* No usable TS echo; use coarse RTT sample from send timestamp. 
*/ + if (t->S->last_tick >= fresh_desc->time_sent) { + uint32_t rtt = (uint32_t)(t->S->last_tick - fresh_desc->time_sent); + tcp_rto_update_from_sample(t, rtt); } } } @@ -2878,6 +2982,7 @@ static void tcp_input(struct wolfIP *S, unsigned int if_idx, } else if (t->sock.tcp.state == TCP_SYN_SENT) { if (tcp->flags == 0x12) { t->sock.tcp.state = TCP_ESTABLISHED; + tcp_ctrl_rto_stop(t); t->sock.tcp.ack = tcp_seq_inc(ee32(tcp->seq), 1); t->sock.tcp.seq = ee32(tcp->ack); t->sock.tcp.snd_una = t->sock.tcp.seq; @@ -2894,6 +2999,7 @@ static void tcp_input(struct wolfIP *S, unsigned int if_idx, if ((tcplen == 0) && (t->sock.tcp.state == TCP_SYN_RCVD)) { if (tcp->flags == 0x10) { t->sock.tcp.state = TCP_ESTABLISHED; + tcp_ctrl_rto_stop(t); t->sock.tcp.ack = ee32(tcp->seq); t->sock.tcp.seq = ee32(tcp->ack); t->sock.tcp.snd_una = t->sock.tcp.seq; @@ -2950,8 +3056,32 @@ static void tcp_rto_cb(void *arg) uint32_t guard = 0; uint32_t budget; uint32_t first_sent_seq = 0; - uint32_t prev_cwnd; - if ((ts->proto != WI_IPPROTO_TCP) || (ts->sock.tcp.state != TCP_ESTABLISHED)) + uint32_t prev_in_flight; + if (ts->proto != WI_IPPROTO_TCP) + return; + if (tcp_ctrl_state_needs_rto(ts) || ts->sock.tcp.ctrl_rto_active) { + if (!tcp_ctrl_state_needs_rto(ts)) { + tcp_ctrl_rto_stop(ts); + return; + } + if (ts->sock.tcp.ctrl_rto_retries >= TCP_CTRL_RTO_MAXRTX) { + tcp_ctrl_rto_stop(ts); + ts->sock.tcp.state = TCP_CLOSED; + close_socket(ts); + return; + } + ts->sock.tcp.ctrl_rto_retries++; + if (ts->sock.tcp.state == TCP_SYN_SENT) { + tcp_send_syn(ts, 0x02); + } else if (ts->sock.tcp.state == TCP_SYN_RCVD) { + tcp_send_syn(ts, 0x12); + } else if (ts->sock.tcp.state == TCP_FIN_WAIT_1 || ts->sock.tcp.state == TCP_LAST_ACK) { + tcp_send_finack(ts); + } + tcp_ctrl_rto_start(ts, ts->S->last_tick); + return; + } + if (ts->sock.tcp.state != TCP_ESTABLISHED) return; /* RFC 6675 / RFC 2018 guidance: after an RTO, SACK scoreboard must not be * trusted (receiver may renege). 
Fall back to cumulative-ACK driven @@ -3023,8 +3153,11 @@ static void tcp_rto_cb(void *arg) ts->events |= CB_EVENT_WRITABLE; } if (pending) { + prev_in_flight = ts->sock.tcp.bytes_in_flight; /* RTO implies all in-flight data is considered lost. */ ts->sock.tcp.bytes_in_flight = 0; + } else { + prev_in_flight = 0; } if (ts->sock.tcp.tmr_rto != NO_TIMER) { @@ -3032,10 +3165,9 @@ static void tcp_rto_cb(void *arg) ts->sock.tcp.tmr_rto = NO_TIMER; } if (pending) { - prev_cwnd = ts->sock.tcp.cwnd; ts->sock.tcp.rto_backoff++; ts->sock.tcp.cwnd = TCP_MSS; - ts->sock.tcp.ssthresh = prev_cwnd / 2; + ts->sock.tcp.ssthresh = prev_in_flight / 2; if (ts->sock.tcp.ssthresh < (2 * TCP_MSS)) ts->sock.tcp.ssthresh = 2 * TCP_MSS; @@ -3061,6 +3193,8 @@ static void tcp_resync_inflight(struct wolfIP *s, struct tsocket *ts, uint64_t n if (!s || !ts) return; + if (tcp_ctrl_state_needs_rto(ts) || ts->sock.tcp.ctrl_rto_active) + return; budget = fifo_desc_budget(&ts->sock.tcp.txbuf); scan = fifo_peek(&ts->sock.tcp.txbuf); while (scan && guard++ < budget) { @@ -3279,7 +3413,9 @@ int wolfIP_sock_connect(struct wolfIP *s, int sockfd, const struct wolfIP_sockad ts->sock.tcp.state = TCP_CLOSED; return -1; } + ts->sock.tcp.ctrl_rto_retries = 0; tcp_send_syn(ts, 0x02); + tcp_ctrl_rto_start(ts, s->last_tick); return -WOLFIP_EAGAIN; } return -WOLFIP_EINVAL; @@ -3727,7 +3863,9 @@ int wolfIP_sock_close(struct wolfIP *s, int sockfd) ts = &s->tcpsockets[SOCKET_UNMARK(sockfd)]; if (ts->sock.tcp.state == TCP_ESTABLISHED) { ts->sock.tcp.state = TCP_FIN_WAIT_1; + ts->sock.tcp.ctrl_rto_retries = 0; tcp_send_finack(ts); + tcp_ctrl_rto_start(ts, s->last_tick); return -WOLFIP_EAGAIN; } else if (ts->sock.tcp.state == TCP_LISTEN) { ts->sock.tcp.state = TCP_CLOSED; @@ -3738,7 +3876,9 @@ int wolfIP_sock_close(struct wolfIP *s, int sockfd) return 0; } else if (ts->sock.tcp.state == TCP_CLOSE_WAIT) { ts->sock.tcp.state = TCP_LAST_ACK; + ts->sock.tcp.ctrl_rto_retries = 0; tcp_send_finack(ts); + tcp_ctrl_rto_start(ts, 
s->last_tick); return -WOLFIP_EAGAIN; } else if (ts->sock.tcp.state == TCP_CLOSING) { ts->sock.tcp.state = TCP_TIME_WAIT; @@ -5288,6 +5428,8 @@ int wolfIP_poll(struct wolfIP *s, uint64_t now) } desc->flags |= PKT_FLAG_SENT; desc->flags &= ~PKT_FLAG_RETRANS; + if (is_retrans) + desc->flags |= PKT_FLAG_WAS_RETRANS; desc->time_sent = now; if (size == IP_HEADER_LEN + (uint32_t)(tcp->hlen >> 2)) { desc = fifo_pop(&ts->sock.tcp.txbuf);