aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c97
1 files changed, 65 insertions, 32 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 97f29ece3800..6d7f441c7dd7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -179,8 +179,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -194,7 +193,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -1104,7 +1103,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb_set_hash_from_sk(skb, sk);
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+ skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm));
/* Build TCP header and checksum it. */
th = (struct tcphdr *)skb->data;
@@ -1152,7 +1151,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -1653,15 +1652,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
- /* Track the maximum number of outstanding packets in each
- * window, and remember whether we were cwnd-limited then.
+ /* Track the strongest available signal of the degree to which the cwnd
+ * is fully utilized. If cwnd-limited then remember that fact for the
+ * current window. If not cwnd-limited then track the maximum number of
+ * outstanding packets in the current window. (If cwnd-limited then we
+ * chose to not update tp->max_packets_out to avoid an extra else
+ * clause with no functional impact.)
*/
- if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out ||
- is_cwnd_limited) {
- tp->max_packets_out = tp->packets_out;
- tp->max_packets_seq = tp->snd_nxt;
+ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
+ is_cwnd_limited ||
+ (!tp->is_cwnd_limited &&
+ tp->packets_out > tp->max_packets_out)) {
tp->is_cwnd_limited = is_cwnd_limited;
+ tp->max_packets_out = tp->packets_out;
+ tp->cwnd_usage_seq = tp->snd_nxt;
}
if (tcp_is_cwnd_limited(sk)) {
@@ -1761,7 +1765,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2255,6 +2259,18 @@ static bool tcp_pacing_check(struct sock *sk)
return true;
}
+static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
+{
+ const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
+
+ /* No skb in the rtx queue. */
+ if (!node)
+ return true;
+
+ /* Only one skb in rtx queue. */
+ return !node->rb_left && !node->rb_right;
+}
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)
@@ -2276,7 +2292,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
@@ -2292,12 +2308,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit += extra_bytes;
}
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send skb if rtx queue is empty.
+ /* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (tcp_rtx_queue_empty(sk))
+ if (tcp_rtx_queue_empty_or_single_skb(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2500,7 +2516,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, rto_delta_us;
+ u32 timeout, timeout_us, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2524,11 +2540,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
if (tp->srtt_us) {
- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ timeout_us = tp->srtt_us >> 2;
if (tp->packets_out == 1)
- timeout += TCP_RTO_MIN;
+ timeout_us += tcp_rto_min_us(sk);
else
- timeout += TCP_TIMEOUT_MIN;
+ timeout_us += TCP_TIMEOUT_MIN_US;
+ timeout = usecs_to_jiffies(timeout_us);
} else {
timeout = TCP_TIMEOUT_INIT;
}
@@ -2911,7 +2928,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cur_mss;
int diff, len, err;
-
+ int avail_wnd;
/* Inconclusive MTU probe */
if (icsk->icsk_mtup.probe_size)
@@ -2928,7 +2945,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
if (skb_still_in_host_queue(sk, skb))
return -EBUSY;
+start:
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+ TCP_SKB_CB(skb)->seq++;
+ goto start;
+ }
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
WARN_ON_ONCE(1);
return -EINVAL;
@@ -2941,17 +2964,25 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
return -EHOSTUNREACH; /* Routing failure or similar. */
cur_mss = tcp_current_mss(sk);
+ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case
- * our retransmit serves as a zero window probe.
+ * our retransmit of one segment serves as a zero window probe.
*/
- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
- TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ if (avail_wnd <= 0) {
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
+ return -EAGAIN;
+ avail_wnd = cur_mss;
+ }
len = cur_mss * segs;
+ if (len > avail_wnd) {
+ len = rounddown(avail_wnd, cur_mss);
+ if (!len)
+ len = avail_wnd;
+ }
if (skb->len > len) {
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
cur_mss, GFP_ATOMIC))
@@ -2965,8 +2996,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
diff -= tcp_skb_pcount(skb);
if (diff)
tcp_adjust_pcount(sk, skb, diff);
- if (skb->len < cur_mss)
- tcp_retrans_try_collapse(sk, skb, cur_mss);
+ avail_wnd = min_t(int, avail_wnd, cur_mss);
+ if (skb->len < avail_wnd)
+ tcp_retrans_try_collapse(sk, skb, avail_wnd);
}
/* RFC3168, section 6.1.1.1. ECN fallback */
@@ -3134,11 +3166,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt;
+ int delta, amt;
- if (size <= sk->sk_forward_alloc)
+ delta = size - sk->sk_forward_alloc;
+ if (delta <= 0)
return;
- amt = sk_mem_pages(size);
+ amt = sk_mem_pages(delta);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
sk_memory_allocated_add(sk, amt);
@@ -3322,7 +3355,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp_ns = cookie_init_timestamp(req);
+ skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
else
#endif
{
@@ -3359,7 +3392,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */