Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--	net/ipv4/tcp.c	105
1 file changed, 66 insertions(+), 39 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4815cf72569e..8ebcff40bc5a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,6 +429,7 @@ void tcp_init_sock(struct sock *sk)
 
 	/* There's a bubble in the pipe until at least the first ACK. */
 	tp->app_limited = ~0U;
+	tp->rate_app_limited = 1;
 
 	/* See draft-stevens-tcpca-spec-01 for discussion of the
 	 * initialization of these values.
@@ -437,7 +438,7 @@
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = TCP_MSS_DEFAULT;
 
-	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+	tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
 	tcp_assign_congestion_control(sk);
 
 	tp->tsoffset = 0;
@@ -504,6 +505,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	__poll_t mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	u8 shutdown;
 	int state;
 
 	sock_poll_wait(file, sock, wait);
@@ -546,9 +548,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
 	 */
-	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+	shutdown = READ_ONCE(sk->sk_shutdown);
+	if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
 		mask |= EPOLLHUP;
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
+	if (shutdown & RCV_SHUTDOWN)
 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
 	/* Connected or passive Fast Open socket? */
@@ -564,8 +567,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		if (tcp_stream_is_readable(tp, target, sk))
 			mask |= EPOLLIN | EPOLLRDNORM;
 
-		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-			if (sk_stream_is_writeable(sk)) {
+		if (!(shutdown & SEND_SHUTDOWN)) {
+			if (__sk_stream_is_writeable(sk, 1)) {
 				mask |= EPOLLOUT | EPOLLWRNORM;
 			} else {  /* send SIGIO later */
 				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -577,7 +580,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 				 * pairs with the input side.
 				 */
 				smp_mb__after_atomic();
-				if (sk_stream_is_writeable(sk))
+				if (__sk_stream_is_writeable(sk, 1))
 					mask |= EPOLLOUT | EPOLLWRNORM;
 			}
 		} else
@@ -697,7 +700,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 				int size_goal)
 {
 	return skb->len < size_goal &&
-	       sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+	       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
 	       !tcp_rtx_queue_empty(sk) &&
 	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
@@ -722,6 +725,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
 			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+			smp_mb__after_atomic();
 		}
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
@@ -1148,7 +1152,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+	      TFO_CLIENT_ENABLE) ||
 	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
@@ -2348,14 +2353,13 @@ bool tcp_check_oom(struct sock *sk, int shift)
 	return too_many_orphans || out_of_socket_memory;
 }
 
-void tcp_close(struct sock *sk, long timeout)
+void __tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
 	int data_was_unread = 0;
 	int state;
 
-	lock_sock(sk);
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 
 	if (sk->sk_state == TCP_LISTEN) {
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2518,6 +2522,12 @@ adjudge_to_death:
 out:
 	bh_unlock_sock(sk);
 	local_bh_enable();
+}
+
+void tcp_close(struct sock *sk, long timeout)
+{
+	lock_sock(sk);
+	__tcp_close(sk, timeout);
 	release_sock(sk);
 	sock_put(sk);
 }
@@ -2579,6 +2589,12 @@ int tcp_disconnect(struct sock *sk, int flags)
 	int old_state = sk->sk_state;
 	u32 seq;
 
+	/* Deny disconnect if other threads are blocked in sk_wait_event()
+	 * or inet_wait_for_connect().
+	 */
+	if (sk->sk_wait_pending)
+		return -EBUSY;
+
 	if (old_state != TCP_CLOSE)
 		tcp_set_state(sk, TCP_CLOSE);
 
@@ -2615,7 +2631,7 @@
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
 
-	sk->sk_shutdown = 0;
+	WRITE_ONCE(sk->sk_shutdown, 0);
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -2634,6 +2650,8 @@
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd = TCP_INIT_CWND;
 	tp->snd_cwnd_cnt = 0;
+	tp->is_cwnd_limited = 0;
+	tp->max_packets_out = 0;
 	tp->window_clamp = 0;
 	tp->delivered = 0;
 	tp->delivered_ce = 0;
@@ -2651,8 +2669,7 @@
 	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
-	dst_release(sk->sk_rx_dst);
-	sk->sk_rx_dst = NULL;
+	dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
 	tp->segs_in = 0;
@@ -2673,6 +2690,7 @@
 	tp->last_oow_ack_time = 0;
 	/* There's a bubble in the pipe until at least the first ACK. */
 	tp->app_limited = ~0U;
+	tp->rate_app_limited = 1;
 	tp->rack.mstamp = 0;
 	tp->rack.advanced = 0;
 	tp->rack.reo_wnd_steps = 1;
@@ -3056,18 +3074,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 
 	case TCP_LINGER2:
 		if (val < 0)
-			tp->linger2 = -1;
-		else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
-			tp->linger2 = 0;
+			WRITE_ONCE(tp->linger2, -1);
+		else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+			WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
 		else
-			tp->linger2 = val * HZ;
+			WRITE_ONCE(tp->linger2, val * HZ);
 		break;
 
 	case TCP_DEFER_ACCEPT:
 		/* Translate value in seconds to number of retransmits */
-		icsk->icsk_accept_queue.rskq_defer_accept =
-			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
-					TCP_RTO_MAX / HZ);
+		WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+			   secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+					   TCP_RTO_MAX / HZ));
 		break;
 
 	case TCP_WINDOW_CLAMP:
@@ -3127,7 +3145,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN_CONNECT:
 		if (val > 1 || val < 0) {
 			err = -EINVAL;
-		} else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+		} else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+			   TFO_CLIENT_ENABLE) {
 			if (sk->sk_state == TCP_CLOSE)
 				tp->fastopen_connect = val;
 			else
@@ -3154,7 +3173,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		err = tcp_repair_set_window(tp, optval, optlen);
 		break;
 	case TCP_NOTSENT_LOWAT:
-		tp->notsent_lowat = val;
+		WRITE_ONCE(tp->notsent_lowat, val);
 		sk->sk_write_space(sk);
 		break;
 	case TCP_INQ:
@@ -3166,7 +3185,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_TX_DELAY:
 		if (val)
 			tcp_enable_tx_delay();
-		tp->tcp_tx_delay = val;
+		WRITE_ONCE(tp->tcp_tx_delay, val);
 		break;
 	default:
 		err = -ENOPROTOOPT;
@@ -3432,15 +3451,16 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
-	len = min_t(unsigned int, len, sizeof(int));
-
 	if (len < 0)
 		return -EINVAL;
 
+	len = min_t(unsigned int, len, sizeof(int));
+
 	switch (optname) {
 	case TCP_MAXSEG:
 		val = tp->mss_cache;
-		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+		if (tp->rx_opt.user_mss &&
+		    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
 			val = tp->rx_opt.user_mss;
 		if (tp->repair)
 			val = tp->rx_opt.mss_clamp;
@@ -3464,13 +3484,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
 		break;
 	case TCP_LINGER2:
-		val = tp->linger2;
+		val = READ_ONCE(tp->linger2);
 		if (val >= 0)
-			val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+			val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
-				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+		val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+		val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+				      TCP_RTO_MAX / HZ);
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
@@ -3610,7 +3631,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_FASTOPEN:
-		val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+		val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
 		break;
 
 	case TCP_FASTOPEN_CONNECT:
@@ -3622,14 +3643,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_TX_DELAY:
-		val = tp->tcp_tx_delay;
+		val = READ_ONCE(tp->tcp_tx_delay);
 		break;
 
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
 	case TCP_NOTSENT_LOWAT:
-		val = tp->notsent_lowat;
+		val = READ_ONCE(tp->notsent_lowat);
 		break;
 	case TCP_INQ:
 		val = tp->recvmsg_inq;
@@ -3766,12 +3787,16 @@ static void __tcp_alloc_md5sig_pool(void)
 	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
	 */
 	smp_wmb();
-	tcp_md5sig_pool_populated = true;
+	/* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+	 * and tcp_get_md5sig_pool().
+	 */
+	WRITE_ONCE(tcp_md5sig_pool_populated, true);
 }
 
 bool tcp_alloc_md5sig_pool(void)
 {
-	if (unlikely(!tcp_md5sig_pool_populated)) {
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
 		mutex_lock(&tcp_md5sig_mutex);
 
 		if (!tcp_md5sig_pool_populated) {
@@ -3782,7 +3807,8 @@ bool tcp_alloc_md5sig_pool(void)
 
 		mutex_unlock(&tcp_md5sig_mutex);
 	}
-	return tcp_md5sig_pool_populated;
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	return READ_ONCE(tcp_md5sig_pool_populated);
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
@@ -3798,7 +3824,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
 	local_bh_disable();
 
-	if (tcp_md5sig_pool_populated) {
+	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+	if (READ_ONCE(tcp_md5sig_pool_populated)) {
 		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
 		smp_rmb();
 		return this_cpu_ptr(&tcp_md5sig_pool);
@@ -3880,7 +3907,7 @@ void tcp_done(struct sock *sk)
 	if (req)
 		reqsk_fastopen_remove(sk, req, false);
 
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_state_change(sk);