diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 90 |
1 files changed, 71 insertions, 19 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 7f213cfcb3cc..5293f2b65fb5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -300,7 +300,7 @@ struct bpf_sk_storage; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit @@ -309,6 +309,7 @@ struct bpf_sk_storage; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start + * @sk_wait_pending: number of threads blocked on this socket * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -390,6 +391,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; + int sk_wait_pending; struct sk_filter __rcu *sk_filter; union { @@ -399,7 +401,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -995,8 +997,12 @@ static inline void sock_rps_record_flow(const struct sock *sk) * OR an additional socket flag * [1] : sk_state and sk_prot are in the same cache line. */ - if (sk->sk_state == TCP_ESTABLISHED) - sock_rps_record_flow_hash(sk->sk_rxhash); + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). + */ + sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); + } } #endif } @@ -1005,20 +1011,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->hash)) - sk->sk_rxhash = skb->hash; + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in sock_rps_record_flow(). + */ + if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash)) + WRITE_ONCE(sk->sk_rxhash, skb->hash); #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS - sk->sk_rxhash = 0; + /* Paired with READ_ONCE() in sock_rps_record_flow() */ + WRITE_ONCE(sk->sk_rxhash, 0); #endif } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ + __sk->sk_wait_pending++; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1028,6 +1039,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ + __sk->sk_wait_pending--; \ __rc = __condition; \ __rc; \ }) @@ -1149,6 +1161,7 @@ struct proto { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. + * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes. * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ @@ -1262,6 +1275,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk) return sk->sk_prot->memory_pressure != NULL; } +static inline bool sk_under_global_memory_pressure(const struct sock *sk) +{ + return sk->sk_prot->memory_pressure && + !!READ_ONCE(*sk->sk_prot->memory_pressure); +} + static inline bool sk_under_memory_pressure(const struct sock *sk) { if (!sk->sk_prot->memory_pressure) @@ -1271,7 +1290,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return !!*sk->sk_prot->memory_pressure; + return !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline long @@ -1325,7 +1344,7 @@ proto_memory_pressure(struct proto *prot) { if (!prot->memory_pressure) return false; - return !!*prot->memory_pressure; + return !!READ_ONCE(*prot->memory_pressure); } @@ -1400,7 +1419,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; @@ -1706,7 +1725,12 @@ void sk_common_release(struct sock *sk); * Default socket callbacks and setup code */ -/* Initialise core socket variables */ +/* Initialise core socket variables using an explicit uid. */ +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid); + +/* Initialise core socket variables. + * Assumes struct socket *sock is embedded in a struct socket_alloc. + */ void sock_init_data(struct socket *sock, struct sock *sk); /* @@ -1758,21 +1782,33 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* sk_tx_queue_mapping accept only upto a 16-bit value */ if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX)) return; - sk->sk_tx_queue_mapping = tx_queue; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); } #define NO_QUEUE_MAPPING USHRT_MAX static inline void sk_tx_queue_clear(struct sock *sk) { - sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } static inline int sk_tx_queue_get(const struct sock *sk) { - if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_tx_queue_mapping; + if (sk) { + /* Paired with WRITE_ONCE() in sk_tx_queue_clear() + * and sk_tx_queue_set(). + */ + int val = READ_ONCE(sk->sk_tx_queue_mapping); + if (val != NO_QUEUE_MAPPING) + return val; + } return -1; } @@ -1846,6 +1882,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) } kuid_t sock_i_uid(struct sock *sk); +unsigned long __sock_i_ino(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) @@ -1904,7 +1941,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } @@ -1915,7 +1952,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); @@ -1928,7 +1965,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -2167,11 +2204,26 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) +{ + skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + if (skb) { + if (sk_rmem_schedule(sk, skb, skb->truesize)) { + skb_set_owner_r(skb, sk); + return skb; + } + __kfree_skb(skb); + } + return NULL; +} + void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires); void sk_stop_timer(struct sock *sk, struct timer_list *timer); +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer); + int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, @@ -2428,7 +2480,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); - else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) + else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP)) sock_write_timestamp(sk, 0); } |