aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h95
1 files changed, 75 insertions, 20 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 30315a03b4ab..81888513b3b9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -312,6 +312,7 @@ struct sock_common {
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -392,6 +393,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;
struct sk_filter __rcu *sk_filter;
union {
@@ -986,8 +988,12 @@ static inline void sock_rps_record_flow(const struct sock *sk)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
*/
- if (sk->sk_state == TCP_ESTABLISHED)
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
}
#endif
}
@@ -996,20 +1002,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->hash))
- sk->sk_rxhash = skb->hash;
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in sock_rps_record_flow().
+ */
+ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash))
+ WRITE_ONCE(sk->sk_rxhash, skb->hash);
#endif
}
static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
- sk->sk_rxhash = 0;
+ /* Paired with READ_ONCE() in sock_rps_record_flow() */
+ WRITE_ONCE(sk->sk_rxhash, 0);
#endif
}
#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1019,6 +1030,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
@@ -1130,7 +1142,7 @@ struct proto {
unsigned int inuse_idx;
#endif
- bool (*stream_memory_free)(const struct sock *sk);
+ bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
@@ -1140,6 +1152,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1212,19 +1225,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */
-static inline bool sk_stream_memory_free(const struct sock *sk)
+static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (sk->sk_wmem_queued >= sk->sk_sndbuf)
return false;
return sk->sk_prot->stream_memory_free ?
- sk->sk_prot->stream_memory_free(sk) : true;
+ sk->sk_prot->stream_memory_free(sk, wake) : true;
}
-static inline bool sk_stream_is_writeable(const struct sock *sk)
+static inline bool sk_stream_memory_free(const struct sock *sk)
+{
+ return __sk_stream_memory_free(sk, 0);
+}
+
+static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake)
{
return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) &&
- sk_stream_memory_free(sk);
+ __sk_stream_memory_free(sk, wake);
+}
+
+static inline bool sk_stream_is_writeable(const struct sock *sk)
+{
+ return __sk_stream_is_writeable(sk, 0);
}
static inline int sk_under_cgroup_hierarchy(struct sock *sk,
@@ -1243,6 +1266,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1252,7 +1281,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1306,7 +1335,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
@@ -1730,21 +1759,33 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
/* sk_tx_queue_mapping accept only upto a 16-bit value */
if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX))
return;
- sk->sk_tx_queue_mapping = tx_queue;
+ /* Paired with READ_ONCE() in sk_tx_queue_get() and
+ * other WRITE_ONCE() because socket lock might be not held.
+ */
+ WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue);
}
#define NO_QUEUE_MAPPING USHRT_MAX
static inline void sk_tx_queue_clear(struct sock *sk)
{
- sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
+ /* Paired with READ_ONCE() in sk_tx_queue_get() and
+ * other WRITE_ONCE() because socket lock might be not held.
+ */
+ WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING);
}
static inline int sk_tx_queue_get(const struct sock *sk)
{
- if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
- return sk->sk_tx_queue_mapping;
+ if (sk) {
+ /* Paired with WRITE_ONCE() in sk_tx_queue_clear()
+ * and sk_tx_queue_set().
+ */
+ int val = READ_ONCE(sk->sk_tx_queue_mapping);
+ if (val != NO_QUEUE_MAPPING)
+ return val;
+ }
return -1;
}
@@ -1818,6 +1859,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
}
kuid_t sock_i_uid(struct sock *sk);
+unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
@@ -1876,7 +1918,7 @@ static inline void dst_negative_advice(struct sock *sk)
if (ndst != dst) {
rcu_assign_pointer(sk->sk_dst_cache, ndst);
sk_tx_queue_clear(sk);
- sk->sk_dst_pending_confirm = 0;
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
}
}
}
@@ -1887,7 +1929,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old_dst;
sk_tx_queue_clear(sk);
- sk->sk_dst_pending_confirm = 0;
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
old_dst = rcu_dereference_protected(sk->sk_dst_cache,
lockdep_sock_is_held(sk));
rcu_assign_pointer(sk->sk_dst_cache, dst);
@@ -1900,7 +1942,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old_dst;
sk_tx_queue_clear(sk);
- sk->sk_dst_pending_confirm = 0;
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
dst_release(old_dst);
}
@@ -2134,6 +2176,19 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
sk_mem_charge(sk, skb->truesize);
}
+static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ if (skb) {
+ if (sk_rmem_schedule(sk, skb, skb->truesize)) {
+ skb_set_owner_r(skb, sk);
+ return skb;
+ }
+ __kfree_skb(skb);
+ }
+ return NULL;
+}
+
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
@@ -2393,7 +2448,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
__sock_recv_ts_and_drops(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
- else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
+ else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP))
sock_write_timestamp(sk, 0);
}