aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/gfp.h23
-rw-r--r--include/linux/skbuff.h36
-rw-r--r--include/net/busy_poll.h6
-rw-r--r--include/net/flow_dissector.h3
-rw-r--r--include/net/fq.h2
-rw-r--r--include/net/fq_impl.h4
-rw-r--r--include/net/sock.h15
7 files changed, 67 insertions, 22 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b041f94678de..79d3dab45ceb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -313,6 +313,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}
+/**
+ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
+ * @gfp_flags: gfp_flags to test
+ *
+ * Test whether @gfp_flags indicates that the allocation is from the
+ * %current context and allowed to sleep.
+ *
+ * An allocation being allowed to block doesn't mean it owns the %current
+ * context. When direct reclaim path tries to allocate memory, the
+ * allocation context is nested inside whatever %current was doing at the
+ * time of the original allocation. The nested allocation may be allowed
+ * to block but modifying anything %current owns can corrupt the outer
+ * context's expectations.
+ *
+ * %true result from this function indicates that the allocation context
+ * can sleep and use anything that's associated with %current.
+ */
+static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
+{
+ return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
+ __GFP_DIRECT_RECLAIM;
+}
+
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fdb0cd0699b6..ec00d9264e5c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1228,7 +1228,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
return skb->hash;
}
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+ const siphash_key_t *perturb);
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
{
@@ -1346,6 +1347,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list)
}
/**
+ * skb_queue_empty_lockless - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ * This variant can be used in lockless contexts.
+ */
+static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
+{
+ return READ_ONCE(list->next) == (const struct sk_buff *) list;
+}
+
+
+/**
* skb_queue_is_last - check if skb is the last entry in the queue
* @list: queue head
* @skb: buffer
@@ -1709,9 +1723,11 @@ static inline void __skb_insert(struct sk_buff *newsk,
struct sk_buff *prev, struct sk_buff *next,
struct sk_buff_head *list)
{
- newsk->next = next;
- newsk->prev = prev;
- next->prev = prev->next = newsk;
+ /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */
+ WRITE_ONCE(newsk->next, next);
+ WRITE_ONCE(newsk->prev, prev);
+ WRITE_ONCE(next->prev, newsk);
+ WRITE_ONCE(prev->next, newsk);
list->qlen++;
}
@@ -1722,11 +1738,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list,
struct sk_buff *first = list->next;
struct sk_buff *last = list->prev;
- first->prev = prev;
- prev->next = first;
+ WRITE_ONCE(first->prev, prev);
+ WRITE_ONCE(prev->next, first);
- last->next = next;
- next->prev = last;
+ WRITE_ONCE(last->next, next);
+ WRITE_ONCE(next->prev, last);
}
/**
@@ -1867,8 +1883,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
next = skb->next;
prev = skb->prev;
skb->next = skb->prev = NULL;
- next->prev = prev;
- prev->next = next;
+ WRITE_ONCE(next->prev, prev);
+ WRITE_ONCE(prev->next, next);
}
/**
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 71c72a939bf8..c86fcadccbd7 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -134,7 +134,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- sk->sk_napi_id = skb->napi_id;
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
}
@@ -143,8 +143,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- if (!sk->sk_napi_id)
- sk->sk_napi_id = skb->napi_id;
+ if (!READ_ONCE(sk->sk_napi_id))
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 22aba321282d..227dc0a84172 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/in6.h>
+#include <linux/siphash.h>
#include <uapi/linux/if_ether.h>
/**
@@ -229,7 +230,7 @@ struct flow_dissector {
struct flow_keys {
struct flow_dissector_key_control control;
#define FLOW_KEYS_HASH_START_FIELD basic
- struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
struct flow_dissector_key_tags tags;
struct flow_dissector_key_vlan vlan;
struct flow_dissector_key_keyid keyid;
diff --git a/include/net/fq.h b/include/net/fq.h
index 6d8521a30c5c..2c7687902789 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -70,7 +70,7 @@ struct fq {
struct list_head backlogs;
spinlock_t lock;
u32 flows_cnt;
- u32 perturbation;
+ siphash_key_t perturbation;
u32 limit;
u32 memory_limit;
u32 memory_usage;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index ac1a2317941e..46903e23eab9 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -105,7 +105,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq,
lockdep_assert_held(&fq->lock);
- hash = skb_get_hash_perturb(skb, fq->perturbation);
+ hash = skb_get_hash_perturb(skb, &fq->perturbation);
idx = reciprocal_scale(hash, fq->flows_cnt);
flow = &fq->flows[idx];
@@ -255,7 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt)
INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
- fq->perturbation = prandom_u32();
+ get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
diff --git a/include/net/sock.h b/include/net/sock.h
index 60eef7f1ac05..7ec4d0bd8d12 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -916,8 +916,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
{
int cpu = raw_smp_processor_id();
- if (unlikely(sk->sk_incoming_cpu != cpu))
- sk->sk_incoming_cpu = cpu;
+ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
+ WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
static inline void sock_rps_record_flow_hash(__u32 hash)
@@ -2131,12 +2131,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one for
+ * optimization when we know that we're in the normal context and owns
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
- if (gfpflags_allow_blocking(sk->sk_allocation))
+ if (gfpflags_normal_context(sk->sk_allocation))
return &current->task_frag;
return &sk->sk_frag;