Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch  128
1 file changed, 0 insertions, 128 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch
deleted file mode 100644
index 129c2475..00000000
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch
+++ /dev/null
@@ -1,128 +0,0 @@
-From 486eb3eb62604a5c691156b6a657b03fe67cb167 Mon Sep 17 00:00:00 2001
-From: Daniel Borkmann <daniel@iogearbox.net>
-Date: Mon, 29 Jan 2018 02:48:58 +0100
-Subject: [PATCH 100/103] bpf: avoid false sharing of map refcount with
- max_entries
-
-[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]
-
-In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
-speculation"), also change the layout of struct bpf_map so that
-false sharing of fast-path members like max_entries is avoided
-when the map's reference counter is altered. Therefore, force
-these members onto separate cachelines.
-
-pahole dump after change:
-
- struct bpf_map {
- const struct bpf_map_ops * ops; /* 0 8 */
- struct bpf_map * inner_map_meta; /* 8 8 */
- void * security; /* 16 8 */
- enum bpf_map_type map_type; /* 24 4 */
- u32 key_size; /* 28 4 */
- u32 value_size; /* 32 4 */
- u32 max_entries; /* 36 4 */
- u32 map_flags; /* 40 4 */
- u32 pages; /* 44 4 */
- u32 id; /* 48 4 */
- int numa_node; /* 52 4 */
- bool unpriv_array; /* 56 1 */
-
- /* XXX 7 bytes hole, try to pack */
-
- /* --- cacheline 1 boundary (64 bytes) --- */
- struct user_struct * user; /* 64 8 */
- atomic_t refcnt; /* 72 4 */
- atomic_t usercnt; /* 76 4 */
- struct work_struct work; /* 80 32 */
- char name[16]; /* 112 16 */
- /* --- cacheline 2 boundary (128 bytes) --- */
-
- /* size: 128, cachelines: 2, members: 17 */
- /* sum members: 121, holes: 1, sum holes: 7 */
- };
-
-Now all members in the first cacheline are read-only throughout
-the lifetime of the map: they are set up once during map creation
-and never written afterwards. Overall struct size and number of
-cachelines don't change from the reordering. struct bpf_map is
-usually embedded as the first member of the map structs of the
-specific map implementations, so also avoid letting its mutable
-members sit at the end, where they could share a cacheline with
-the first map values, e.g. in the array map. Remote CPUs can
-trigger map updates just as well for those, intentionally dirtying
-members like max_entries while keeping subsequent values in cache.
-
-Quoting from Google's Project Zero blog [1]:
-
- Additionally, at least on the Intel machine on which this was
- tested, bouncing modified cache lines between cores is slow,
- apparently because the MESI protocol is used for cache coherence
- [8]. Changing the reference counter of an eBPF array on one
- physical CPU core causes the cache line containing the reference
- counter to be bounced over to that CPU core, making reads of the
- reference counter on all other CPU cores slow until the changed
- reference counter has been written back to memory. Because the
- length and the reference counter of an eBPF array are stored in
- the same cache line, this also means that changing the reference
- counter on one physical CPU core causes reads of the eBPF array's
- length to be slow on other physical CPU cores (intentional false
- sharing).
-
-While this doesn't 'control' the out-of-bounds speculation by
-masking the index as commit b2157399cc98 does, triggering a
-manipulation of the map's reference counter is trivial, so let's
-not allow it to easily affect max_entries.
-
-Splitting into separate cachelines also generally makes sense
-from a performance perspective: the fast path won't take a cache
-miss when the map gets pinned, reused in other progs, etc. from
-the control path, which also avoids unintentional false sharing.
-
- [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html
-
-Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/bpf.h | 16 ++++++++++++----
- 1 file changed, 12 insertions(+), 4 deletions(-)
-
-diff --git a/include/linux/bpf.h b/include/linux/bpf.h
-index 0dbb21b..80064c8 100644
---- a/include/linux/bpf.h
-+++ b/include/linux/bpf.h
-@@ -36,7 +36,10 @@ struct bpf_map_ops {
- };
-
- struct bpf_map {
-- atomic_t refcnt;
-+ /* 1st cacheline with read-mostly members of which some
-+ * are also accessed in fast-path (e.g. ops, max_entries).
-+ */
-+ const struct bpf_map_ops *ops ____cacheline_aligned;
- enum bpf_map_type map_type;
- u32 key_size;
- u32 value_size;
-@@ -44,10 +47,15 @@ struct bpf_map {
- u32 map_flags;
- u32 pages;
- bool unpriv_array;
-- struct user_struct *user;
-- const struct bpf_map_ops *ops;
-- struct work_struct work;
-+ /* 7 bytes hole */
-+
-+ /* 2nd cacheline with misc members to avoid false sharing
-+ * particularly with refcounting.
-+ */
-+ struct user_struct *user ____cacheline_aligned;
-+ atomic_t refcnt;
- atomic_t usercnt;
-+ struct work_struct work;
- };
-
- struct bpf_map_type_list {
---
-2.7.4
-
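
Below is a minimal userspace C sketch of the layout technique the deleted patch applied; it is illustrative only and not part of the patch. It assumes a 64-byte cache line and uses C11 alignas as a stand-in for the kernel's ____cacheline_aligned; struct demo_map and the demo_* helpers are hypothetical names. In a kernel tree, the real layout can be re-checked with pahole, e.g. pahole -C bpf_map vmlinux, which is how the dump quoted above was produced.

#include <stdalign.h>
#include <stdatomic.h>
#include <stddef.h>

#define CACHELINE 64 /* assumed cache-line size */

struct demo_map {
	/* 1st cacheline: read-mostly members, written once at map
	 * creation and read on every lookup (fast path). */
	alignas(CACHELINE) const void *ops;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;

	/* 2nd cacheline: mutable members, forced onto their own
	 * cacheline so that bouncing refcnt between CPUs does not
	 * invalidate max_entries in remote caches (false sharing). */
	alignas(CACHELINE) atomic_uint refcnt;
	atomic_uint usercnt;
};

/* Compile-time check mirroring the pahole dump above: the mutable
 * members must start on a fresh cacheline. */
_Static_assert(offsetof(struct demo_map, refcnt) % CACHELINE == 0,
	       "refcnt must not share a cacheline with max_entries");

/* Fast path: touches only the read-only first cacheline. */
static inline int demo_in_bounds(const struct demo_map *m, unsigned int idx)
{
	return idx < m->max_entries;
}

/* Control path: dirties only the second cacheline. */
static inline void demo_map_get(struct demo_map *m)
{
	atomic_fetch_add_explicit(&m->refcnt, 1, memory_order_relaxed);
}

With this layout, a core that hammers demo_map_get() invalidates only the second cacheline on other cores; concurrent demo_in_bounds() callers keep hitting their local, clean copy of the first cacheline, which is the property the patch restores for struct bpf_map.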