Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch | 128
1 file changed, 128 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch
new file mode 100644
index 00000000..129c2475
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch
@@ -0,0 +1,128 @@
+From 486eb3eb62604a5c691156b6a657b03fe67cb167 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 02:48:58 +0100
+Subject: [PATCH 100/103] bpf: avoid false sharing of map refcount with
+ max_entries
+
+[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]
+
+In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
+speculation") also change the layout of struct bpf_map such that
+false sharing of fast-path members like max_entries is avoided
+when the map's reference counter is altered. Therefore enforce
+them to be placed into separate cachelines.
+
+pahole dump after change:
+
+ struct bpf_map {
+	const struct bpf_map_ops  * ops;              /*     0     8 */
+	struct bpf_map *           inner_map_meta;    /*     8     8 */
+	void *                     security;          /*    16     8 */
+	enum bpf_map_type          map_type;          /*    24     4 */
+	u32                        key_size;          /*    28     4 */
+	u32                        value_size;        /*    32     4 */
+	u32                        max_entries;       /*    36     4 */
+	u32                        map_flags;         /*    40     4 */
+	u32                        pages;             /*    44     4 */
+	u32                        id;                /*    48     4 */
+	int                        numa_node;         /*    52     4 */
+	bool                       unpriv_array;      /*    56     1 */
+
+	/* XXX 7 bytes hole, try to pack */
+
+	/* --- cacheline 1 boundary (64 bytes) --- */
+	struct user_struct *       user;              /*    64     8 */
+	atomic_t                   refcnt;            /*    72     4 */
+	atomic_t                   usercnt;           /*    76     4 */
+	struct work_struct         work;              /*    80    32 */
+	char                       name[16];          /*   112    16 */
+	/* --- cacheline 2 boundary (128 bytes) --- */
+
+	/* size: 128, cachelines: 2, members: 17 */
+	/* sum members: 121, holes: 1, sum holes: 7 */
+ };
+
+Now all entries in the first cacheline are read-only throughout
+the lifetime of the map, set up once during map creation. Overall
+struct size and number of cachelines don't change from the
+reordering. struct bpf_map is usually the first member, embedded
+in the map structs of specific map implementations, so also avoid
+letting those members sit at the end, where they could share a
+cacheline with the first map values, e.g. in the array map, since
+remote CPUs can trigger map updates for those just as well (easily
+dirtying members like max_entries intentionally) while having
+subsequent values in cache.
+
+Quoting from Google's Project Zero blog [1]:
+
+  Additionally, at least on the Intel machine on which this was
+  tested, bouncing modified cache lines between cores is slow,
+  apparently because the MESI protocol is used for cache coherence
+  [8]. Changing the reference counter of an eBPF array on one
+  physical CPU core causes the cache line containing the reference
+  counter to be bounced over to that CPU core, making reads of the
+  reference counter on all other CPU cores slow until the changed
+  reference counter has been written back to memory. Because the
+  length and the reference counter of an eBPF array are stored in
+  the same cache line, this also means that changing the reference
+  counter on one physical CPU core causes reads of the eBPF array's
+  length to be slow on other physical CPU cores (intentional false
+  sharing).
+
+While this doesn't 'control' the out-of-bounds speculation through
+masking the index as in commit b2157399cc98, triggering a manipulation
+of the map's reference counter is really trivial, so let's not allow
+max_entries to be easily affected through it.
+
+Splitting to separate cachelines also generally makes sense from
+a performance perspective anyway, in that the fast path won't take a
+cache miss if the map gets pinned, reused in other progs, etc. out
+of the control path, thus also avoiding unintentional false sharing.
+
+  [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 0dbb21b..80064c8 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -36,7 +36,10 @@ struct bpf_map_ops {
+ };
+ 
+ struct bpf_map {
+-	atomic_t refcnt;
++	/* 1st cacheline with read-mostly members of which some
++	 * are also accessed in fast-path (e.g. ops, max_entries).
++	 */
++	const struct bpf_map_ops *ops ____cacheline_aligned;
+ 	enum bpf_map_type map_type;
+ 	u32 key_size;
+ 	u32 value_size;
+@@ -44,10 +47,15 @@ struct bpf_map {
+ 	u32 map_flags;
+ 	u32 pages;
+ 	bool unpriv_array;
+-	struct user_struct *user;
+-	const struct bpf_map_ops *ops;
+-	struct work_struct work;
++	/* 7 bytes hole */
++
++	/* 2nd cacheline with misc members to avoid false sharing
++	 * particularly with refcounting.
++	 */
++	struct user_struct *user ____cacheline_aligned;
++	atomic_t refcnt;
+ 	atomic_t usercnt;
++	struct work_struct work;
+ };
+ 
+ struct bpf_map_type_list {
+-- 
+2.7.4
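
For readers following the layout reasoning above, here is a minimal
userspace sketch of the same trick. It is illustrative only: struct
demo_map, the CACHELINE constant, and the 64-byte line size are
assumptions of this sketch, not part of the patch; the kernel's
____cacheline_aligned expands to a comparable alignment attribute.

/*
 * Keep read-mostly fast-path fields and the frequently written
 * reference counter on separate cachelines, so that bumping the
 * refcount on one CPU cannot invalidate max_entries in the caches
 * of other CPUs (the false sharing described in the quote above).
 */
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHELINE 64	/* assumed line size, typical for x86_64 */

struct demo_map {
	/* 1st cacheline: set once at creation, read-only afterwards. */
	_Alignas(CACHELINE) const void *ops;
	uint32_t key_size;
	uint32_t value_size;
	uint32_t max_entries;

	/* 2nd cacheline: mutable state, dirtied on every get/put. */
	_Alignas(CACHELINE) atomic_int refcnt;
	atomic_int usercnt;
};

/* Compile-time check mirroring the pahole dump: the counter must
 * not share a cacheline with the fast-path members above.
 */
static_assert(offsetof(struct demo_map, refcnt) >= CACHELINE,
	      "refcnt shares a cacheline with fast-path fields");

int main(void)
{
	struct demo_map m = { .max_entries = 16 };

	atomic_fetch_add(&m.refcnt, 1);	/* dirties 2nd cacheline only */
	printf("max_entries at %zu, refcnt at %zu\n",
	       offsetof(struct demo_map, max_entries),
	       offsetof(struct demo_map, refcnt));
	return 0;
}

Built with a C11 compiler (e.g. cc -std=c11 -g demo.c), running pahole
on the resulting binary should show the same two-cacheline split as the
dump in the commit message.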