common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222

From fa0b0b09af62c9157bb3bcc893a175dcd7034197 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 29 Jan 2018 02:48:56 +0100
Subject: [PATCH 098/103] bpf: introduce BPF_JIT_ALWAYS_ON config

[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ]

The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715.

A quote from goolge project zero blog:
"At this point, it would normally be necessary to locate gadgets in
the host kernel code that can be used to actually leak data by reading
from an attacker-controlled location, shifting and masking the result
appropriately and then using the result of that as offset to an
attacker-controlled address for a load. But piecing gadgets together
and figuring out which ones work in a speculation context seems annoying.
So instead, we decided to use the eBPF interpreter, which is built into
the host kernel - while there is no legitimate way to invoke it from inside
a VM, the presence of the code in the host kernel's text section is sufficient
to make it usable for the attack, just like with ordinary ROP gadgets."

To make attacker job harder introduce BPF_JIT_ALWAYS_ON config
option that removes interpreter from the kernel in favor of JIT-only mode.
So far eBPF JIT is supported by:
x64, arm64, arm32, sparc64, s390, powerpc64, mips64

The start of JITed program is randomized and code page is marked as read-only.
In addition "constant blinding" can be turned on with net.core.bpf_jit_harden

v2->v3:
- move __bpf_prog_ret0 under ifdef (Daniel)

v1->v2:
- fix init order, test_bpf and cBPF (Daniel's feedback)
- fix offloaded bpf (Jakub's feedback)
- add 'return 0' dummy in case something can invoke prog->bpf_func
- retarget bpf tree. For bpf-next the patch would need one extra hunk.
  It will be sent when the trees are merged back to net-next

Considered doing:
  int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
but it seems better to land the patch as-is and in bpf-next remove
bpf_jit_enable global variable from all JITs, consolidate in one place
and remove this jit_init() function.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 init/Kconfig               |  7 +++++++
 kernel/bpf/core.c          | 18 ++++++++++++++++++
 lib/test_bpf.c             | 11 +++++++----
 net/core/filter.c          |  6 ++----
 net/core/sysctl_net_core.c |  6 ++++++
 net/socket.c               |  9 +++++++++
 6 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 7f69e2e..e9b989c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1634,6 +1634,13 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config SHMEM
 	bool "Use full shmem filesystem" if EXPERT
 	default y
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ab9576b..64c4b13 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 }
 STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
 
+#else
+static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	fp->bpf_func = (void *) __bpf_prog_run;
+#else
+	fp->bpf_func = (void *) __bpf_prog_ret0;
+#endif
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * be JITed, but falls back to the interpreter.
 	 */
 	fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	if (!fp->jited) {
+		*err = -ENOTSUPP;
+		return fp;
+	}
+#endif
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 0362da0..158b4a3 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5601,9 +5601,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
 				return NULL;
 			}
 		}
-		/* We don't expect to fail. */
 		if (*err) {
-			pr_cont("FAIL to attach err=%d len=%d\n",
+			pr_cont("FAIL to prog_create err=%d len=%d\n",
 				*err, fprog.len);
 			return NULL;
 		}
@@ -5626,6 +5625,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		 * checks.
 		 */
 		fp = bpf_prog_select_runtime(fp, err);
+		if (*err) {
+			pr_cont("FAIL to select_runtime err=%d\n", *err);
+			return NULL;
+		}
 		break;
 	}
 
@@ -5811,8 +5814,8 @@ static __init int test_bpf(void)
 				pass_cnt++;
 				continue;
 			}
-
-			return err;
+			err_cnt++;
+			continue;
 		}
 
 		pr_cont("jited:%u ", fp->jited);
diff --git a/net/core/filter.c b/net/core/filter.c
index b391209..c066b00 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1005,11 +1005,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 		 */
 		goto out_err_free;
 
-	/* We are guaranteed to never error here with cBPF to eBPF
-	 * transitions, since there's no issue with type compatibility
-	 * checks on program arrays.
-	 */
 	fp = bpf_prog_select_runtime(fp, &err);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa6..9955d6d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
diff --git a/net/socket.c b/net/socket.c
index 6bbccf0..8a9624e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2547,6 +2547,15 @@ static int __init sock_init(void)
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-- 
2.7.4