aboutsummaryrefslogtreecommitdiffstats
path: root/features/seccomp/seccomp-add-SECCOMP_RET_ERRNO.patch
blob: 2bb5adcf0e06a303f0c71b4509da8675aeaa6086 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
From dbb9ea8331cefce3fe15499126a7a1d29beb5d70 Mon Sep 17 00:00:00 2001
From: Will Drewry <wad@chromium.org>
Date: Thu, 12 Apr 2012 16:47:59 -0500
Subject: [PATCH] seccomp: add SECCOMP_RET_ERRNO

commit acf3b2c71ed20c53dc69826683417703c2a88059 upstream.

This change adds the SECCOMP_RET_ERRNO as a valid return value from a
seccomp filter.  Additionally, it makes the first use of the lower
16-bits for storing a filter-supplied errno.  16-bits is more than
enough for the errno-base.h calls.

Returning errors instead of immediately terminating processes that
violate seccomp policy allow for broader use of this functionality
for kernel attack surface reduction.  For example, a linux container
could maintain a whitelist of pre-existing system calls but drop
all new ones with errnos.  This would keep a logically static attack
surface while providing errnos that may allow for graceful failure
without the downside of do_exit() on a bad call.

This change also changes the signature of __secure_computing.  It
appears the only direct caller is the arm entry code and it clobbers
any possible return value (register) immediately.

Signed-off-by: Will Drewry <wad@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Eric Paris <eparis@redhat.com>

v18: - fix up comments and rebase
     - fix bad var name which was fixed in later revs
     - remove _int() and just change the __secure_computing signature
v16-v17: ...
v15: - use audit_seccomp and add a skip label. (eparis@redhat.com)
     - clean up and pad out return codes (indan@nul.nu)
v14: - no change/rebase
v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc
v12: - move to WARN_ON if filter is NULL
       (oleg@redhat.com, luto@mit.edu, keescook@chromium.org)
     - return immediately for filter==NULL (keescook@chromium.org)
     - change evaluation to only compare the ACTION so that layered
       errnos don't result in the lowest one being returned.
       (keeschook@chromium.org)
v11: - check for NULL filter (keescook@chromium.org)
v10: - change loaders to fn
 v9: - n/a
 v8: - update Kconfig to note new need for syscall_set_return_value.
     - reordered such that TRAP behavior follows on later.
     - made the for loop a little less indent-y
 v7: - introduced
Signed-off-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/Kconfig            |    6 ++++--
 include/linux/seccomp.h |   10 ++++++----
 kernel/seccomp.c        |   42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 91c2c73..beaab68 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -220,8 +220,10 @@ config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
 	  This symbol should be selected by an architecure if it provides
-	  asm/syscall.h, specifically syscall_get_arguments() and
-	  syscall_get_arch().
+	  asm/syscall.h, specifically syscall_get_arguments(),
+	  syscall_get_arch(), and syscall_set_return_value().  Additionally,
+	  its system call entry path must respect a return value of -1 from
+	  __secure_computing() and/or secure_computing().
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 86bb68f..b4ce2c8 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -12,13 +12,14 @@
 
 /*
  * All BPF programs must return a 32-bit value.
- * The bottom 16-bits are reserved for future use.
+ * The bottom 16-bits are for optional return data.
  * The upper 16-bits are ordered from least permissive values to most.
  *
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
 #define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
@@ -64,11 +65,12 @@ struct seccomp {
 	struct seccomp_filter *filter;
 };
 
-extern void __secure_computing(int);
-static inline void secure_computing(int this_syscall)
+extern int __secure_computing(int);
+static inline int secure_computing(int this_syscall)
 {
 	if (unlikely(test_thread_flag(TIF_SECCOMP)))
-		__secure_computing(this_syscall);
+		return  __secure_computing(this_syscall);
+	return 0;
 }
 
 extern long prctl_get_seccomp(void);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 0f7c709..5f78fb6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -199,15 +199,20 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 static u32 seccomp_run_filters(int syscall)
 {
 	struct seccomp_filter *f;
-	u32 ret = SECCOMP_RET_KILL;
+	u32 ret = SECCOMP_RET_ALLOW;
+
+	/* Ensure unexpected behavior doesn't result in failing open. */
+	if (WARN_ON(current->seccomp.filter == NULL))
+		return SECCOMP_RET_KILL;
+
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
-	 * value always takes priority.
+	 * value always takes priority (ignoring the DATA).
 	 */
 	for (f = current->seccomp.filter; f; f = f->prev) {
-		ret = sk_run_filter(NULL, f->insns);
-		if (ret != SECCOMP_RET_ALLOW)
-			break;
+		u32 cur_ret = sk_run_filter(NULL, f->insns);
+		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+			ret = cur_ret;
 	}
 	return ret;
 }
@@ -346,11 +351,13 @@ static int mode1_syscalls_32[] = {
 };
 #endif
 
-void __secure_computing(int this_syscall)
+int __secure_computing(int this_syscall)
 {
 	int mode = current->seccomp.mode;
 	int exit_sig = 0;
 	int *syscall;
+	u32 ret = SECCOMP_RET_KILL;
+	int data;
 
 	switch (mode) {
 	case SECCOMP_MODE_STRICT:
@@ -361,14 +368,26 @@ void __secure_computing(int this_syscall)
 #endif
 		do {
 			if (*syscall == this_syscall)
-				return;
+				return 0;
 		} while (*++syscall);
 		exit_sig = SIGKILL;
 		break;
 #ifdef CONFIG_SECCOMP_FILTER
 	case SECCOMP_MODE_FILTER:
-		if (seccomp_run_filters(this_syscall) == SECCOMP_RET_ALLOW)
-			return;
+		ret = seccomp_run_filters(this_syscall);
+		data = ret & SECCOMP_RET_DATA;
+		switch (ret & SECCOMP_RET_ACTION) {
+		case SECCOMP_RET_ERRNO:
+			/* Set the low-order 16-bits as a errno. */
+			syscall_set_return_value(current, task_pt_regs(current),
+						 -data, 0);
+			goto skip;
+		case SECCOMP_RET_ALLOW:
+			return 0;
+		case SECCOMP_RET_KILL:
+		default:
+			break;
+		}
 		exit_sig = SIGSYS;
 		break;
 #endif
@@ -379,8 +398,11 @@ void __secure_computing(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	audit_seccomp(this_syscall, exit_code, SECCOMP_RET_KILL);
+	audit_seccomp(this_syscall, exit_sig, ret);
 	do_exit(exit_sig);
+skip:
+	audit_seccomp(this_syscall, exit_sig, ret);
+	return -1;
 }
 
 long prctl_get_seccomp(void)
-- 
1.7.9.1