aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch
blob: 01f7310c20c8c156e184654db11ec64e1e2d46b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
From 4e010256639fdd9c87743dc7c7ad6a53bc96c1af Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 5 Sep 2017 12:05:01 -0700
Subject: [PATCH 006/102] kaiser: do not set _PAGE_NX on pgd_none

native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must
avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry:
usually that just generated a warning on exit, but sometimes
more mysterious and damaging failures (our production machines
could not complete booting).

The original fix to this just avoided adding _PAGE_NX to
an empty entry; but eventually more problems surfaced with kexec,
and EFI mapping expected to be a problem too.  So now instead
change native_set_pgd() to update shadow only if _PAGE_USER:

A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure)
use set_pgd() to set up a temporary internal virtual address space, with
physical pages remapped at what Kaiser regards as userspace addresses:
Kaiser then assumes a shadow pgd follows, which it will try to corrupt.

This appears to be responsible for the recent kexec and kdump failures;
though it's unclear how those did not manifest as a problem before.
Ah, the shadow pgd will only be assumed to "follow" if the requested
pgd is on an even-numbered page: so I suppose it was going wrong 50%
of the time all along.

What we need is a flag to set_pgd(), to tell it we're dealing with
userspace.  Er, isn't that what the pgd's _PAGE_USER bit is saying?
Add a test for that.  But we cannot do the same for pgd_clear()
(which may be called to clear corrupted entries - set aside the
question of "corrupt in which pgd?" until later), so there just
rely on pgd_clear() not being called in the problematic cases -
with a WARN_ON_ONCE() which should fire half the time if it is.

But this is getting too big for an inline function: move it into
arch/x86/mm/kaiser.c (which then demands a boot/compressed mod);
and de-void and de-space native_get_shadow/normal_pgd() while here.

Also make an unnecessary change to KASLR's init_trampoline(): it was
using set_pgd() to assign a pgd-value to a global variable (not in a
pg directory page), which was rather scary given Kaiser's previous
set_pgd() implementation: not a problem now, but too scary to leave
as was, it could easily blow up if we have to change set_pgd() again.

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/boot/compressed/misc.h   |  1 +
 arch/x86/include/asm/pgtable_64.h | 51 ++++++++++-----------------------------
 arch/x86/mm/kaiser.c              | 42 ++++++++++++++++++++++++++++++++
 arch/x86/mm/kaslr.c               |  4 +--
 4 files changed, 58 insertions(+), 40 deletions(-)

diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355e..cd80024 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -9,6 +9,7 @@
  */
 #undef CONFIG_PARAVIRT
 #undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_KAISER
 #undef CONFIG_KASAN
 
 #include <linux/linkage.h>
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 000265c..177caf3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud)
 }
 
 #ifdef CONFIG_KAISER
-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
+extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
 {
-	return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
+	return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
 }
 
-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
+static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
 {
-	return (pgd_t *)(void*)((unsigned long)(void*)pgdp &  ~(unsigned long)PAGE_SIZE);
+	return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
 }
 #else
-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
+static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
 {
 	BUILD_BUG_ON(1);
 	return NULL;
 }
-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
+static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
 {
 	return pgdp;
 }
 #endif /* CONFIG_KAISER */
 
-/*
- * Page table pages are page-aligned.  The lower half of the top
- * level is used for userspace and the top half for the kernel.
- * This returns true for user pages that need to get copied into
- * both the user and kernel copies of the page tables, and false
- * for kernel pages that should only be in the kernel copy.
- */
-static inline bool is_userspace_pgd(void *__ptr)
-{
-	unsigned long ptr = (unsigned long)__ptr;
-
-	return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
-}
-
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-#ifdef CONFIG_KAISER
-	pteval_t extra_kern_pgd_flags = 0;
-	/* Do we need to also populate the shadow pgd? */
-	if (is_userspace_pgd(pgdp)) {
-		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
-		/*
-		 * Even if the entry is *mapping* userspace, ensure
-		 * that userspace can not use it.  This way, if we
-		 * get out to userspace running on the kernel CR3,
-		 * userspace will crash instead of running.
-		 */
-		extra_kern_pgd_flags = _PAGE_NX;
-	}
-	pgdp->pgd = pgd.pgd;
-	pgdp->pgd |= extra_kern_pgd_flags;
-#else /* CONFIG_KAISER */
-	*pgdp = pgd;
-#endif
+	*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index 7270a29..8d6061c 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
 		unmap_pud_range_nofree(pgd, addr, end);
 	}
 }
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ * This returns true for user pages that need to get copied into
+ * both the user and kernel copies of the page tables, and false
+ * for kernel pages that should only be in the kernel copy.
+ */
+static inline bool is_userspace_pgd(pgd_t *pgdp)
+{
+	return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
+}
+
+pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	/*
+	 * Do we need to also populate the shadow pgd?  Check _PAGE_USER to
+	 * skip cases like kexec and EFI which make temporary low mappings.
+	 */
+	if (pgd.pgd & _PAGE_USER) {
+		if (is_userspace_pgd(pgdp)) {
+			native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+			/*
+			 * Even if the entry is *mapping* userspace, ensure
+			 * that userspace can not use it.  This way, if we
+			 * get out to userspace running on the kernel CR3,
+			 * userspace will crash instead of running.
+			 */
+			pgd.pgd |= _PAGE_NX;
+		}
+	} else if (!pgd.pgd) {
+		/*
+		 * pgd_clear() cannot check _PAGE_USER, and is even used to
+		 * clear corrupted pgd entries: so just rely on cases like
+		 * kexec and EFI never to be using pgd_clear().
+		 */
+		if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
+		    is_userspace_pgd(pgdp))
+			native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+	}
+	return pgd;
+}
 #endif /* CONFIG_KAISER */
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aed2064..9284ec1 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -189,6 +189,6 @@ void __meminit init_trampoline(void)
 		*pud_tramp = *pud;
 	}
 
-	set_pgd(&trampoline_pgd_entry,
-		__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+	/* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */
+	trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
 }
-- 
2.7.4