diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch | 212 |
1 file changed, 212 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch new file mode 100644 index 00000000..01f7310c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch @@ -0,0 +1,212 @@ +From 4e010256639fdd9c87743dc7c7ad6a53bc96c1af Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Tue, 5 Sep 2017 12:05:01 -0700 +Subject: [PATCH 006/102] kaiser: do not set _PAGE_NX on pgd_none + +native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must +avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry: +usually that just generated a warning on exit, but sometimes +more mysterious and damaging failures (our production machines +could not complete booting). + +The original fix to this just avoided adding _PAGE_NX to +an empty entry; but eventually more problems surfaced with kexec, +and EFI mapping expected to be a problem too. So now instead +change native_set_pgd() to update shadow only if _PAGE_USER: + +A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure) +use set_pgd() to set up a temporary internal virtual address space, with +physical pages remapped at what Kaiser regards as userspace addresses: +Kaiser then assumes a shadow pgd follows, which it will try to corrupt. + +This appears to be responsible for the recent kexec and kdump failures; +though it's unclear how those did not manifest as a problem before. +Ah, the shadow pgd will only be assumed to "follow" if the requested +pgd is on an even-numbered page: so I suppose it was going wrong 50% +of the time all along. + +What we need is a flag to set_pgd(), to tell it we're dealing with +userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying? +Add a test for that. 
But we cannot do the same for pgd_clear() +(which may be called to clear corrupted entries - set aside the +question of "corrupt in which pgd?" until later), so there just +rely on pgd_clear() not being called in the problematic cases - +with a WARN_ON_ONCE() which should fire half the time if it is. + +But this is getting too big for an inline function: move it into +arch/x86/mm/kaiser.c (which then demands a boot/compressed mod); +and de-void and de-space native_get_shadow/normal_pgd() while here. + +Also make an unnecessary change to KASLR's init_trampoline(): it was +using set_pgd() to assign a pgd-value to a global variable (not in a +pg directory page), which was rather scary given Kaiser's previous +set_pgd() implementation: not a problem now, but too scary to leave +as was, it could easily blow up if we have to change set_pgd() again. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/boot/compressed/misc.h | 1 + + arch/x86/include/asm/pgtable_64.h | 51 ++++++++++----------------------------- + arch/x86/mm/kaiser.c | 42 ++++++++++++++++++++++++++++++++ + arch/x86/mm/kaslr.c | 4 +-- + 4 files changed, 58 insertions(+), 40 deletions(-) + +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index 1c8355e..cd80024 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -9,6 +9,7 @@ + */ + #undef CONFIG_PARAVIRT + #undef CONFIG_PARAVIRT_SPINLOCKS ++#undef CONFIG_KAISER + #undef CONFIG_KASAN + + #include <linux/linkage.h> +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index 000265c..177caf3 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud) + } + + #ifdef CONFIG_KAISER +-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t 
pgd); ++ ++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + { +- return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); ++ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); + } + +-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) + { +- return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); ++ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); + } + #else +-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) ++{ ++ return pgd; ++} ++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + { + BUILD_BUG_ON(1); + return NULL; + } +-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) + { + return pgdp; + } + #endif /* CONFIG_KAISER */ + +-/* +- * Page table pages are page-aligned. The lower half of the top +- * level is used for userspace and the top half for the kernel. +- * This returns true for user pages that need to get copied into +- * both the user and kernel copies of the page tables, and false +- * for kernel pages that should only be in the kernel copy. +- */ +-static inline bool is_userspace_pgd(void *__ptr) +-{ +- unsigned long ptr = (unsigned long)__ptr; +- +- return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); +-} +- + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { +-#ifdef CONFIG_KAISER +- pteval_t extra_kern_pgd_flags = 0; +- /* Do we need to also populate the shadow pgd? */ +- if (is_userspace_pgd(pgdp)) { +- native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; +- /* +- * Even if the entry is *mapping* userspace, ensure +- * that userspace can not use it. This way, if we +- * get out to userspace running on the kernel CR3, +- * userspace will crash instead of running. 
+- */ +- extra_kern_pgd_flags = _PAGE_NX; +- } +- pgdp->pgd = pgd.pgd; +- pgdp->pgd |= extra_kern_pgd_flags; +-#else /* CONFIG_KAISER */ +- *pgdp = pgd; +-#endif ++ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); + } + + static inline void native_pgd_clear(pgd_t *pgd) +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 7270a29..8d6061c 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) + unmap_pud_range_nofree(pgd, addr, end); + } + } ++ ++/* ++ * Page table pages are page-aligned. The lower half of the top ++ * level is used for userspace and the top half for the kernel. ++ * This returns true for user pages that need to get copied into ++ * both the user and kernel copies of the page tables, and false ++ * for kernel pages that should only be in the kernel copy. ++ */ ++static inline bool is_userspace_pgd(pgd_t *pgdp) ++{ ++ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2); ++} ++ ++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) ++{ ++ /* ++ * Do we need to also populate the shadow pgd? Check _PAGE_USER to ++ * skip cases like kexec and EFI which make temporary low mappings. ++ */ ++ if (pgd.pgd & _PAGE_USER) { ++ if (is_userspace_pgd(pgdp)) { ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ /* ++ * Even if the entry is *mapping* userspace, ensure ++ * that userspace can not use it. This way, if we ++ * get out to userspace running on the kernel CR3, ++ * userspace will crash instead of running. ++ */ ++ pgd.pgd |= _PAGE_NX; ++ } ++ } else if (!pgd.pgd) { ++ /* ++ * pgd_clear() cannot check _PAGE_USER, and is even used to ++ * clear corrupted pgd entries: so just rely on cases like ++ * kexec and EFI never to be using pgd_clear(). 
++ */ ++ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) && ++ is_userspace_pgd(pgdp)) ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ } ++ return pgd; ++} + #endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c +index aed2064..9284ec1 100644 +--- a/arch/x86/mm/kaslr.c ++++ b/arch/x86/mm/kaslr.c +@@ -189,6 +189,6 @@ void __meminit init_trampoline(void) + *pud_tramp = *pud; + } + +- set_pgd(&trampoline_pgd_entry, +- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); ++ /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ ++ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); + } +-- +2.7.4 + |