Diffstat (limited to 'common/recipes-kernel/linux')
171 files changed, 22028 insertions, 0 deletions
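The patch files that follow backport the KAISER/KPTI page-table isolation series and related Meltdown/Spectre mitigations to the linux-yocto 4.9.21 kernel. As a reader aid, and not part of this change itself, here is a minimal sketch of how a BSP layer typically wires such patches into the kernel build: BitBake applies any file:// entries listed in SRC_URI on top of the kernel source during do_patch. The bbappend file name and recipe wiring below are illustrative assumptions; only the patch file names are taken from this listing.

    # linux-yocto_4.9.bbappend -- hypothetical sketch, not a file in this change
    FILESEXTRAPATHS_prepend := "${THISDIR}/linux-yocto-4.9.21:"
    SRC_URI += " \
        file://0001-kaiser-fix-compile-error-without-vsyscall.patch \
        file://0001-kaiser-fix-intel_bts-perf-crashes.patch \
        file://0004-KAISER-Kernel-Address-Isolation.patch \
    "
    # ...and so on for the remaining patches in the diffstat, in series order.

With such an append in place, building virtual/kernel would apply the patches before the kernel is configured and compiled.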
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch new file mode 100644 index 00000000..bb09930a --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch @@ -0,0 +1,50 @@ +From 0f1e01960c3e082feac098be5b754ad3e06c820a Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Tue, 13 Feb 2018 16:45:20 +0100 +Subject: [PATCH 01/12] kaiser: fix compile error without vsyscall +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80: +arch/x86/mm/kaiser.c: In function ‘kaiser_init’: +arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared + (first use in this function) + +It seems like his combination of kernel options doesn't work for KAISER. +X86_VSYSCALL_EMULATION is not set on his system, while LEGACY_VSYSCALL +is set to NONE (LEGACY_VSYSCALL_NONE=y). He managed to get things +compiling again, by moving the 'extern unsigned long vsyscall_pgprot' +outside of the preprocessor statement. This works because the optimizer +removes that code (vsyscall_enabled() is always false) - and that's how +it was done in some older backports. + +Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/vsyscall.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h +index 9ee8506..62210da 100644 +--- a/arch/x86/include/asm/vsyscall.h ++++ b/arch/x86/include/asm/vsyscall.h +@@ -13,7 +13,6 @@ extern void map_vsyscall(void); + */ + extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); + extern bool vsyscall_enabled(void); +-extern unsigned long vsyscall_pgprot; + #else + static inline void map_vsyscall(void) {} + static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +@@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) + } + static inline bool vsyscall_enabled(void) { return false; } + #endif ++extern unsigned long vsyscall_pgprot; + + #endif /* _ASM_X86_VSYSCALL_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch new file mode 100644 index 00000000..3e53e978 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch @@ -0,0 +1,135 @@ +From f07b0b948b09b02e7386560ad509d1afdbd6ef0b Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Mon, 29 Jan 2018 18:16:55 -0800 +Subject: [PATCH 01/42] kaiser: fix intel_bts perf crashes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI; +Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7: +honggfuzz -f /tmp/somedirectorywithatleastonefile \ + --linux_perf_bts_edge -s -- /bin/true +(honggfuzz from https://github.com/google/honggfuzz) crashed with +BUG: unable to handle kernel paging request at ffff9d3215100000 +(then narrowed it down to +perf record 
--per-thread -e intel_bts//u -- /bin/ls). + +The intel_bts driver does not use the 'normal' BTS buffer which is +exposed through kaiser_add_mapping(), but instead uses the memory +allocated for the perf AUX buffer. + +This obviously comes apart when using PTI, because then the kernel +mapping, which includes that AUX buffer memory, disappears while +switched to user page tables. + +Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping() +to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit +99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now. + +Slightly reorganized surrounding code in bts_buffer_setup_aux(), +so it can better match bts_buffer_free_aux(): free_aux with an #ifdef +to avoid the loop when PTI is off, but setup_aux needs to loop anyway +(and kaiser_add_mapping() is cheap when PTI config is off or "pti=off"). + +Reported-by: Vince Weaver <vincent.weaver@maine.edu> +Reported-by: Robert Święcki <robert@swiecki.net> +Analyzed-by: Peter Zijlstra <peterz@infradead.org> +Analyzed-by: Stephane Eranian <eranian@google.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Ingo Molnar <mingo@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Vince Weaver <vince@deater.net> +Cc: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/events/intel/bts.c | 44 +++++++++++++++++++++++++++++++++----------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c +index 982c9e3..21298c1 100644 +--- a/arch/x86/events/intel/bts.c ++++ b/arch/x86/events/intel/bts.c +@@ -22,6 +22,7 @@ + #include <linux/debugfs.h> + #include <linux/device.h> + #include <linux/coredump.h> ++#include <linux/kaiser.h> + + #include <asm-generic/sizes.h> + #include <asm/perf_event.h> +@@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) + return 1 << (PAGE_SHIFT + page_private(page)); + } + ++static void bts_buffer_free_aux(void *data) ++{ ++#ifdef CONFIG_PAGE_TABLE_ISOLATION ++ struct bts_buffer *buf = data; ++ int nbuf; ++ ++ for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { ++ struct page *page = buf->buf[nbuf].page; ++ void *kaddr = page_address(page); ++ size_t page_size = buf_size(page); ++ ++ kaiser_remove_mapping((unsigned long)kaddr, page_size); ++ } ++#endif ++ kfree(data); ++} ++ + static void * + bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) + { +@@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) + buf->real_size = size - size % BTS_RECORD_SIZE; + + for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { +- unsigned int __nr_pages; ++ void *kaddr = pages[pg]; ++ size_t page_size; ++ ++ page = virt_to_page(kaddr); ++ page_size = buf_size(page); ++ ++ if (kaiser_add_mapping((unsigned long)kaddr, ++ page_size, __PAGE_KERNEL) < 0) { ++ buf->nr_bufs = nbuf; ++ bts_buffer_free_aux(buf); ++ return NULL; ++ } + +- page = virt_to_page(pages[pg]); +- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + buf->buf[nbuf].page = page; + buf->buf[nbuf].offset = offset; + buf->buf[nbuf].displacement = (pad ? 
BTS_RECORD_SIZE - pad : 0); +- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; ++ buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; + pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; + buf->buf[nbuf].size -= pad; + +- pg += __nr_pages; +- offset += __nr_pages << PAGE_SHIFT; ++ pg += page_size >> PAGE_SHIFT; ++ offset += page_size; + } + + return buf; + } + +-static void bts_buffer_free_aux(void *data) +-{ +- kfree(data); +-} +- + static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) + { + return buf->buf[idx].offset + buf->buf[idx].displacement; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch new file mode 100644 index 00000000..cca9a97c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch @@ -0,0 +1,183 @@ +From c1a85d38da405ddd17f7f5255a36405bf3414e60 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Mon, 17 Jul 2017 16:10:33 -0500 +Subject: [PATCH 001/102] x86/boot: Add early cmdline parsing for options with + arguments +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit e505371dd83963caae1a37ead9524e8d997341be upstream. + +Add a cmdline_find_option() function to look for cmdline options that +take arguments. The argument is returned in a supplied buffer and the +argument length (regardless of whether it fits in the supplied buffer) +is returned, with -1 indicating not found. + +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Alexander Potapenko <glider@google.com> +Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brijesh Singh <brijesh.singh@amd.com> +Cc: Dave Young <dyoung@redhat.com> +Cc: Dmitry Vyukov <dvyukov@google.com> +Cc: Jonathan Corbet <corbet@lwn.net> +Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Larry Woodman <lwoodman@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Matt Fleming <matt@codeblueprint.co.uk> +Cc: Michael S. 
Tsirkin <mst@redhat.com> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Rik van Riel <riel@redhat.com> +Cc: Toshimitsu Kani <toshi.kani@hpe.com> +Cc: kasan-dev@googlegroups.com +Cc: kvm@vger.kernel.org +Cc: linux-arch@vger.kernel.org +Cc: linux-doc@vger.kernel.org +Cc: linux-efi@vger.kernel.org +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cmdline.h | 2 + + arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 107 insertions(+) + +diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h +index e01f7f7..84ae170 100644 +--- a/arch/x86/include/asm/cmdline.h ++++ b/arch/x86/include/asm/cmdline.h +@@ -2,5 +2,7 @@ + #define _ASM_X86_CMDLINE_H + + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); ++int cmdline_find_option(const char *cmdline_ptr, const char *option, ++ char *buffer, int bufsize); + + #endif /* _ASM_X86_CMDLINE_H */ +diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c +index 5cc78bf..3261abb 100644 +--- a/arch/x86/lib/cmdline.c ++++ b/arch/x86/lib/cmdline.c +@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, + return 0; /* Buffer overrun */ + } + ++/* ++ * Find a non-boolean option (i.e. option=argument). In accordance with ++ * standard Linux practice, if this option is repeated, this returns the ++ * last instance on the command line. ++ * ++ * @cmdline: the cmdline string ++ * @max_cmdline_size: the maximum size of cmdline ++ * @option: option string to look for ++ * @buffer: memory buffer to return the option argument ++ * @bufsize: size of the supplied memory buffer ++ * ++ * Returns the length of the argument (regardless of if it was ++ * truncated to fit in the buffer), or -1 on not found. ++ */ ++static int ++__cmdline_find_option(const char *cmdline, int max_cmdline_size, ++ const char *option, char *buffer, int bufsize) ++{ ++ char c; ++ int pos = 0, len = -1; ++ const char *opptr = NULL; ++ char *bufptr = buffer; ++ enum { ++ st_wordstart = 0, /* Start of word/after whitespace */ ++ st_wordcmp, /* Comparing this word */ ++ st_wordskip, /* Miscompare, skip */ ++ st_bufcpy, /* Copying this to buffer */ ++ } state = st_wordstart; ++ ++ if (!cmdline) ++ return -1; /* No command line */ ++ ++ /* ++ * This 'pos' check ensures we do not overrun ++ * a non-NULL-terminated 'cmdline' ++ */ ++ while (pos++ < max_cmdline_size) { ++ c = *(char *)cmdline++; ++ if (!c) ++ break; ++ ++ switch (state) { ++ case st_wordstart: ++ if (myisspace(c)) ++ break; ++ ++ state = st_wordcmp; ++ opptr = option; ++ /* fall through */ ++ ++ case st_wordcmp: ++ if ((c == '=') && !*opptr) { ++ /* ++ * We matched all the way to the end of the ++ * option we were looking for, prepare to ++ * copy the argument. ++ */ ++ len = 0; ++ bufptr = buffer; ++ state = st_bufcpy; ++ break; ++ } else if (c == *opptr++) { ++ /* ++ * We are currently matching, so continue ++ * to the next character on the cmdline. 
++ */ ++ break; ++ } ++ state = st_wordskip; ++ /* fall through */ ++ ++ case st_wordskip: ++ if (myisspace(c)) ++ state = st_wordstart; ++ break; ++ ++ case st_bufcpy: ++ if (myisspace(c)) { ++ state = st_wordstart; ++ } else { ++ /* ++ * Increment len, but don't overrun the ++ * supplied buffer and leave room for the ++ * NULL terminator. ++ */ ++ if (++len < bufsize) ++ *bufptr++ = c; ++ } ++ break; ++ } ++ } ++ ++ if (bufsize) ++ *bufptr = '\0'; ++ ++ return len; ++} ++ + int cmdline_find_option_bool(const char *cmdline, const char *option) + { + return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + } ++ ++int cmdline_find_option(const char *cmdline, const char *option, char *buffer, ++ int bufsize) ++{ ++ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, ++ buffer, bufsize); ++} +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch new file mode 100644 index 00000000..db1a2245 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch @@ -0,0 +1,105 @@ +From 0b113edb84e5133f4844eeec2889faced402a41c Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sat, 22 Apr 2017 00:01:20 -0700 +Subject: [PATCH 01/14] x86/mm: Remove flush_tlb() and flush_tlb_current_task() + +commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream. + +I was trying to figure out what how flush_tlb_current_task() would +possibly work correctly if current->mm != current->active_mm, but I +realized I could spare myself the effort: it has no callers except +the unused flush_tlb() macro. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Nadav Amit <namit@vmware.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 9 --------- + arch/x86/mm/tlb.c | 17 ----------------- + 2 files changed, 26 deletions(-) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 183af59..db8952a 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -261,7 +261,6 @@ static inline void __flush_tlb_one(unsigned long addr) + /* + * TLB flushing: + * +- * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page +@@ -293,11 +292,6 @@ static inline void flush_tlb_all(void) + __flush_tlb_all(); + } + +-static inline void flush_tlb(void) +-{ +- __flush_tlb_up(); +-} +- + static inline void local_flush_tlb(void) + { + __flush_tlb_up(); +@@ -359,14 +353,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, + flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) + + extern void flush_tlb_all(void); +-extern void flush_tlb_current_task(void); + extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long vmflag); + extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +-#define flush_tlb() flush_tlb_current_task() +- + void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long start, unsigned long end); +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 0cf44ac..c045051 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -320,23 +320,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, + smp_call_function_many(cpumask, flush_tlb_func, &info, 1); + } + +-void flush_tlb_current_task(void) +-{ +- struct mm_struct *mm = current->mm; +- +- preempt_disable(); +- +- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); +- +- /* This is an implicit full barrier that synchronizes with switch_mm. */ +- local_flush_tlb(); +- +- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); +- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) +- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); +- preempt_enable(); +-} +- + /* + * See Documentation/x86/tlb.txt for details. 
We choose 33 + * because it is large enough to cover the vast majority (at +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch new file mode 100644 index 00000000..1006a947 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch @@ -0,0 +1,117 @@ +From 5b4a083e3f13f1bbea53075da6dc33b1e9dc3b62 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 5 Feb 2018 17:18:17 -0800 +Subject: [PATCH 02/12] x86/entry/64/compat: Clear registers for compat + syscalls, to reduce speculation attack surface + +commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream. + +At entry userspace may have populated registers with values that could +otherwise be useful in a speculative execution attack. Clear them to +minimize the kernel's attack surface. + +Originally-From: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Cc: <stable@vger.kernel.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com +[ Made small improvements to the changelog. ] +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index d76a976..92c5573 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -83,15 +83,25 @@ ENTRY(entry_SYSENTER_compat) + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq $0 /* pt_regs->r8 = 0 */ ++ xorq %r8, %r8 /* nospec r8 */ + pushq $0 /* pt_regs->r9 = 0 */ ++ xorq %r9, %r9 /* nospec r9 */ + pushq $0 /* pt_regs->r10 = 0 */ ++ xorq %r10, %r10 /* nospec r10 */ + pushq $0 /* pt_regs->r11 = 0 */ ++ xorq %r11, %r11 /* nospec r11 */ + pushq %rbx /* pt_regs->rbx */ ++ xorl %ebx, %ebx /* nospec rbx */ + pushq %rbp /* pt_regs->rbp (will be overwritten) */ ++ xorl %ebp, %ebp /* nospec rbp */ + pushq $0 /* pt_regs->r12 = 0 */ ++ xorq %r12, %r12 /* nospec r12 */ + pushq $0 /* pt_regs->r13 = 0 */ ++ xorq %r13, %r13 /* nospec r13 */ + pushq $0 /* pt_regs->r14 = 0 */ ++ xorq %r14, %r14 /* nospec r14 */ + pushq $0 /* pt_regs->r15 = 0 */ ++ xorq %r15, %r15 /* nospec r15 */ + cld + + /* +@@ -209,15 +219,25 @@ ENTRY(entry_SYSCALL_compat) + pushq %rbp /* pt_regs->cx (stashed in bp) */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq $0 /* pt_regs->r8 = 0 */ ++ xorq %r8, %r8 /* nospec r8 */ + pushq $0 /* pt_regs->r9 = 0 */ ++ xorq %r9, %r9 /* nospec r9 */ + pushq $0 /* pt_regs->r10 = 0 */ ++ xorq %r10, %r10 /* nospec r10 */ + pushq $0 /* pt_regs->r11 = 0 */ ++ xorq %r11, %r11 /* nospec r11 */ + pushq %rbx /* pt_regs->rbx */ ++ xorl %ebx, %ebx /* nospec rbx */ + pushq %rbp /* pt_regs->rbp (will be overwritten) */ ++ xorl %ebp, %ebp /* nospec rbp */ + pushq $0 /* pt_regs->r12 = 
0 */ ++ xorq %r12, %r12 /* nospec r12 */ + pushq $0 /* pt_regs->r13 = 0 */ ++ xorq %r13, %r13 /* nospec r13 */ + pushq $0 /* pt_regs->r14 = 0 */ ++ xorq %r14, %r14 /* nospec r14 */ + pushq $0 /* pt_regs->r15 = 0 */ ++ xorq %r15, %r15 /* nospec r15 */ + + /* + * User mode is traced as though IRQs are on, and SYSENTER +@@ -320,15 +340,25 @@ ENTRY(entry_INT80_compat) + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq $0 /* pt_regs->r8 = 0 */ ++ xorq %r8, %r8 /* nospec r8 */ + pushq $0 /* pt_regs->r9 = 0 */ ++ xorq %r9, %r9 /* nospec r9 */ + pushq $0 /* pt_regs->r10 = 0 */ ++ xorq %r10, %r10 /* nospec r10 */ + pushq $0 /* pt_regs->r11 = 0 */ ++ xorq %r11, %r11 /* nospec r11 */ + pushq %rbx /* pt_regs->rbx */ ++ xorl %ebx, %ebx /* nospec rbx */ + pushq %rbp /* pt_regs->rbp */ ++ xorl %ebp, %ebp /* nospec rbp */ + pushq %r12 /* pt_regs->r12 */ ++ xorq %r12, %r12 /* nospec r12 */ + pushq %r13 /* pt_regs->r13 */ ++ xorq %r13, %r13 /* nospec r13 */ + pushq %r14 /* pt_regs->r14 */ ++ xorq %r14, %r14 /* nospec r14 */ + pushq %r15 /* pt_regs->r15 */ ++ xorq %r15, %r15 /* nospec r15 */ + cld + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch new file mode 100644 index 00000000..ca442137 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch @@ -0,0 +1,77 @@ +From 745a39ba56433b3a62b6c9fba45efa0038a0f19b Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 29 Jun 2017 08:53:20 -0700 +Subject: [PATCH 002/102] x86/mm: Add the 'nopcid' boot option to turn off PCID + +commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream. + +The parameter is only present on x86_64 systems to save a few bytes, +as PCID is always disabled on x86_32. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Reviewed-by: Nadav Amit <nadav.amit@gmail.com> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 2 ++ + arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++ + 2 files changed, 20 insertions(+) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 65b05ba..a303387 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2785,6 +2785,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + nopat [X86] Disable PAT (page attribute table extension of + pagetables) support. + ++ nopcid [X86-64] Disable the PCID cpu feature. ++ + norandmaps Don't use address space randomization. 
Equivalent to + echo 0 > /proc/sys/kernel/randomize_va_space + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4eece91..81c8a53 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s) + } + __setup("nompx", x86_mpx_setup); + ++#ifdef CONFIG_X86_64 ++static int __init x86_pcid_setup(char *s) ++{ ++ /* require an exact match without trailing characters */ ++ if (strlen(s)) ++ return 0; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_PCID)) ++ return 1; ++ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++ pr_info("nopcid: PCID feature disabled\n"); ++ return 1; ++} ++__setup("nopcid", x86_pcid_setup); ++#endif ++ + static int __init x86_noinvpcid_setup(char *s) + { + /* noinvpcid doesn't accept parameters */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch new file mode 100644 index 00000000..125c9159 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch @@ -0,0 +1,83 @@ +From d7185b4bc1a4bb697f514e447697bd535979dac3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sat, 22 Apr 2017 00:01:21 -0700 +Subject: [PATCH 02/14] x86/mm: Make flush_tlb_mm_range() more predictable + +commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream. + +I'm about to rewrite the function almost completely, but first I +want to get a functional change out of the way. Currently, if +flush_tlb_mm_range() does not flush the local TLB at all, it will +never do individual page flushes on remote CPUs. This seems to be +an accident, and preserving it will be awkward. Let's change it +first so that any regressions in the rewrite will be easier to +bisect and so that the rewrite can attempt to change no visible +behavior at all. + +The fix is simple: we can simply avoid short-circuiting the +calculation of base_pages_to_flush. + +As a side effect, this also eliminates a potential corner case: if +tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range() +could have ended up flushing the entire address space one page at a +time. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Acked-by: Dave Hansen <dave.hansen@intel.com> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Nadav Amit <namit@vmware.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/tlb.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index c045051..2f9d41f 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -340,6 +340,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long base_pages_to_flush = TLB_FLUSH_ALL; + + preempt_disable(); ++ ++ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) ++ base_pages_to_flush = (end - start) >> PAGE_SHIFT; ++ if (base_pages_to_flush > tlb_single_page_flush_ceiling) ++ base_pages_to_flush = TLB_FLUSH_ALL; ++ + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); +@@ -356,15 +362,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + goto out; + } + +- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) +- base_pages_to_flush = (end - start) >> PAGE_SHIFT; +- + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. + */ +- if (base_pages_to_flush > tlb_single_page_flush_ceiling) { +- base_pages_to_flush = TLB_FLUSH_ALL; ++ if (base_pages_to_flush == TLB_FLUSH_ALL) { + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + local_flush_tlb(); + } else { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch new file mode 100644 index 00000000..730dc7cc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch @@ -0,0 +1,74 @@ +From 3474ee0a656102dc872ccffc8a80eeb87a9ce502 Mon Sep 17 00:00:00 2001 +From: Dave Hansen <dave.hansen@linux.intel.com> +Date: Mon, 29 Jan 2018 18:17:26 -0800 +Subject: [PATCH 02/42] x86/pti: Make unpoison of pgd for trusted boot work for + real + +commit 445b69e3b75e42362a5bdc13c8b8f61599e2228a upstream + +The inital fix for trusted boot and PTI potentially misses the pgd clearing +if pud_alloc() sets a PGD. It probably works in *practice* because for two +adjacent calls to map_tboot_page() that share a PGD entry, the first will +clear NX, *then* allocate and set the PGD (without NX clear). The second +call will *not* allocate but will clear the NX bit. + +Defer the NX clearing to a point after it is known that all top-level +allocations have occurred. Add a comment to clarify why. + +[ tglx: Massaged changelog ] + +[ hughd notes: I have not tested tboot, but this looks to me as necessary +and as safe in old-Kaiser backports as it is upstream; I'm not submitting +the commit-to-be-fixed 262b6b30087, since it was undone by 445b69e3b75e, +and makes conflict trouble because of 5-level's p4d versus 4-level's pgd.] 
+ +Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> +Cc: Jon Masters <jcm@redhat.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: peterz@infradead.org +Cc: ning.sun@intel.com +Cc: tboot-devel@lists.sourceforge.net +Cc: andi@firstfloor.org +Cc: luto@kernel.org +Cc: law@redhat.com +Cc: pbonzini@redhat.com +Cc: torvalds@linux-foundation.org +Cc: gregkh@linux-foundation.org +Cc: dwmw@amazon.co.uk +Cc: nickc@redhat.com +Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com +Cc: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/tboot.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c +index 8402907..21454e2 100644 +--- a/arch/x86/kernel/tboot.c ++++ b/arch/x86/kernel/tboot.c +@@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, + return -1; + set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); + pte_unmap(pte); ++ ++ /* ++ * PTI poisons low addresses in the kernel page tables in the ++ * name of making them unusable for userspace. To execute ++ * code at such a low address, the poison must be cleared. ++ * ++ * Note: 'pgd' actually gets set in pud_alloc(). ++ */ ++ pgd->pgd &= ~_PAGE_NX; ++ + return 0; + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch new file mode 100644 index 00000000..df60ee58 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch @@ -0,0 +1,69 @@ +From cff1c9cfd81b8a7cc350a02d37668b1e3896287e Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Mon, 29 Jan 2018 18:17:58 -0800 +Subject: [PATCH 03/42] kaiser: allocate pgd with order 0 when pti=off + +The 4.9.77 version of "x86/pti/efi: broken conversion from efi to kernel +page table" looked nicer than the 4.4.112 version, but was suboptimal on +machines booted with "pti=off" (or on AMD machines): it allocated pgd +with an order 1 page whatever the setting of kaiser_enabled. + +Fix that by moving the definition of PGD_ALLOCATION_ORDER from +asm/pgalloc.h to asm/pgtable.h, which already defines kaiser_enabled. + +Fixes: 1b92c48a2eeb ("x86/pti/efi: broken conversion from efi to kernel page table") +Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> +Cc: Steven Sistare <steven.sistare@oracle.com> +Cc: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/pgalloc.h | 11 ----------- + arch/x86/include/asm/pgtable.h | 6 ++++++ + 2 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h +index 1178a51..b6d4259 100644 +--- a/arch/x86/include/asm/pgalloc.h ++++ b/arch/x86/include/asm/pgalloc.h +@@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} + */ + extern gfp_t __userpte_alloc_gfp; + +-#ifdef CONFIG_PAGE_TABLE_ISOLATION +-/* +- * Instead of one PGD, we acquire two PGDs. 
Being order-1, it is +- * both 8k in size and 8k-aligned. That lets us just flip bit 12 +- * in a pointer to swap between the two 4k halves. +- */ +-#define PGD_ALLOCATION_ORDER 1 +-#else +-#define PGD_ALLOCATION_ORDER 0 +-#endif +- + /* + * Allocate and free page tables. + */ +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 2536f90..5af0401 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -20,9 +20,15 @@ + + #ifdef CONFIG_PAGE_TABLE_ISOLATION + extern int kaiser_enabled; ++/* ++ * Instead of one PGD, we acquire two PGDs. Being order-1, it is ++ * both 8k in size and 8k-aligned. That lets us just flip bit 12 ++ * in a pointer to swap between the two 4k halves. ++ */ + #else + #define kaiser_enabled 0 + #endif ++#define PGD_ALLOCATION_ORDER kaiser_enabled + + void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); + void ptdump_walk_pgd_level_checkwx(void); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch new file mode 100644 index 00000000..52c9e9af --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch @@ -0,0 +1,114 @@ +From 10a3a93d1447313c0a4509ae140a0d0aa1e9acb6 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 29 Jun 2017 08:53:21 -0700 +Subject: [PATCH 003/102] x86/mm: Enable CR4.PCIDE on supported systems + +commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream. + +We can use PCID if the CPU has PCID and PGE and we're not on Xen. + +By itself, this has no effect. A followup patch will start using PCID. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Reviewed-by: Nadav Amit <nadav.amit@gmail.com> +Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Juergen Gross <jgross@suse.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 8 ++++++++ + arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++ + arch/x86/xen/enlighten.c | 6 ++++++ + 3 files changed, 36 insertions(+) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index fc5abff..c13041e 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -192,6 +192,14 @@ static inline void __flush_tlb_all(void) + __flush_tlb_global(); + else + __flush_tlb(); ++ ++ /* ++ * Note: if we somehow had PCID but not PGE, then this wouldn't work -- ++ * we'd end up flushing kernel translations for the current ASID but ++ * we might fail to flush kernel translations for other cached ASIDs. ++ * ++ * To avoid this issue, we force PCID off if PGE is off. 
++ */ + } + + static inline void __flush_tlb_one(unsigned long addr) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 81c8a53..91588be 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -324,6 +324,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) + } + } + ++static void setup_pcid(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_PCID)) { ++ if (cpu_has(c, X86_FEATURE_PGE)) { ++ cr4_set_bits(X86_CR4_PCIDE); ++ } else { ++ /* ++ * flush_tlb_all(), as currently implemented, won't ++ * work if PCID is on but PGE is not. Since that ++ * combination doesn't exist on real hardware, there's ++ * no reason to try to fully support it, but it's ++ * polite to avoid corrupting data if we're on ++ * an improperly configured VM. ++ */ ++ clear_cpu_cap(c, X86_FEATURE_PCID); ++ } ++ } ++} ++ + /* + * Protection Keys are not available in 32-bit mode. + */ +@@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) + setup_smep(c); + setup_smap(c); + ++ /* Set up PCID */ ++ setup_pcid(c); ++ + /* + * The vendor-specific functions might have changed features. + * Now we do "generic changes." +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index bdd8556..5226379 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -442,6 +442,12 @@ static void __init xen_init_cpuid_mask(void) + ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + ++ /* ++ * Xen PV would need some work to support PCID: CR3 handling as well ++ * as xen_flush_tlb_others() would need updating. ++ */ ++ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */ ++ + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch new file mode 100644 index 00000000..07dd1bf0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch @@ -0,0 +1,109 @@ +From f34570e1f6c56f5557b9a3acd73fce47f5727479 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Mon, 22 May 2017 15:30:01 -0700 +Subject: [PATCH 03/14] x86/mm: Reimplement flush_tlb_page() using + flush_tlb_mm_range() + +commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream. + +flush_tlb_page() was very similar to flush_tlb_mm_range() except that +it had a couple of issues: + + - It was missing an smp_mb() in the case where + current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit) + + - It was missing tracepoints and vm counter updates. + +The only reason that I can see for keeping it at as a separate +function is that it could avoid a few branches that +flush_tlb_mm_range() needs to decide to flush just one page. This +hardly seems worthwhile. If we decide we want to get rid of those +branches again, a better way would be to introduce an +__flush_tlb_mm_range() helper and make both flush_tlb_page() and +flush_tlb_mm_range() use it. 
+ +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Acked-by: Kees Cook <keescook@chromium.org> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Borislav Petkov <bpetkov@suse.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Nadav Amit <nadav.amit@gmail.com> +Cc: Nadav Amit <namit@vmware.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 6 +++++- + arch/x86/mm/tlb.c | 27 --------------------------- + 2 files changed, 5 insertions(+), 28 deletions(-) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index db8952a..eb5b512 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -353,11 +353,15 @@ static inline void flush_tlb_kernel_range(unsigned long start, + flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) + + extern void flush_tlb_all(void); +-extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long vmflag); + extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + ++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) ++{ ++ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); ++} ++ + void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long start, unsigned long end); +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 2f9d41f..6884228 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -387,33 +387,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + preempt_enable(); + } + +-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) +-{ +- struct mm_struct *mm = vma->vm_mm; +- +- preempt_disable(); +- +- if (current->active_mm == mm) { +- if (current->mm) { +- /* +- * Implicit full barrier (INVLPG) that synchronizes +- * with switch_mm. +- */ +- __flush_tlb_one(start); +- } else { +- leave_mm(smp_processor_id()); +- +- /* Synchronize with switch_mm. 
*/ +- smp_mb(); +- } +- } +- +- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) +- flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); +- +- preempt_enable(); +-} +- + static void do_flush_tlb_all(void *info) + { + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch new file mode 100644 index 00000000..c78b3e80 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch @@ -0,0 +1,69 @@ +From f01ffef1901eda027651aba518686d44ed9fccf3 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sat, 10 Feb 2018 23:39:22 +0000 +Subject: [PATCH 03/12] x86/speculation: Update Speculation Control microcode + blacklist + +commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream. + +Intel have retroactively blessed the 0xc2 microcode on Skylake mobile +and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine +too. We blacklisted the latter purely because it was present with all +the other problematic ones in the 2018-01-08 release, but now it's +explicitly listed as OK. + +We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as +that appeared in one version of the blacklist and then reverted to +0x80 again. We can change it if 0x84 is actually announced to be safe. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: arjan.van.de.ven@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Cc: sironi@amazon.de +Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/intel.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 4097b43..e3b00ac 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -82,8 +82,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, +- { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, +- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, +@@ -95,8 +93,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, +- /* Updated in the 20180108 release; blacklist until we know otherwise */ +- { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { 
INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch new file mode 100644 index 00000000..7b0132c6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch @@ -0,0 +1,1025 @@ +From ebbc1312aa1e8495c5a920640ecd961251e136a8 Mon Sep 17 00:00:00 2001 +From: Richard Fellner <richard.fellner@student.tugraz.at> +Date: Thu, 4 May 2017 14:26:50 +0200 +Subject: [PATCH 004/102] KAISER: Kernel Address Isolation + +This patch introduces our implementation of KAISER (Kernel Address Isolation to +have Side-channels Efficiently Removed), a kernel isolation technique to close +hardware side channels on kernel address information. + +More information about the patch can be found on: + + https://github.com/IAIK/KAISER + +From: Richard Fellner <richard.fellner@student.tugraz.at> +From: Daniel Gruss <daniel.gruss@iaik.tugraz.at> +Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode +Date: Thu, 4 May 2017 14:26:50 +0200 +Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2 +Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5 + +To: <linux-kernel@vger.kernel.org> +To: <kernel-hardening@lists.openwall.com> +Cc: <clementine.maurice@iaik.tugraz.at> +Cc: <moritz.lipp@iaik.tugraz.at> +Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at> +Cc: Richard Fellner <richard.fellner@student.tugraz.at> +Cc: Ingo Molnar <mingo@kernel.org> +Cc: <kirill.shutemov@linux.intel.com> +Cc: <anders.fogh@gdata-adan.de> + +After several recent works [1,2,3] KASLR on x86_64 was basically +considered dead by many researchers. We have been working on an +efficient but effective fix for this problem and found that not mapping +the kernel space when running in user mode is the solution to this +problem [4] (the corresponding paper [5] will be presented at ESSoS17). + +With this RFC patch we allow anybody to configure their kernel with the +flag CONFIG_KAISER to add our defense mechanism. + +If there are any questions we would love to answer them. +We also appreciate any comments! 
+ +Cheers, +Daniel (+ the KAISER team from Graz University of Technology) + +[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf +[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf +[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf +[4] https://github.com/IAIK/KAISER +[5] https://gruss.cc/files/kaiser.pdf + +[patch based also on +https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch] + +Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> +Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> +Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> +Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 17 ++++ + arch/x86/entry/entry_64_compat.S | 7 +- + arch/x86/include/asm/hw_irq.h | 2 +- + arch/x86/include/asm/kaiser.h | 113 +++++++++++++++++++++++++ + arch/x86/include/asm/pgtable.h | 4 + + arch/x86/include/asm/pgtable_64.h | 21 +++++ + arch/x86/include/asm/pgtable_types.h | 12 ++- + arch/x86/include/asm/processor.h | 7 +- + arch/x86/kernel/cpu/common.c | 4 +- + arch/x86/kernel/espfix_64.c | 6 ++ + arch/x86/kernel/head_64.S | 16 +++- + arch/x86/kernel/irqinit.c | 2 +- + arch/x86/kernel/process.c | 2 +- + arch/x86/mm/Makefile | 2 +- + arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++ + arch/x86/mm/pageattr.c | 2 +- + arch/x86/mm/pgtable.c | 26 ++++++ + include/asm-generic/vmlinux.lds.h | 11 ++- + include/linux/percpu-defs.h | 30 +++++++ + init/main.c | 6 ++ + kernel/fork.c | 8 ++ + security/Kconfig | 7 ++ + 22 files changed, 449 insertions(+), 16 deletions(-) + create mode 100644 arch/x86/include/asm/kaiser.h + create mode 100644 arch/x86/mm/kaiser.c + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index ef766a3..6c880dc 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -36,6 +36,7 @@ + #include <asm/smap.h> + #include <asm/pgtable_types.h> + #include <asm/export.h> ++#include <asm/kaiser.h> + #include <linux/err.h> + + /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ +@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64) + * it is too small to ever cause noticeable irq latency. 
+ */ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + /* + * A hypervisor implementation might want to use a label + * after the swapgs, so that it can do the swapgs +@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath: + movq RIP(%rsp), %rcx + movq EFLAGS(%rsp), %r11 + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 + +@@ -323,10 +326,12 @@ return_from_SYSCALL_64: + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 + + opportunistic_sysret_failed: ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret + END(entry_SYSCALL_64) +@@ -424,6 +429,7 @@ ENTRY(ret_from_fork) + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_regs_and_iret + +@@ -478,6 +484,7 @@ END(irq_entries_start) + * tracking that we're in kernel mode. + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + /* + * We need to tell lockdep that IRQs are off. We can't do this until +@@ -535,6 +542,7 @@ GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode + TRACE_IRQS_IRETQ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_regs_and_iret + +@@ -612,6 +620,7 @@ native_irq_return_ldt: + + pushq %rdi /* Stash user RDI */ + SWAPGS ++ SWITCH_KERNEL_CR3 + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ + movq (1*8)(%rsp), %rax /* user RIP */ +@@ -638,6 +647,7 @@ native_irq_return_ldt: + * still points to an RO alias of the ESPFIX stack. + */ + orq PER_CPU_VAR(espfix_stack), %rax ++ SWITCH_USER_CR3 + SWAPGS + movq %rax, %rsp + +@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry) + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS ++ SWITCH_KERNEL_CR3 + xorl %ebx, %ebx + 1: ret + END(paranoid_entry) +@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit) + testl %ebx, %ebx /* swapgs needed? */ + jnz paranoid_exit_no_swapgs + TRACE_IRQS_IRETQ ++ SWITCH_USER_CR3_NO_STACK + SWAPGS_UNSAFE_STACK + jmp paranoid_exit_restore + paranoid_exit_no_swapgs: +@@ -1084,6 +1096,7 @@ ENTRY(error_entry) + * from user mode due to an IRET fault. + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + .Lerror_entry_from_usermode_after_swapgs: + /* +@@ -1135,6 +1148,7 @@ ENTRY(error_entry) + * Switch to kernel gsbase: + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1233,6 +1247,7 @@ ENTRY(nmi) + */ + + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + cld + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +@@ -1273,6 +1288,7 @@ ENTRY(nmi) + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret + +@@ -1484,6 +1500,7 @@ end_repeat_nmi: + testl %ebx, %ebx /* swapgs needed? */ + jnz nmi_restore + nmi_swapgs: ++ SWITCH_USER_CR3_NO_STACK + SWAPGS_UNSAFE_STACK + nmi_restore: + RESTORE_EXTRA_REGS +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index e1721da..f0e384e 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -13,6 +13,7 @@ + #include <asm/irqflags.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/kaiser.h> + #include <linux/linkage.h> + #include <linux/err.h> + +@@ -48,6 +49,7 @@ + ENTRY(entry_SYSENTER_compat) + /* Interrupts are off on entry. 
*/ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* +@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat) + ENTRY(entry_SYSCALL_compat) + /* Interrupts are off on entry. */ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + + /* Stash user ESP and switch to the kernel stack. */ + movl %esp, %r8d +@@ -259,6 +262,7 @@ sysret32_from_system_call: + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 ++ SWITCH_USER_CR3 + movq RSP-ORIG_RAX(%rsp), %rsp + swapgs + sysretl +@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat) + PARAVIRT_ADJUST_EXCEPTION_FRAME + ASM_CLAC /* Do this early to minimize exposure */ + SWAPGS +- ++ SWITCH_KERNEL_CR3_NO_STACK + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit +@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat) + + /* Go back to user mode. */ + TRACE_IRQS_ON ++ SWITCH_USER_CR3_NO_STACK + SWAPGS + jmp restore_regs_and_iret + END(entry_INT80_compat) +diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h +index b90e105..0817d63 100644 +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -178,7 +178,7 @@ extern char irq_entries_start[]; + #define VECTOR_RETRIGGERED ((void *)~0UL) + + typedef struct irq_desc* vector_irq_t[NR_VECTORS]; +-DECLARE_PER_CPU(vector_irq_t, vector_irq); ++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq); + + #endif /* !ASSEMBLY_ */ + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +new file mode 100644 +index 0000000..63ee830 +--- /dev/null ++++ b/arch/x86/include/asm/kaiser.h +@@ -0,0 +1,113 @@ ++#ifndef _ASM_X86_KAISER_H ++#define _ASM_X86_KAISER_H ++ ++/* This file includes the definitions for the KAISER feature. ++ * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. ++ * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, ++ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, ++ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. ++ * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. ++ * ++ * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions ++ * of the user space, or the stacks. 
++ */ ++#ifdef __ASSEMBLY__ ++#ifdef CONFIG_KAISER ++ ++.macro _SWITCH_TO_KERNEL_CR3 reg ++movq %cr3, \reg ++andq $(~0x1000), \reg ++movq \reg, %cr3 ++.endm ++ ++.macro _SWITCH_TO_USER_CR3 reg ++movq %cr3, \reg ++orq $(0x1000), \reg ++movq \reg, %cr3 ++.endm ++ ++.macro SWITCH_KERNEL_CR3 ++pushq %rax ++_SWITCH_TO_KERNEL_CR3 %rax ++popq %rax ++.endm ++ ++.macro SWITCH_USER_CR3 ++pushq %rax ++_SWITCH_TO_USER_CR3 %rax ++popq %rax ++.endm ++ ++.macro SWITCH_KERNEL_CR3_NO_STACK ++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) ++_SWITCH_TO_KERNEL_CR3 %rax ++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax ++.endm ++ ++ ++.macro SWITCH_USER_CR3_NO_STACK ++ ++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) ++_SWITCH_TO_USER_CR3 %rax ++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax ++ ++.endm ++ ++#else /* CONFIG_KAISER */ ++ ++.macro SWITCH_KERNEL_CR3 reg ++.endm ++.macro SWITCH_USER_CR3 reg ++.endm ++.macro SWITCH_USER_CR3_NO_STACK ++.endm ++.macro SWITCH_KERNEL_CR3_NO_STACK ++.endm ++ ++#endif /* CONFIG_KAISER */ ++#else /* __ASSEMBLY__ */ ++ ++ ++#ifdef CONFIG_KAISER ++// Upon kernel/user mode switch, it may happen that ++// the address space has to be switched before the registers have been stored. ++// To change the address space, another register is needed. ++// A register therefore has to be stored/restored. ++// ++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++#endif /* CONFIG_KAISER */ ++ ++/** ++ * shadowmem_add_mapping - map a virtual memory part to the shadow mapping ++ * @addr: the start address of the range ++ * @size: the size of the range ++ * @flags: The mapping flags of the pages ++ * ++ * the mapping is done on a global scope, so no bigger synchronization has to be done. ++ * the pages have to be manually unmapped again when they are not needed any longer. ++ */ ++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); ++ ++ ++/** ++ * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping ++ * @addr: the start address of the range ++ * @size: the size of the range ++ */ ++extern void kaiser_remove_mapping(unsigned long start, unsigned long size); ++ ++/** ++ * shadowmem_initialize_mapping - Initalize the shadow mapping ++ * ++ * most parts of the shadow mapping can be mapped upon boot time. ++ * only the thread stacks have to be mapped on runtime. ++ * the mapped regions are not unmapped at all. 
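
For orientation, an editorial sketch (not part of the quoted patch): the two _SWITCH_TO_* macros above work because each task's page-global directory is allocated as an 8kB-aligned pair elsewhere in this patch (see the NEXT_PGD_PAGE and _pgd_alloc hunks below), kernel copy in the low 4kB page and shadow (user) copy in the high one, so changing address space is just toggling bit 12 (0x1000) of the CR3 value. A minimal user-space sketch of that arithmetic, illustration only; the real switch must write %cr3 from assembly, with no usable stack:

#include <assert.h>
#include <stdint.h>

#define KAISER_PGD_PAIR_BIT 0x1000UL /* bit 12 == PAGE_SIZE */

/* andq $(~0x1000), \reg : select the kernel copy of the PGD pair */
static uint64_t kernel_cr3(uint64_t cr3) { return cr3 & ~KAISER_PGD_PAIR_BIT; }

/* orq $(0x1000), \reg : select the shadow (user) copy */
static uint64_t user_cr3(uint64_t cr3) { return cr3 | KAISER_PGD_PAIR_BIT; }

int main(void)
{
	uint64_t pgd_pair = 0x7ffe000; /* hypothetical 8kB-aligned PGD pair */

	assert(user_cr3(pgd_pair) == pgd_pair + 0x1000);
	assert(kernel_cr3(user_cr3(pgd_pair)) == pgd_pair);
	return 0;
}
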
++ */ ++extern void kaiser_init(void); ++ ++#endif ++ ++ ++ ++#endif /* _ASM_X86_KAISER_H */ +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 437feb4..4b479c9 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { + memcpy(dst, src, count * sizeof(pgd_t)); ++#ifdef CONFIG_KAISER ++ // clone the shadow pgd part as well ++ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); ++#endif + } + + #define PTE_SHIFT ilog2(PTRS_PER_PTE) +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index 1cc82ec..e6ea39f 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud) + native_set_pud(pud, native_make_pud(0)); + } + ++#ifdef CONFIG_KAISER ++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { ++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); ++} ++ ++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { ++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); ++} ++#endif /* CONFIG_KAISER */ ++ + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { ++#ifdef CONFIG_KAISER ++ // We know that a pgd is page aligned. ++ // Therefore the lower indices have to be mapped to user space. ++ // These pages are mapped to the shadow mapping. ++ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ } ++ ++ pgdp->pgd = pgd.pgd & ~_PAGE_USER; ++#else /* CONFIG_KAISER */ + *pgdp = pgd; ++#endif + } + + static inline void native_pgd_clear(pgd_t *pgd) +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index 8b4de22..00fecbb 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -45,7 +45,11 @@ + #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#ifdef CONFIG_KAISER ++#define _PAGE_GLOBAL (_AT(pteval_t, 0)) ++#else ++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#endif + #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) + #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +@@ -119,7 +123,11 @@ + #define _PAGE_DEVMAP (_AT(pteval_t, 0)) + #endif + +-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++#ifdef CONFIG_KAISER ++#define _PAGE_PROTNONE (_AT(pteval_t, 0)) ++#else ++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++#endif + + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 83db0ea..3d4784e2 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -308,7 +308,7 @@ struct tss_struct { + + } ____cacheline_aligned; + +-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); ++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss); + + #ifdef CONFIG_X86_32 + DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +@@ 
-335,6 +335,11 @@ union irq_stack_union { + char gs_base[40]; + unsigned long stack_canary; + }; ++ ++ struct { ++ char irq_stack_pointer[64]; ++ char unused[IRQ_STACK_SIZE - 64]; ++ }; + }; + + DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 91588be..3efde13 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu = { + + static const struct cpu_dev *this_cpu = &default_cpu; + +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = { + #ifdef CONFIG_X86_64 + /* + * We need valid kernel segments for data and code in long mode too +@@ -1365,7 +1365,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [DEBUG_STACK - 1] = DEBUG_STKSZ + }; + +-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks ++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); + + /* May not be marked __init: used by software suspend */ +diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c +index 04f89ca..9ff875a 100644 +--- a/arch/x86/kernel/espfix_64.c ++++ b/arch/x86/kernel/espfix_64.c +@@ -41,6 +41,7 @@ + #include <asm/pgalloc.h> + #include <asm/setup.h> + #include <asm/espfix.h> ++#include <asm/kaiser.h> + + /* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round +@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void) + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); ++#ifdef CONFIG_KAISER ++ // add the esp stack pud to the shadow mapping here. 
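
The set_pgd() call completed on the next line relies on the shadow-PGD helpers added to pgtable_64.h above. Stripped of the espfix specifics, the idiom is "write the same top-level entry into the page 4kB above the kernel PGD". A hedged sketch, assuming CONFIG_KAISER and kernel code that already includes <asm/pgtable.h> (the helper name is ours, not the patch's):

/*
 * Sketch only: mirror one kernel PGD entry into the shadow (user)
 * copy. native_get_shadow_pgd() ORs PAGE_SIZE into the pointer, so
 * this writes the slot with the same index, one page higher.
 */
static void kaiser_mirror_pgd_entry(pgd_t *kernel_entry)
{
	set_pgd(native_get_shadow_pgd(kernel_entry), *kernel_entry);
}

The "kaiser: merged update" patch later in this series reworks this espfix hunk with the same intent (its replacement comment reads "Just copy the top-level PGD").
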
++ // This can be done directly, because the fixup stack has its own pud ++ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); ++#endif + + /* Randomize the locations */ + init_espfix_random(); +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index b4421cc..9e849b5 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag) + .balign PAGE_SIZE; \ + GLOBAL(name) + ++#ifdef CONFIG_KAISER ++#define NEXT_PGD_PAGE(name) \ ++ .balign 2 * PAGE_SIZE; \ ++GLOBAL(name) ++#else ++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) ++#endif ++ + /* Automate the creation of 1 to 1 mapping pmd entries */ + #define PMDS(START, PERM, COUNT) \ + i = 0 ; \ +@@ -414,7 +422,7 @@ GLOBAL(name) + .endr + + __INITDATA +-NEXT_PAGE(early_level4_pgt) ++NEXT_PGD_PAGE(early_level4_pgt) + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + +@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts) + .data + + #ifndef CONFIG_XEN +-NEXT_PAGE(init_level4_pgt) +- .fill 512,8,0 ++NEXT_PGD_PAGE(init_level4_pgt) ++ .fill 2*512,8,0 + #else +-NEXT_PAGE(init_level4_pgt) ++NEXT_PGD_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c +index 1423ab1..f480b38 100644 +--- a/arch/x86/kernel/irqinit.c ++++ b/arch/x86/kernel/irqinit.c +@@ -51,7 +51,7 @@ static struct irqaction irq2 = { + .flags = IRQF_NO_THREAD, + }; + +-DEFINE_PER_CPU(vector_irq_t, vector_irq) = { ++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = { + [0 ... NR_VECTORS - 1] = VECTOR_UNUSED, + }; + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 8e10e72..a55b320 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -41,7 +41,7 @@ + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { ++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = { + .x86_tss = { + .sp0 = TOP_OF_INIT_STACK, + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile +index 96d2b84..682c162 100644 +--- a/arch/x86/mm/Makefile ++++ b/arch/x86/mm/Makefile +@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o + obj-$(CONFIG_X86_INTEL_MPX) += mpx.o + obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o + obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +- ++obj-$(CONFIG_KAISER) += kaiser.o +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +new file mode 100644 +index 0000000..cf1bb92 +--- /dev/null ++++ b/arch/x86/mm/kaiser.c +@@ -0,0 +1,160 @@ ++ ++ ++#include <linux/kernel.h> ++#include <linux/errno.h> ++#include <linux/string.h> ++#include <linux/types.h> ++#include <linux/bug.h> ++#include <linux/init.h> ++#include <linux/spinlock.h> ++#include <linux/mm.h> ++ ++#include <linux/uaccess.h> ++#include <asm/pgtable.h> ++#include <asm/pgalloc.h> ++#include <asm/desc.h> ++#ifdef CONFIG_KAISER ++ ++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++/** ++ * Get the real ppn from a address in kernel mapping. 
++ * @param address The virtual adrress ++ * @return the physical address ++ */ ++static inline unsigned long get_pa_from_mapping (unsigned long address) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ pgd = pgd_offset_k(address); ++ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); ++ ++ pud = pud_offset(pgd, address); ++ BUG_ON(pud_none(*pud)); ++ ++ if (pud_large(*pud)) { ++ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); ++ } ++ ++ pmd = pmd_offset(pud, address); ++ BUG_ON(pmd_none(*pmd)); ++ ++ if (pmd_large(*pmd)) { ++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); ++ } ++ ++ pte = pte_offset_kernel(pmd, address); ++ BUG_ON(pte_none(*pte)); ++ ++ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); ++} ++ ++void _kaiser_copy (unsigned long start_addr, unsigned long size, ++ unsigned long flags) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ unsigned long address; ++ unsigned long end_addr = start_addr + size; ++ unsigned long target_address; ++ ++ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); ++ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { ++ target_address = get_pa_from_mapping(address); ++ ++ pgd = native_get_shadow_pgd(pgd_offset_k(address)); ++ ++ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); ++ BUG_ON(pgd_large(*pgd)); ++ ++ pud = pud_offset(pgd, address); ++ if (pud_none(*pud)) { ++ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); ++ } ++ BUG_ON(pud_large(*pud)); ++ ++ pmd = pmd_offset(pud, address); ++ if (pmd_none(*pmd)) { ++ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); ++ } ++ BUG_ON(pmd_large(*pmd)); ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (pte_none(*pte)) { ++ set_pte(pte, __pte(flags | target_address)); ++ } else { ++ BUG_ON(__pa(pte_page(*pte)) != target_address); ++ } ++ } ++} ++ ++// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping ++static inline void __init _kaiser_init(void) ++{ ++ pgd_t *pgd; ++ int i = 0; ++ ++ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); ++ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { ++ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); ++ } ++} ++ ++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; ++spinlock_t shadow_table_lock; ++void __init kaiser_init(void) ++{ ++ int cpu; ++ spin_lock_init(&shadow_table_lock); ++ ++ spin_lock(&shadow_table_lock); ++ ++ _kaiser_init(); ++ ++ for_each_possible_cpu(cpu) { ++ // map the per cpu user variables ++ _kaiser_copy( ++ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), ++ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, ++ __PAGE_KERNEL); ++ } ++ ++ // map the entry/exit text section, which is responsible to switch between user- and kernel mode ++ _kaiser_copy( ++ (unsigned long) __entry_text_start, ++ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, ++ __PAGE_KERNEL_RX); ++ ++ // the fixed map address of the idt_table ++ _kaiser_copy( ++ (unsigned long) idt_descr.address, ++ sizeof(gate_desc) * NR_VECTORS, ++ __PAGE_KERNEL_RO); ++ ++ spin_unlock(&shadow_table_lock); ++} ++ ++// add a mapping to the shadow-mapping, and synchronize the mappings ++void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) ++{ ++ spin_lock(&shadow_table_lock); ++ _kaiser_copy(addr, size, flags); ++ 
spin_unlock(&shadow_table_lock); ++} ++ ++extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); ++void kaiser_remove_mapping(unsigned long start, unsigned long size) ++{ ++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); ++ spin_lock(&shadow_table_lock); ++ do { ++ unmap_pud_range(pgd, start, start + size); ++ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); ++ spin_unlock(&shadow_table_lock); ++} ++#endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +index e3353c9..c17412f 100644 +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) + pud_clear(pud); + } + +-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) ++void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + { + pud_t *pud = pud_offset(pgd, start); + +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 3feec5a..27d218b 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd) + #else + static inline pgd_t *_pgd_alloc(void) + { ++#ifdef CONFIG_KAISER ++ // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory ++ // block. Therefore, we have to allocate at least 3 pages. However, the ++ // __get_free_pages returns us 4 pages. Hence, we store the base pointer at ++ // the beginning of the page of our 8kb-aligned memory block in order to ++ // correctly free it afterwars. ++ ++ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); ++ ++ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) ++ { ++ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; ++ return (pgd_t *) pages; ++ } ++ else ++ { ++ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; ++ return (pgd_t *) (pages + PAGE_SIZE); ++ } ++#else + return (pgd_t *)__get_free_page(PGALLOC_GFP); ++#endif + } + + static inline void _pgd_free(pgd_t *pgd) + { ++#ifdef CONFIG_KAISER ++ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); ++ free_pages(pages, get_order(4*PAGE_SIZE)); ++#else + free_page((unsigned long)pgd); ++#endif + } + #endif /* CONFIG_X86_PAE */ + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 31e1d63..0b16b5d 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -764,7 +764,16 @@ + */ + #define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ +- *(.data..percpu..first) \ ++ \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ ++ *(.data..percpu..first) \ ++ . = ALIGN(cacheline); \ ++ *(.data..percpu..user_mapped) \ ++ *(.data..percpu..user_mapped..shared_aligned) \ ++ . = ALIGN(PAGE_SIZE); \ ++ *(.data..percpu..user_mapped..page_aligned) \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ ++ \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . 
= ALIGN(cacheline); \ +diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h +index 8f16299..8ea945f 100644 +--- a/include/linux/percpu-defs.h ++++ b/include/linux/percpu-defs.h +@@ -35,6 +35,12 @@ + + #endif + ++#ifdef CONFIG_KAISER ++#define USER_MAPPED_SECTION "..user_mapped" ++#else ++#define USER_MAPPED_SECTION "" ++#endif ++ + /* + * Base implementations of per-CPU variable declarations and definitions, where + * the section in which the variable is to be placed is provided by the +@@ -115,6 +121,12 @@ + #define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") + ++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) ++ ++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) ++ + /* + * Declaration/definition used for per-CPU variables that must come first in + * the set of variables. +@@ -144,6 +156,14 @@ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + ++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ++ ____cacheline_aligned_in_smp ++ ++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ++ ____cacheline_aligned_in_smp ++ + #define DECLARE_PER_CPU_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ + ____cacheline_aligned +@@ -162,6 +182,16 @@ + #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(PAGE_SIZE) ++/* ++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. ++ */ ++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) ++ ++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) + + /* + * Declaration/definition used for per-CPU variables that must be read mostly. 
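
These _USER_MAPPED variants only steer a variable into the .data..percpu..user_mapped window delimited by __per_cpu_user_mapped_start/__per_cpu_user_mapped_end; it is kaiser_init(), shown earlier, that copies that window into the shadow page tables for each possible CPU. A short sketch of how a variable opts in (the variable itself is hypothetical, not from the patch):

#include <linux/percpu.h>

/*
 * Hypothetical per-CPU slot that entry code would touch while still
 * running on the user (shadow) CR3, so it must live in the
 * user-mapped per-CPU section:
 */
DEFINE_PER_CPU_USER_MAPPED(unsigned long, entry_scratch);

static void note_entry(void)
{
	this_cpu_inc(entry_scratch); /* used like any per-CPU variable */
}
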
+diff --git a/init/main.c b/init/main.c +index f23b7fa..d2c8c23 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -87,6 +87,9 @@ + #include <asm/setup.h> + #include <asm/sections.h> + #include <asm/cacheflush.h> ++#ifdef CONFIG_KAISER ++#include <asm/kaiser.h> ++#endif + + static int kernel_init(void *); + +@@ -474,6 +477,9 @@ static void __init mm_init(void) + pgtable_init(); + vmalloc_init(); + ioremap_huge_init(); ++#ifdef CONFIG_KAISER ++ kaiser_init(); ++#endif + } + + asmlinkage __visible void __init start_kernel(void) +diff --git a/kernel/fork.c b/kernel/fork.c +index fc76aff..d34394e 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + #endif + } + ++extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); + static inline void free_thread_stack(struct task_struct *tsk) + { ++#ifdef CONFIG_KAISER ++ kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); ++#endif + #ifdef CONFIG_VMAP_STACK + if (task_stack_vm_area(tsk)) { + unsigned long flags; +@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) + *stackend = STACK_END_MAGIC; /* for overflow detection */ + } + ++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + { + struct task_struct *tsk; +@@ -495,6 +500,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + * functions again. + */ + tsk->stack = stack; ++#ifdef CONFIG_KAISER ++ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); ++#endif + #ifdef CONFIG_VMAP_STACK + tsk->stack_vm_area = stack_vm_area; + #endif +diff --git a/security/Kconfig b/security/Kconfig +index 118f454..f515ac3 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -30,6 +30,13 @@ config SECURITY + model will be used. + + If you are unsure how to answer this question, answer N. ++config KAISER ++ bool "Remove the kernel mapping in user mode" ++ depends on X86_64 ++ depends on !PARAVIRT ++ help ++ This enforces a strict kernel and user space isolation in order to close ++ hardware side channels on kernel address information. + + config SECURITYFS + bool "Enable the securityfs filesystem" +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch new file mode 100644 index 00000000..990cb048 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch @@ -0,0 +1,87 @@ +From 06424642a3712e54821ac22bba000779c0004faa Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Thu, 28 Sep 2017 16:58:26 -0500 +Subject: [PATCH 04/42] x86/asm: Fix inline asm call constraints for GCC 4.4 + +commit 520a13c530aeb5f63e011d668c42db1af19ed349 upstream. + +The kernel test bot (run by Xiaolong Ye) reported that the following commit: + + f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") + +is causing double faults in a kernel compiled with GCC 4.4. 
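
As the diagnosis below spells out, the constraint was tied to a 32-bit ESP register variable even in 64-bit builds. For reference, the corrected mainline declaration pairs the constraint with the full-width stack pointer via _ASM_SP; this is quoted from memory of the upstream commit, not from the 4.9 hunk below, which only carries the __ASM_SEL() half of the fix:

/*
 * Tie the dummy output operand to the real, full-size stack pointer
 * (RSP when compiled -m64, ESP when compiled -m32), so the compiler
 * can no longer truncate it to 32 bits when spilling and restoring:
 */
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
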
+ +Linus subsequently diagnosed the crash pattern and the buggy commit and found that +the issue is with this code: + + register unsigned int __asm_call_sp asm("esp"); + #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) + +Even on a 64-bit kernel, it's using ESP instead of RSP. That causes GCC +to produce the following bogus code: + + ffffffff8147461d: 89 e0 mov %esp,%eax + ffffffff8147461f: 4c 89 f7 mov %r14,%rdi + ffffffff81474622: 4c 89 fe mov %r15,%rsi + ffffffff81474625: ba 20 00 00 00 mov $0x20,%edx + ffffffff8147462a: 89 c4 mov %eax,%esp + ffffffff8147462c: e8 bf 52 05 00 callq ffffffff814c98f0 <copy_user_generic_unrolled> + +Despite the absurdity of it backing up and restoring the stack pointer +for no reason, the bug is actually the fact that it's only backing up +and restoring the lower 32 bits of the stack pointer. The upper 32 bits +are getting cleared out, corrupting the stack pointer. + +So change the '__asm_call_sp' register variable to be associated with +the actual full-size stack pointer. + +This also requires changing the __ASM_SEL() macro to be based on the +actual compiled arch size, rather than the CONFIG value, because +CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso). +Otherwise Clang fails to build the kernel because it complains about the +use of a 64-bit register (RSP) in a 32-bit file. + +Reported-and-Bisected-and-Tested-by: kernel test robot <xiaolong.ye@intel.com> +Diagnosed-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Alexander Potapenko <glider@google.com> +Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Dmitriy Vyukov <dvyukov@google.com> +Cc: LKP <lkp@01.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Matthias Kaehlcke <mka@chromium.org> +Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") +Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Matthias Kaehlcke <mka@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/asm.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h +index 0052352..7bb29a4 100644 +--- a/arch/x86/include/asm/asm.h ++++ b/arch/x86/include/asm/asm.h +@@ -11,10 +11,12 @@ + # define __ASM_FORM_COMMA(x) " " #x "," + #endif + +-#ifdef CONFIG_X86_32 ++#ifndef __x86_64__ ++/* 32 bit */ + # define __ASM_SEL(a,b) __ASM_FORM(a) + # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) + #else ++/* 64 bit */ + # define __ASM_SEL(a,b) __ASM_FORM(b) + # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch new file mode 100644 index 00000000..24b7bdc8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch @@ -0,0 +1,314 @@ +From e55eb19b04f78aa3343a6eae99fd557f613ccd99 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sun, 28 May 2017 10:00:14 -0700 +Subject: [PATCH 04/14] 
x86/mm: Remove the UP asm/tlbflush.h code, always use + the (formerly) SMP code + +commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream. + +The UP asm/tlbflush.h generates somewhat nicer code than the SMP version. +Aside from that, it's fallen quite a bit behind the SMP code: + + - flush_tlb_mm_range() didn't flush individual pages if the range + was small. + + - The lazy TLB code was much weaker. This usually wouldn't matter, + but, if a kernel thread flushed its lazy "active_mm" more than + once (due to reclaim or similar), it wouldn't be unlazied and + would instead pointlessly flush repeatedly. + + - Tracepoints were missing. + +Aside from that, simply having the UP code around was a maintanence +burden, since it means that any change to the TLB flush code had to +make sure not to break it. + +Simplify everything by deleting the UP code. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bpetkov@suse.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Nadav Amit <nadav.amit@gmail.com> +Cc: Nadav Amit <namit@vmware.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-mm@kvack.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/Kconfig | 2 +- + arch/x86/include/asm/hardirq.h | 2 +- + arch/x86/include/asm/mmu.h | 6 --- + arch/x86/include/asm/mmu_context.h | 2 - + arch/x86/include/asm/tlbflush.h | 78 +------------------------------------- + arch/x86/mm/init.c | 2 - + arch/x86/mm/tlb.c | 17 +-------- + 7 files changed, 5 insertions(+), 104 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 7132252..f0bcf23 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -45,7 +45,7 @@ config X86 + select ARCH_USE_CMPXCHG_LOCKREF if X86_64 + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS +- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP ++ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + select ARCH_WANTS_DYNAMIC_TASK_STRUCT + select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_IPC_PARSE_VERSION if X86_32 +diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h +index 59405a2..9b76cd3 100644 +--- a/arch/x86/include/asm/hardirq.h ++++ b/arch/x86/include/asm/hardirq.h +@@ -22,8 +22,8 @@ typedef struct { + #ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; +- unsigned int irq_tlb_count; + #endif ++ unsigned int irq_tlb_count; + #ifdef CONFIG_X86_THERMAL_VECTOR + unsigned int irq_thermal_count; + #endif +diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h +index 72198c6..8b272a0 100644 +--- a/arch/x86/include/asm/mmu.h ++++ b/arch/x86/include/asm/mmu.h +@@ -33,12 +33,6 @@ typedef struct { + #endif + } mm_context_t; + +-#ifdef CONFIG_SMP + void leave_mm(int cpu); +-#else +-static inline void leave_mm(int cpu) +-{ +-} +-#endif + + #endif /* _ASM_X86_MMU_H */ +diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h +index 8e0a9fe..762d6c6 100644 +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) + + static inline void 
enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + { +-#ifdef CONFIG_SMP + if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); +-#endif + } + + static inline int init_new_context(struct task_struct *tsk, +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index eb5b512..94146f6 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -7,6 +7,7 @@ + #include <asm/processor.h> + #include <asm/cpufeature.h> + #include <asm/special_insns.h> ++#include <asm/smp.h> + + static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void) + #endif + + struct tlb_state { +-#ifdef CONFIG_SMP + struct mm_struct *active_mm; + int state; +-#endif + + /* + * Access to this CR4 shadow and to H/W CR4 is protected by +@@ -272,79 +271,6 @@ static inline void __flush_tlb_one(unsigned long addr) + * and page-granular flushes are available only on i486 and up. + */ + +-#ifndef CONFIG_SMP +- +-/* "_up" is for UniProcessor. +- * +- * This is a helper for other header functions. *Not* intended to be called +- * directly. All global TLB flushes need to either call this, or to bump the +- * vm statistics themselves. +- */ +-static inline void __flush_tlb_up(void) +-{ +- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); +- __flush_tlb(); +-} +- +-static inline void flush_tlb_all(void) +-{ +- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); +- __flush_tlb_all(); +-} +- +-static inline void local_flush_tlb(void) +-{ +- __flush_tlb_up(); +-} +- +-static inline void flush_tlb_mm(struct mm_struct *mm) +-{ +- if (mm == current->active_mm) +- __flush_tlb_up(); +-} +- +-static inline void flush_tlb_page(struct vm_area_struct *vma, +- unsigned long addr) +-{ +- if (vma->vm_mm == current->active_mm) +- __flush_tlb_one(addr); +-} +- +-static inline void flush_tlb_range(struct vm_area_struct *vma, +- unsigned long start, unsigned long end) +-{ +- if (vma->vm_mm == current->active_mm) +- __flush_tlb_up(); +-} +- +-static inline void flush_tlb_mm_range(struct mm_struct *mm, +- unsigned long start, unsigned long end, unsigned long vmflag) +-{ +- if (mm == current->active_mm) +- __flush_tlb_up(); +-} +- +-static inline void native_flush_tlb_others(const struct cpumask *cpumask, +- struct mm_struct *mm, +- unsigned long start, +- unsigned long end) +-{ +-} +- +-static inline void reset_lazy_tlbstate(void) +-{ +-} +- +-static inline void flush_tlb_kernel_range(unsigned long start, +- unsigned long end) +-{ +- flush_tlb_all(); +-} +- +-#else /* SMP */ +- +-#include <asm/smp.h> +- + #define local_flush_tlb() __flush_tlb() + + #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) +@@ -375,8 +301,6 @@ static inline void reset_lazy_tlbstate(void) + this_cpu_write(cpu_tlbstate.active_mm, &init_mm); + } + +-#endif /* SMP */ +- + #ifndef CONFIG_PARAVIRT + #define flush_tlb_others(mask, mm, start, end) \ + native_flush_tlb_others(mask, mm, start, end) +diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c +index 05a9855..a5e79b4 100644 +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -745,10 +745,8 @@ void __init zone_sizes_init(void) + } + + DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +-#ifdef CONFIG_SMP + .active_mm = &init_mm, + .state = 0, +-#endif + .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ + }; + EXPORT_SYMBOL_GPL(cpu_tlbstate); +diff --git 
a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 6884228..613d07e 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -16,7 +16,7 @@ + #include <asm/kaiser.h> + + /* +- * Smarter SMP flushing macros. ++ * TLB flushing, formerly SMP-only + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about +@@ -29,8 +29,6 @@ + * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi + */ + +-#ifdef CONFIG_SMP +- + struct flush_tlb_info { + struct mm_struct *flush_mm; + unsigned long flush_start; +@@ -90,8 +88,6 @@ void leave_mm(int cpu) + } + EXPORT_SYMBOL_GPL(leave_mm); + +-#endif /* CONFIG_SMP */ +- + void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) + { +@@ -122,10 +118,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } + +-#ifdef CONFIG_SMP + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + this_cpu_write(cpu_tlbstate.active_mm, next); +-#endif + + cpumask_set_cpu(cpu, mm_cpumask(next)); + +@@ -183,9 +177,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + if (unlikely(prev->context.ldt != next->context.ldt)) + load_mm_ldt(next); + #endif +- } +-#ifdef CONFIG_SMP +- else { ++ } else { + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); + +@@ -212,11 +204,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + load_mm_ldt(next); + } + } +-#endif + } + +-#ifdef CONFIG_SMP +- + /* + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] +@@ -471,5 +460,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) + return 0; + } + late_initcall(create_tlb_single_page_flush_ceiling); +- +-#endif /* CONFIG_SMP */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch new file mode 100644 index 00000000..20c32ab8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch @@ -0,0 +1,78 @@ +From d0ed9c041b4312a7245912bee08d0c6e7631c9a1 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Mon, 12 Feb 2018 15:27:34 +0000 +Subject: [PATCH 04/12] x86/speculation: Correct Speculation Control microcode + blacklist again + +commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream. + +Arjan points out that the Intel document only clears the 0xc2 microcode +on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). +For the Skylake H/S platform it's OK but for Skylake E3 which has the +same CPUID it isn't (yet) cleared. + +So removing it from the blacklist was premature. Put it back for now. + +Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was +featured in one of the early revisions of the Intel document was never +released to the public, and won't be until/unless it is also validated +as safe. So those can change to 0x80 which is what all *other* versions +of the doc have identified. + +Once the retrospective testing of existing public microcodes is done, we +should be back into a mode where new microcodes are only released in +batches and we shouldn't even need to update the blacklist for those +anyway, so this tweaking of the list isn't expected to be a thing which +keeps happening. 
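
The hunk below downgrades the Kaby Lake rows from 0x84 to 0x80 and restores the SKYLAKE_DESKTOP stepping-3 row with microcode 0xc2. For context, a hedged sketch of how such a table is typically consulted; the kernel's real checker (bad_spectre_microcode()) is not part of this hunk, and the model/stepping field names here are assumed from the initializers:

/* Sketch, kernel context assumed: is this exact microcode revision
 * blacklisted for the given model/stepping? */
static bool spectre_microcode_is_bad(u8 model, u8 stepping, u32 rev)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
		const struct sku_microcode *m = &spectre_bad_microcodes[i];

		if (m->model == model && m->stepping == stepping &&
		    m->microcode == rev)
			return true; /* listed as a bad revision */
	}
	return false;
}
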
+ +Requested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: arjan.van.de.ven@intel.com +Cc: dave.hansen@intel.com +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/intel.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index e3b00ac..02cb2e3 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -75,13 +75,14 @@ struct sku_microcode { + u32 microcode; + }; + static const struct sku_microcode spectre_bad_microcodes[] = { +- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, +- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, +- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, +- { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, +- { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, ++ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch new file mode 100644 index 00000000..52bf5963 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch @@ -0,0 +1,1327 @@ +From 48523e23d22e5a66009d404caca4721b84cde67a Mon Sep 17 00:00:00 2001 +From: Dave Hansen <dave.hansen@linux.intel.com> +Date: Wed, 30 Aug 2017 16:23:00 -0700 +Subject: [PATCH 005/102] kaiser: merged update + +Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging). 
+ +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 105 ++++++++++-- + arch/x86/include/asm/kaiser.h | 43 +++-- + arch/x86/include/asm/pgtable.h | 18 +- + arch/x86/include/asm/pgtable_64.h | 48 +++++- + arch/x86/include/asm/pgtable_types.h | 6 +- + arch/x86/kernel/espfix_64.c | 13 +- + arch/x86/kernel/head_64.S | 19 ++- + arch/x86/kernel/ldt.c | 27 ++- + arch/x86/kernel/tracepoint.c | 2 + + arch/x86/mm/kaiser.c | 313 +++++++++++++++++++++++++---------- + arch/x86/mm/pageattr.c | 63 +++++-- + arch/x86/mm/pgtable.c | 40 ++--- + include/linux/kaiser.h | 26 +++ + kernel/fork.c | 9 +- + security/Kconfig | 5 + + 15 files changed, 549 insertions(+), 188 deletions(-) + create mode 100644 include/linux/kaiser.h + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 6c880dc..d84e3a7 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath: + movq RIP(%rsp), %rcx + movq EFLAGS(%rsp), %r11 + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ /* ++ * This opens a window where we have a user CR3, but are ++ * running in the kernel. This makes using the CS ++ * register useless for telling whether or not we need to ++ * switch CR3 in NMIs. Normal interrupts are OK because ++ * they are off here. ++ */ + SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 +@@ -326,11 +333,25 @@ return_from_SYSCALL_64: + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ /* ++ * This opens a window where we have a user CR3, but are ++ * running in the kernel. This makes using the CS ++ * register useless for telling whether or not we need to ++ * switch CR3 in NMIs. Normal interrupts are OK because ++ * they are off here. ++ */ + SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 + + opportunistic_sysret_failed: ++ /* ++ * This opens a window where we have a user CR3, but are ++ * running in the kernel. This makes using the CS ++ * register useless for telling whether or not we need to ++ * switch CR3 in NMIs. Normal interrupts are OK because ++ * they are off here. ++ */ + SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret +@@ -1087,6 +1108,13 @@ ENTRY(error_entry) + cld + SAVE_C_REGS 8 + SAVE_EXTRA_REGS 8 ++ /* ++ * error_entry() always returns with a kernel gsbase and ++ * CR3. We must also have a kernel CR3/gsbase before ++ * calling TRACE_IRQS_*. Just unconditionally switch to ++ * the kernel CR3 here. ++ */ ++ SWITCH_KERNEL_CR3 + xorl %ebx, %ebx + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace +@@ -1096,7 +1124,6 @@ ENTRY(error_entry) + * from user mode due to an IRET fault. + */ + SWAPGS +- SWITCH_KERNEL_CR3 + + .Lerror_entry_from_usermode_after_swapgs: + /* +@@ -1148,7 +1175,6 @@ ENTRY(error_entry) + * Switch to kernel gsbase: + */ + SWAPGS +- SWITCH_KERNEL_CR3 + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1247,7 +1273,10 @@ ENTRY(nmi) + */ + + SWAPGS_UNSAFE_STACK +- SWITCH_KERNEL_CR3_NO_STACK ++ /* ++ * percpu variables are mapped with user CR3, so no need ++ * to switch CR3 here. 
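
The NMI hunks that follow implement a different contract from the other entry points: an NMI can interrupt the kernel while either CR3 is live, so it saves the current value, forces the kernel CR3 around do_nmi(), and restores exactly what it found. A C rendering of that shape, illustration only (the real logic is open-coded assembly because neither the stack nor arbitrary per-CPU data can be trusted at that point, and the 0x1000 mask is applied only under CONFIG_KAISER_REAL_SWITCH):

static void nmi_on_kernel_cr3(struct pt_regs *regs)
{
	unsigned long saved_cr3 = read_cr3();

	write_cr3(saved_cr3 & ~0x1000UL); /* kernel half of the PGD pair */
	do_nmi(regs, -1);
	write_cr3(saved_cr3); /* user or kernel, whichever was live */
}
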
++ */ + cld + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +@@ -1281,14 +1310,33 @@ ENTRY(nmi) + + movq %rsp, %rdi + movq $-1, %rsi ++#ifdef CONFIG_KAISER ++ /* Unconditionally use kernel CR3 for do_nmi() */ ++ /* %rax is saved above, so OK to clobber here */ ++ movq %cr3, %rax ++ pushq %rax ++#ifdef CONFIG_KAISER_REAL_SWITCH ++ andq $(~0x1000), %rax ++#endif ++ movq %rax, %cr3 ++#endif + call do_nmi ++ /* ++ * Unconditionally restore CR3. I know we return to ++ * kernel code that needs user CR3, but do we ever return ++ * to "user mode" where we need the kernel CR3? ++ */ ++#ifdef CONFIG_KAISER ++ popq %rax ++ mov %rax, %cr3 ++#endif + + /* + * Return back to user mode. We must *not* do the normal exit +- * work, because we don't want to enable interrupts. Fortunately, +- * do_nmi doesn't modify pt_regs. ++ * work, because we don't want to enable interrupts. Do not ++ * switch to user CR3: we might be going back to kernel code ++ * that had a user CR3 set. + */ +- SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret + +@@ -1484,23 +1532,54 @@ end_repeat_nmi: + ALLOC_PT_GPREGS_ON_STACK + + /* +- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit +- * as we should not be calling schedule in NMI context. +- * Even with normal interrupts enabled. An NMI should not be +- * setting NEED_RESCHED or anything that normal interrupts and +- * exceptions might do. ++ * Use the same approach as paranoid_entry to handle SWAPGS, but ++ * without CR3 handling since we do that differently in NMIs. No ++ * need to use paranoid_exit as we should not be calling schedule ++ * in NMI context. Even with normal interrupts enabled. An NMI ++ * should not be setting NEED_RESCHED or anything that normal ++ * interrupts and exceptions might do. + */ +- call paranoid_entry ++ cld ++ SAVE_C_REGS ++ SAVE_EXTRA_REGS ++ movl $1, %ebx ++ movl $MSR_GS_BASE, %ecx ++ rdmsr ++ testl %edx, %edx ++ js 1f /* negative -> in kernel */ ++ SWAPGS ++ xorl %ebx, %ebx ++1: ++#ifdef CONFIG_KAISER ++ /* Unconditionally use kernel CR3 for do_nmi() */ ++ /* %rax is saved above, so OK to clobber here */ ++ movq %cr3, %rax ++ pushq %rax ++#ifdef CONFIG_KAISER_REAL_SWITCH ++ andq $(~0x1000), %rax ++#endif ++ movq %rax, %cr3 ++#endif + + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + movq %rsp, %rdi ++ addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ + movq $-1, %rsi + call do_nmi ++ /* ++ * Unconditionally restore CR3. We might be returning to ++ * kernel code that needs user CR3, like just just before ++ * a sysret. ++ */ ++#ifdef CONFIG_KAISER ++ popq %rax ++ mov %rax, %cr3 ++#endif + + testl %ebx, %ebx /* swapgs needed? 
*/
+ jnz nmi_restore
+ nmi_swapgs:
+- SWITCH_USER_CR3_NO_STACK
++ /* We fixed up CR3 above, so no need to switch it here */
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_EXTRA_REGS
+diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
+index 63ee830..0703f48 100644
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -16,13 +16,17 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+ #endif /* CONFIG_KAISER */
++
+ #else /* __ASSEMBLY__ */
+
+
+ #ifdef CONFIG_KAISER
+-// Upon kernel/user mode switch, it may happen that
+-// the address space has to be switched before the registers have been stored.
+-// To change the address space, another register is needed.
+-// A register therefore has to be stored/restored.
+-//
+-DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * Upon kernel/user mode switch, it may happen that the address
++ * space has to be switched before the registers have been
++ * stored. To change the address space, another register is
++ * needed. A register therefore has to be stored/restored.
++*/
+
+-#endif /* CONFIG_KAISER */
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+ /**
+- * shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: The mapping flags of the pages
+ *
+- * the mapping is done on a global scope, so no bigger synchronization has to be done.
+- * the pages have to be manually unmapped again when they are not needed any longer.
++ * The mapping is done on a global scope, so no bigger
++ * synchronization has to be done. the pages have to be
++ * manually unmapped again when they are not needed any longer.
+ */
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+
+ /**
+- * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ */
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+ /**
+- * shadowmem_initialize_mapping - Initalize the shadow mapping
++ * kaiser_initialize_mapping - Initalize the shadow mapping
+ *
+- * most parts of the shadow mapping can be mapped upon boot time.
+- * only the thread stacks have to be mapped on runtime.
+- * the mapped regions are not unmapped at all.
++ * Most parts of the shadow mapping can be mapped upon boot
++ * time. Only per-process things like the thread stacks
++ * or a new LDT have to be mapped at runtime. These boot-
++ * time mappings are permanent and never unmapped.
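
Since kaiser_add_mapping() now reports failure, runtime callers are expected to check and unwind; the ldt.c hunk later in this patch still carries a FIXME because the return value is not captured there yet. A sketch of the intended call-site shape (the wrapper is hypothetical):

static int kaiser_map_or_fail(void *p, unsigned long size)
{
	int ret = kaiser_add_mapping((unsigned long)p, size, __PAGE_KERNEL);

	if (ret) /* shadow page-table allocation failed */
		return ret; /* caller frees p and propagates the error */
	return 0;
}
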
+ */ + extern void kaiser_init(void); + +-#endif ++#endif /* CONFIG_KAISER */ ++ ++#endif /* __ASSEMBLY */ + + + +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 4b479c9..1cee98e 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) + + static inline int pgd_bad(pgd_t pgd) + { +- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; ++ pgdval_t ignore_flags = _PAGE_USER; ++ /* ++ * We set NX on KAISER pgds that map userspace memory so ++ * that userspace can not meaningfully use the kernel ++ * page table by accident; it will fault on the first ++ * instruction it tries to run. See native_set_pgd(). ++ */ ++ if (IS_ENABLED(CONFIG_KAISER)) ++ ignore_flags |= _PAGE_NX; ++ ++ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; + } + + static inline int pgd_none(pgd_t pgd) +@@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { + memcpy(dst, src, count * sizeof(pgd_t)); + #ifdef CONFIG_KAISER +- // clone the shadow pgd part as well +- memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); ++ /* Clone the shadow pgd part as well */ ++ memcpy(native_get_shadow_pgd(dst), ++ native_get_shadow_pgd(src), ++ count * sizeof(pgd_t)); + #endif + } + +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index e6ea39f..000265c 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_t *pud) + } + + #ifdef CONFIG_KAISER +-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { ++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++{ + return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); + } + +-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { ++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++{ + return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); + } ++#else ++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++{ ++ BUILD_BUG_ON(1); ++ return NULL; ++} ++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++{ ++ return pgdp; ++} + #endif /* CONFIG_KAISER */ + ++/* ++ * Page table pages are page-aligned. The lower half of the top ++ * level is used for userspace and the top half for the kernel. ++ * This returns true for user pages that need to get copied into ++ * both the user and kernel copies of the page tables, and false ++ * for kernel pages that should only be in the kernel copy. ++ */ ++static inline bool is_userspace_pgd(void *__ptr) ++{ ++ unsigned long ptr = (unsigned long)__ptr; ++ ++ return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); ++} ++ + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { + #ifdef CONFIG_KAISER +- // We know that a pgd is page aligned. +- // Therefore the lower indices have to be mapped to user space. +- // These pages are mapped to the shadow mapping. +- if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { ++ pteval_t extra_kern_pgd_flags = 0; ++ /* Do we need to also populate the shadow pgd? */ ++ if (is_userspace_pgd(pgdp)) { + native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ /* ++ * Even if the entry is *mapping* userspace, ensure ++ * that userspace can not use it. This way, if we ++ * get out to userspace running on the kernel CR3, ++ * userspace will crash instead of running. 
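
Reduced to its core, the rule native_set_pgd() now enforces for a user-half entry is: mirror it verbatim into the shadow PGD, then poison the kernel copy with NX. A sketch using the same names as the hunk above (illustration only; the real function must also pass kernel-half entries through untouched):

static void set_user_half_pgd(pgd_t *pgdp, pgd_t pgd)
{
	/* user copy: exactly as provided, executable for userspace */
	native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
	/* kernel copy: still mapped, but non-executable, so returning
	 * to user mode on the kernel CR3 faults on the first insn */
	pgdp->pgd = pgd.pgd | _PAGE_NX;
}
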
++ */ ++ extra_kern_pgd_flags = _PAGE_NX; + } +- +- pgdp->pgd = pgd.pgd & ~_PAGE_USER; ++ pgdp->pgd = pgd.pgd; ++ pgdp->pgd |= extra_kern_pgd_flags; + #else /* CONFIG_KAISER */ + *pgdp = pgd; + #endif +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index 00fecbb..8bc8d02 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -48,7 +48,7 @@ + #ifdef CONFIG_KAISER + #define _PAGE_GLOBAL (_AT(pteval_t, 0)) + #else +-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) + #endif + #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) + #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) +@@ -123,11 +123,7 @@ + #define _PAGE_DEVMAP (_AT(pteval_t, 0)) + #endif + +-#ifdef CONFIG_KAISER +-#define _PAGE_PROTNONE (_AT(pteval_t, 0)) +-#else + #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +-#endif + + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c +index 9ff875a..560c2fd 100644 +--- a/arch/x86/kernel/espfix_64.c ++++ b/arch/x86/kernel/espfix_64.c +@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void) + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); +-#ifdef CONFIG_KAISER +- // add the esp stack pud to the shadow mapping here. +- // This can be done directly, because the fixup stack has its own pud +- set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); +-#endif ++ /* ++ * Just copy the top-level PGD that is mapping the espfix ++ * area to ensure it is mapped into the shadow user page ++ * tables. ++ */ ++ if (IS_ENABLED(CONFIG_KAISER)) ++ set_pgd(native_get_shadow_pgd(pgd_p), ++ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); + + /* Randomize the locations */ + init_espfix_random(); +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index 9e849b5..5775379 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag) + GLOBAL(name) + + #ifdef CONFIG_KAISER ++/* ++ * Each PGD needs to be 8k long and 8k aligned. We do not ++ * ever go out to userspace with these, so we do not ++ * strictly *need* the second page, but this allows us to ++ * have a single set_pgd() implementation that does not ++ * need to worry about whether it has 4k or 8k to work ++ * with. 
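
The runtime counterpart of this boot-time alignment is the _pgd_alloc() change earlier in this series: with only page-aligned allocations guaranteed, it over-allocates four pages so that an 8kB-aligned pair must exist inside the block. A standalone user-space sketch of that alignment trick, illustration only (the kernel version additionally stashes the block base inside a spare page so _pgd_free() can release the whole allocation):

#include <stdint.h>
#include <stdlib.h>

#define PGD_PAGE 4096UL

static void *alloc_8k_aligned_pair(void)
{
	uintptr_t block = (uintptr_t)aligned_alloc(PGD_PAGE, 4 * PGD_PAGE);

	if (!block)
		return NULL;
	if ((block & (2 * PGD_PAGE - 1)) == 0)
		return (void *)block; /* block itself is 8kB-aligned */
	return (void *)(block + PGD_PAGE); /* then this pair is */
}
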
++ * ++ * This ensures PGDs are 8k long: ++ */ ++#define KAISER_USER_PGD_FILL 512 ++/* This ensures they are 8k-aligned: */ + #define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ + GLOBAL(name) + #else + #define NEXT_PGD_PAGE(name) NEXT_PAGE(name) ++#define KAISER_USER_PGD_FILL 0 + #endif + + /* Automate the creation of 1 to 1 mapping pmd entries */ +@@ -425,6 +438,7 @@ GLOBAL(name) + NEXT_PGD_PAGE(early_level4_pgt) + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE ++ .fill KAISER_USER_PGD_FILL,8,0 + + NEXT_PAGE(early_dynamic_pgts) + .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 +@@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts) + + #ifndef CONFIG_XEN + NEXT_PGD_PAGE(init_level4_pgt) +- .fill 2*512,8,0 ++ .fill 512,8,0 ++ .fill KAISER_USER_PGD_FILL,8,0 + #else + NEXT_PGD_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +@@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt) + .org init_level4_pgt + L4_START_KERNEL*8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE ++ .fill KAISER_USER_PGD_FILL,8,0 + + NEXT_PAGE(level3_ident_pgt) + .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +@@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt) + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) + #endif ++ .fill KAISER_USER_PGD_FILL,8,0 + + NEXT_PAGE(level3_kernel_pgt) + .fill L3_START_KERNEL,8,0 +diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c +index 6707039..3c2d55b 100644 +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -17,6 +17,7 @@ + #include <linux/uaccess.h> + + #include <asm/ldt.h> ++#include <asm/kaiser.h> + #include <asm/desc.h> + #include <asm/mmu_context.h> + #include <asm/syscalls.h> +@@ -33,11 +34,21 @@ static void flush_ldt(void *current_mm) + set_ldt(pc->ldt->entries, pc->ldt->size); + } + ++static void __free_ldt_struct(struct ldt_struct *ldt) ++{ ++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(ldt->entries); ++ else ++ free_page((unsigned long)ldt->entries); ++ kfree(ldt); ++} ++ + /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. 
*/ + static struct ldt_struct *alloc_ldt_struct(int size) + { + struct ldt_struct *new_ldt; + int alloc_size; ++ int ret = 0; + + if (size > LDT_ENTRIES) + return NULL; +@@ -65,6 +76,14 @@ static struct ldt_struct *alloc_ldt_struct(int size) + return NULL; + } + ++ // FIXME: make kaiser_add_mapping() return an error code ++ // when it fails ++ kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, ++ __PAGE_KERNEL); ++ if (ret) { ++ __free_ldt_struct(new_ldt); ++ return NULL; ++ } + new_ldt->size = size; + return new_ldt; + } +@@ -91,12 +110,10 @@ static void free_ldt_struct(struct ldt_struct *ldt) + if (likely(!ldt)) + return; + ++ kaiser_remove_mapping((unsigned long)ldt->entries, ++ ldt->size * LDT_ENTRY_SIZE); + paravirt_free_ldt(ldt->entries, ldt->size); +- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(ldt->entries); +- else +- free_page((unsigned long)ldt->entries); +- kfree(ldt); ++ __free_ldt_struct(ldt); + } + + /* +diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c +index 1c113db..2bb5ee4 100644 +--- a/arch/x86/kernel/tracepoint.c ++++ b/arch/x86/kernel/tracepoint.c +@@ -9,10 +9,12 @@ + #include <linux/atomic.h> + + atomic_t trace_idt_ctr = ATOMIC_INIT(0); ++__aligned(PAGE_SIZE) + struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, + (unsigned long) trace_idt_table }; + + /* No need to be aligned, but done to keep all IDTs defined the same way. */ ++__aligned(PAGE_SIZE) + gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; + + static int trace_irq_vector_refcount; +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index cf1bb92..7270a29 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -1,160 +1,305 @@ +- +- ++#include <linux/bug.h> + #include <linux/kernel.h> + #include <linux/errno.h> + #include <linux/string.h> + #include <linux/types.h> + #include <linux/bug.h> + #include <linux/init.h> ++#include <linux/interrupt.h> + #include <linux/spinlock.h> + #include <linux/mm.h> +- + #include <linux/uaccess.h> ++ ++#include <asm/kaiser.h> + #include <asm/pgtable.h> + #include <asm/pgalloc.h> + #include <asm/desc.h> + #ifdef CONFIG_KAISER + + __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++/* ++ * At runtime, the only things we map are some things for CPU ++ * hotplug, and stacks for new processes. No two CPUs will ever ++ * be populating the same addresses, so we only need to ensure ++ * that we protect between two CPUs trying to allocate and ++ * populate the same page table page. ++ * ++ * Only take this lock when doing a set_p[4um]d(), but it is not ++ * needed for doing a set_pte(). We assume that only the *owner* ++ * of a given allocation will be doing this for _their_ ++ * allocation. ++ * ++ * This ensures that once a system has been running for a while ++ * and there have been stacks all over and these page tables ++ * are fully populated, there will be no further acquisitions of ++ * this lock. ++ */ ++static DEFINE_SPINLOCK(shadow_table_allocation_lock); + +-/** +- * Get the real ppn from a address in kernel mapping. +- * @param address The virtual adrress +- * @return the physical address ++/* ++ * Returns -1 on error. 
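++ * (The -1 travels back in an unsigned long; the caller,
++ * kaiser_add_user_map() below, tests target_address == -1
++ * and converts it to -EIO.)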
+ */ +-static inline unsigned long get_pa_from_mapping (unsigned long address) ++static inline unsigned long get_pa_from_mapping(unsigned long vaddr) + { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +- pgd = pgd_offset_k(address); +- BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); +- +- pud = pud_offset(pgd, address); +- BUG_ON(pud_none(*pud)); ++ pgd = pgd_offset_k(vaddr); ++ /* ++ * We made all the kernel PGDs present in kaiser_init(). ++ * We expect them to stay that way. ++ */ ++ BUG_ON(pgd_none(*pgd)); ++ /* ++ * PGDs are either 512GB or 128TB on all x86_64 ++ * configurations. We don't handle these. ++ */ ++ BUG_ON(pgd_large(*pgd)); + +- if (pud_large(*pud)) { +- return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); ++ pud = pud_offset(pgd, vaddr); ++ if (pud_none(*pud)) { ++ WARN_ON_ONCE(1); ++ return -1; + } + +- pmd = pmd_offset(pud, address); +- BUG_ON(pmd_none(*pmd)); ++ if (pud_large(*pud)) ++ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK); + +- if (pmd_large(*pmd)) { +- return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); ++ pmd = pmd_offset(pud, vaddr); ++ if (pmd_none(*pmd)) { ++ WARN_ON_ONCE(1); ++ return -1; + } + +- pte = pte_offset_kernel(pmd, address); +- BUG_ON(pte_none(*pte)); ++ if (pmd_large(*pmd)) ++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK); + +- return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); ++ pte = pte_offset_kernel(pmd, vaddr); ++ if (pte_none(*pte)) { ++ WARN_ON_ONCE(1); ++ return -1; ++ } ++ ++ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); + } + +-void _kaiser_copy (unsigned long start_addr, unsigned long size, +- unsigned long flags) ++/* ++ * This is a relatively normal page table walk, except that it ++ * also tries to allocate page tables pages along the way. ++ * ++ * Returns a pointer to a PTE on success, or NULL on failure. 
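++ *
++ * (Intermediate tables are allocated outside
++ * shadow_table_allocation_lock and only installed under the
++ * lock if the slot is still empty; a losing racer frees its
++ * spare page.)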
++ */ ++static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) + { +- pgd_t *pgd; +- pud_t *pud; + pmd_t *pmd; +- pte_t *pte; +- unsigned long address; +- unsigned long end_addr = start_addr + size; +- unsigned long target_address; ++ pud_t *pud; ++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); ++ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + +- for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); +- address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { +- target_address = get_pa_from_mapping(address); ++ might_sleep(); ++ if (is_atomic) { ++ gfp &= ~GFP_KERNEL; ++ gfp |= __GFP_HIGH | __GFP_ATOMIC; ++ } + +- pgd = native_get_shadow_pgd(pgd_offset_k(address)); ++ if (pgd_none(*pgd)) { ++ WARN_ONCE(1, "All shadow pgds should have been populated"); ++ return NULL; ++ } ++ BUILD_BUG_ON(pgd_large(*pgd) != 0); + +- BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); +- BUG_ON(pgd_large(*pgd)); ++ pud = pud_offset(pgd, address); ++ /* The shadow page tables do not use large mappings: */ ++ if (pud_large(*pud)) { ++ WARN_ON(1); ++ return NULL; ++ } ++ if (pud_none(*pud)) { ++ unsigned long new_pmd_page = __get_free_page(gfp); ++ if (!new_pmd_page) ++ return NULL; ++ spin_lock(&shadow_table_allocation_lock); ++ if (pud_none(*pud)) ++ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); ++ else ++ free_page(new_pmd_page); ++ spin_unlock(&shadow_table_allocation_lock); ++ } + +- pud = pud_offset(pgd, address); +- if (pud_none(*pud)) { +- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); +- } +- BUG_ON(pud_large(*pud)); ++ pmd = pmd_offset(pud, address); ++ /* The shadow page tables do not use large mappings: */ ++ if (pmd_large(*pmd)) { ++ WARN_ON(1); ++ return NULL; ++ } ++ if (pmd_none(*pmd)) { ++ unsigned long new_pte_page = __get_free_page(gfp); ++ if (!new_pte_page) ++ return NULL; ++ spin_lock(&shadow_table_allocation_lock); ++ if (pmd_none(*pmd)) ++ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); ++ else ++ free_page(new_pte_page); ++ spin_unlock(&shadow_table_allocation_lock); ++ } + +- pmd = pmd_offset(pud, address); +- if (pmd_none(*pmd)) { +- set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); +- } +- BUG_ON(pmd_large(*pmd)); ++ return pte_offset_kernel(pmd, address); ++} + +- pte = pte_offset_kernel(pmd, address); ++int kaiser_add_user_map(const void *__start_addr, unsigned long size, ++ unsigned long flags) ++{ ++ int ret = 0; ++ pte_t *pte; ++ unsigned long start_addr = (unsigned long )__start_addr; ++ unsigned long address = start_addr & PAGE_MASK; ++ unsigned long end_addr = PAGE_ALIGN(start_addr + size); ++ unsigned long target_address; ++ ++ for (;address < end_addr; address += PAGE_SIZE) { ++ target_address = get_pa_from_mapping(address); ++ if (target_address == -1) { ++ ret = -EIO; ++ break; ++ } ++ pte = kaiser_pagetable_walk(address, false); + if (pte_none(*pte)) { + set_pte(pte, __pte(flags | target_address)); + } else { +- BUG_ON(__pa(pte_page(*pte)) != target_address); ++ pte_t tmp; ++ set_pte(&tmp, __pte(flags | target_address)); ++ WARN_ON_ONCE(!pte_same(*pte, tmp)); + } + } ++ return ret; ++} ++ ++static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags) ++{ ++ unsigned long size = end - start; ++ ++ return kaiser_add_user_map(start, size, flags); + } + +-// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping +-static inline void __init _kaiser_init(void) ++/* ++ * Ensure 
that the top level of the (shadow) page tables are ++ * entirely populated. This ensures that all processes that get ++ * forked have the same entries. This way, we do not have to ++ * ever go set up new entries in older processes. ++ * ++ * Note: we never free these, so there are no updates to them ++ * after this. ++ */ ++static void __init kaiser_init_all_pgds(void) + { + pgd_t *pgd; + int i = 0; + + pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); + for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { +- set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); ++ pgd_t new_pgd; ++ pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); ++ if (!pud) { ++ WARN_ON(1); ++ break; ++ } ++ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); ++ /* ++ * Make sure not to stomp on some other pgd entry. ++ */ ++ if (!pgd_none(pgd[i])) { ++ WARN_ON(1); ++ continue; ++ } ++ set_pgd(pgd + i, new_pgd); + } + } + ++#define kaiser_add_user_map_early(start, size, flags) do { \ ++ int __ret = kaiser_add_user_map(start, size, flags); \ ++ WARN_ON(__ret); \ ++} while (0) ++ ++#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \ ++ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \ ++ WARN_ON(__ret); \ ++} while (0) ++ + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +-spinlock_t shadow_table_lock; ++/* ++ * If anything in here fails, we will likely die on one of the ++ * first kernel->user transitions and init will die. But, we ++ * will have most of the kernel up by then and should be able to ++ * get a clean warning out of it. If we BUG_ON() here, we run ++ * the risk of being before we have good console output. ++ */ + void __init kaiser_init(void) + { + int cpu; +- spin_lock_init(&shadow_table_lock); +- +- spin_lock(&shadow_table_lock); + +- _kaiser_init(); ++ kaiser_init_all_pgds(); + + for_each_possible_cpu(cpu) { +- // map the per cpu user variables +- _kaiser_copy( +- (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), +- (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, +- __PAGE_KERNEL); ++ void *percpu_vaddr = __per_cpu_user_mapped_start + ++ per_cpu_offset(cpu); ++ unsigned long percpu_sz = __per_cpu_user_mapped_end - ++ __per_cpu_user_mapped_start; ++ kaiser_add_user_map_early(percpu_vaddr, percpu_sz, ++ __PAGE_KERNEL); + } + +- // map the entry/exit text section, which is responsible to switch between user- and kernel mode +- _kaiser_copy( +- (unsigned long) __entry_text_start, +- (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, +- __PAGE_KERNEL_RX); ++ /* ++ * Map the entry/exit text section, which is needed at ++ * switches from user to and from kernel. 
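++ * (__entry_text_start/__entry_text_end are the linker symbols
++ * bracketing the .entry.text section.)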
++ */ ++ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end, ++ __PAGE_KERNEL_RX); + +- // the fixed map address of the idt_table +- _kaiser_copy( +- (unsigned long) idt_descr.address, +- sizeof(gate_desc) * NR_VECTORS, +- __PAGE_KERNEL_RO); +- +- spin_unlock(&shadow_table_lock); ++#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) ++ kaiser_add_user_map_ptrs_early(__irqentry_text_start, ++ __irqentry_text_end, ++ __PAGE_KERNEL_RX); ++#endif ++ kaiser_add_user_map_early((void *)idt_descr.address, ++ sizeof(gate_desc) * NR_VECTORS, ++ __PAGE_KERNEL_RO); ++#ifdef CONFIG_TRACING ++ kaiser_add_user_map_early(&trace_idt_descr, ++ sizeof(trace_idt_descr), ++ __PAGE_KERNEL); ++ kaiser_add_user_map_early(&trace_idt_table, ++ sizeof(gate_desc) * NR_VECTORS, ++ __PAGE_KERNEL); ++#endif ++ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr), ++ __PAGE_KERNEL); ++ kaiser_add_user_map_early(&debug_idt_table, ++ sizeof(gate_desc) * NR_VECTORS, ++ __PAGE_KERNEL); + } + ++extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); + // add a mapping to the shadow-mapping, and synchronize the mappings +-void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) ++int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) + { +- spin_lock(&shadow_table_lock); +- _kaiser_copy(addr, size, flags); +- spin_unlock(&shadow_table_lock); ++ return kaiser_add_user_map((const void *)addr, size, flags); + } + +-extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); + void kaiser_remove_mapping(unsigned long start, unsigned long size) + { +- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); +- spin_lock(&shadow_table_lock); +- do { +- unmap_pud_range(pgd, start, start + size); +- } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); +- spin_unlock(&shadow_table_lock); ++ unsigned long end = start + size; ++ unsigned long addr; ++ ++ for (addr = start; addr < end; addr += PGDIR_SIZE) { ++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); ++ /* ++ * unmap_p4d_range() handles > P4D_SIZE unmaps, ++ * so no need to trim 'end'. 
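++ * (Editor's note: 4.9 predates the p4d page-table level; the
++ * backport calls unmap_pud_range_nofree() here instead.)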
++ */ ++ unmap_pud_range_nofree(pgd, addr, end); ++ } + } + #endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +index c17412f..73dcb0e1 100644 +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock); + #define CPA_FLUSHTLB 1 + #define CPA_ARRAY 2 + #define CPA_PAGES_ARRAY 4 ++#define CPA_FREE_PAGETABLES 8 + + #ifdef CONFIG_PROC_FS + static unsigned long direct_pages_count[PG_LEVEL_NUM]; +@@ -729,10 +730,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, + return 0; + } + +-static bool try_to_free_pte_page(pte_t *pte) ++static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte) + { + int i; + ++ if (!(cpa->flags & CPA_FREE_PAGETABLES)) ++ return false; ++ + for (i = 0; i < PTRS_PER_PTE; i++) + if (!pte_none(pte[i])) + return false; +@@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *pte) + return true; + } + +-static bool try_to_free_pmd_page(pmd_t *pmd) ++static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd) + { + int i; + ++ if (!(cpa->flags & CPA_FREE_PAGETABLES)) ++ return false; ++ + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_none(pmd[i])) + return false; +@@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *pmd) + return true; + } + +-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) ++static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd, ++ unsigned long start, ++ unsigned long end) + { + pte_t *pte = pte_offset_kernel(pmd, start); + +@@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) + pte++; + } + +- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { ++ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) { + pmd_clear(pmd); + return true; + } + return false; + } + +-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, ++static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd, + unsigned long start, unsigned long end) + { +- if (unmap_pte_range(pmd, start, end)) +- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) ++ if (unmap_pte_range(cpa, pmd, start, end)) ++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); + } + +-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) ++static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, ++ unsigned long start, unsigned long end) + { + pmd_t *pmd = pmd_offset(pud, start); + +@@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + +- __unmap_pmd_range(pud, pmd, start, pre_end); ++ __unmap_pmd_range(cpa, pud, pmd, start, pre_end); + + start = pre_end; + pmd++; +@@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) + if (pmd_large(*pmd)) + pmd_clear(pmd); + else +- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); ++ __unmap_pmd_range(cpa, pud, pmd, ++ start, start + PMD_SIZE); + + start += PMD_SIZE; + pmd++; +@@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) + * 4K leftovers? + */ + if (start < end) +- return __unmap_pmd_range(pud, pmd, start, end); ++ return __unmap_pmd_range(cpa, pud, pmd, start, end); + + /* + * Try again to free the PMD page if haven't succeeded above. 
+ */ + if (!pud_none(*pud)) +- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) ++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); + } + +-void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) ++static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd, ++ unsigned long start, ++ unsigned long end) + { + pud_t *pud = pud_offset(pgd, start); + +@@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + +- unmap_pmd_range(pud, start, pre_end); ++ unmap_pmd_range(cpa, pud, start, pre_end); + + start = pre_end; + pud++; +@@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + if (pud_large(*pud)) + pud_clear(pud); + else +- unmap_pmd_range(pud, start, start + PUD_SIZE); ++ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE); + + start += PUD_SIZE; + pud++; +@@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + * 2M leftovers? + */ + if (start < end) +- unmap_pmd_range(pud, start, end); ++ unmap_pmd_range(cpa, pud, start, end); + + /* + * No need to try to free the PUD page because we'll free it in +@@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + */ + } + ++static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) ++{ ++ struct cpa_data cpa = { ++ .flags = CPA_FREE_PAGETABLES, ++ }; ++ ++ __unmap_pud_range(&cpa, pgd, start, end); ++} ++ ++void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end) ++{ ++ struct cpa_data cpa = { ++ .flags = 0, ++ }; ++ ++ __unmap_pud_range(&cpa, pgd, start, end); ++} ++ + static int alloc_pte_page(pmd_t *pmd) + { + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 27d218b..352fd01 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd) + kmem_cache_free(pgd_cache, pgd); + } + #else +-static inline pgd_t *_pgd_alloc(void) +-{ ++ + #ifdef CONFIG_KAISER +- // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory +- // block. Therefore, we have to allocate at least 3 pages. However, the +- // __get_free_pages returns us 4 pages. Hence, we store the base pointer at +- // the beginning of the page of our 8kb-aligned memory block in order to +- // correctly free it afterwars. +- +- unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); +- +- if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) +- { +- *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; +- return (pgd_t *) pages; +- } +- else +- { +- *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; +- return (pgd_t *) (pages + PAGE_SIZE); +- } ++/* ++ * Instead of one pmd, we aquire two pmds. Being order-1, it is ++ * both 8k in size and 8k-aligned. That lets us just flip bit 12 ++ * in a pointer to swap between the two 4k halves. 
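++ * (Editor's note: read "pgd"/"pgds" for "pmd"/"pmds" above;
++ * the comment describes the order-1 PGD pair allocated via
++ * PGD_ALLOCATION_ORDER below.)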
++ */ ++#define PGD_ALLOCATION_ORDER 1 + #else +- return (pgd_t *)__get_free_page(PGALLOC_GFP); ++#define PGD_ALLOCATION_ORDER 0 + #endif ++ ++static inline pgd_t *_pgd_alloc(void) ++{ ++ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); + } + + static inline void _pgd_free(pgd_t *pgd) + { +-#ifdef CONFIG_KAISER +- unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); +- free_pages(pages, get_order(4*PAGE_SIZE)); +-#else +- free_page((unsigned long)pgd); +-#endif ++ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); + } + #endif /* CONFIG_X86_PAE */ + +diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h +new file mode 100644 +index 0000000..9db5433 +--- /dev/null ++++ b/include/linux/kaiser.h +@@ -0,0 +1,26 @@ ++#ifndef _INCLUDE_KAISER_H ++#define _INCLUDE_KAISER_H ++ ++#ifdef CONFIG_KAISER ++#include <asm/kaiser.h> ++#else ++ ++/* ++ * These stubs are used whenever CONFIG_KAISER is off, which ++ * includes architectures that support KAISER, but have it ++ * disabled. ++ */ ++ ++static inline void kaiser_init(void) ++{ ++} ++static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) ++{ ++} ++static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_KAISER */ ++#endif /* _INCLUDE_KAISER_H */ +diff --git a/kernel/fork.c b/kernel/fork.c +index d34394e..8013f22 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -58,6 +58,7 @@ + #include <linux/tsacct_kern.h> + #include <linux/cn_proc.h> + #include <linux/freezer.h> ++#include <linux/kaiser.h> + #include <linux/delayacct.h> + #include <linux/taskstats_kern.h> + #include <linux/random.h> +@@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct task_struct *tsk) + *stackend = STACK_END_MAGIC; /* for overflow detection */ + } + +-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + { + struct task_struct *tsk; +@@ -500,9 +500,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + * functions again. + */ + tsk->stack = stack; +-#ifdef CONFIG_KAISER +- kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); +-#endif ++ ++ err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); ++ if (err) ++ goto free_stack; + #ifdef CONFIG_VMAP_STACK + tsk->stack_vm_area = stack_vm_area; + #endif +diff --git a/security/Kconfig b/security/Kconfig +index f515ac3..334d2e8 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -32,12 +32,17 @@ config SECURITY + If you are unsure how to answer this question, answer N. + config KAISER + bool "Remove the kernel mapping in user mode" ++ default y + depends on X86_64 + depends on !PARAVIRT + help + This enforces a strict kernel and user space isolation in order to close + hardware side channels on kernel address information. 
+ ++config KAISER_REAL_SWITCH ++ bool "KAISER: actually switch page tables" ++ default y ++ + config SECURITYFS + bool "Enable the securityfs filesystem" + help +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch new file mode 100644 index 00000000..bbb98553 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch @@ -0,0 +1,105 @@ +From 56f0eb24f5e9ff1faf0818a928a6c4a1004aeef1 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Sun, 18 Dec 2016 17:44:13 +0100 +Subject: [PATCH 05/42] x86/microcode/AMD: Do not load when running on a + hypervisor + +commit a15a753539eca8ba243d576f02e7ca9c4b7d7042 upstream. + +Doing so is completely void of sense for multiple reasons so prevent +it. Set dis_ucode_ldr to true and thus disable the microcode loader by +default to address xen pv guests which execute the AP path but not the +BSP path. + +By having it turned off by default, the APs won't run into the loader +either. + +Also, check CPUID(1).ECX[31] which hypervisors set. Well almost, not the +xen pv one. That one gets the aforementioned "fix". + +Also, improve the detection method by caching the final decision whether +to continue loading in dis_ucode_ldr and do it once on the BSP. The APs +then simply test that value. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Tested-by: Juergen Gross <jgross@suse.com> +Tested-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Acked-by: Juergen Gross <jgross@suse.com> +Link: http://lkml.kernel.org/r/20161218164414.9649-4-bp@alien8.de +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Rolf Neugebauer <rolf.neugebauer@docker.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/microcode/core.c | 28 +++++++++++++++++++--------- + 1 file changed, 19 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c +index 5ce5155..dc0b9f8 100644 +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -43,7 +43,7 @@ + #define MICROCODE_VERSION "2.01" + + static struct microcode_ops *microcode_ops; +-static bool dis_ucode_ldr; ++static bool dis_ucode_ldr = true; + + /* + * Synchronization. +@@ -73,6 +73,7 @@ struct cpu_info_ctx { + static bool __init check_loader_disabled_bsp(void) + { + static const char *__dis_opt_str = "dis_ucode_ldr"; ++ u32 a, b, c, d; + + #ifdef CONFIG_X86_32 + const char *cmdline = (const char *)__pa_nodebug(boot_command_line); +@@ -85,8 +86,23 @@ static bool __init check_loader_disabled_bsp(void) + bool *res = &dis_ucode_ldr; + #endif + +- if (cmdline_find_option_bool(cmdline, option)) +- *res = true; ++ if (!have_cpuid_p()) ++ return *res; ++ ++ a = 1; ++ c = 0; ++ native_cpuid(&a, &b, &c, &d); ++ ++ /* ++ * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not ++ * completely accurate as xen pv guests don't see that CPUID bit set but ++ * that's good enough as they don't land on the BSP path anyway. 
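++ * (ECX bit 31 of CPUID leaf 1 is the conventional "hypervisor
++ * present" bit.)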
++ */ ++ if (c & BIT(31)) ++ return *res; ++ ++ if (cmdline_find_option_bool(cmdline, option) <= 0) ++ *res = false; + + return *res; + } +@@ -118,9 +134,6 @@ void __init load_ucode_bsp(void) + if (check_loader_disabled_bsp()) + return; + +- if (!have_cpuid_p()) +- return; +- + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); + +@@ -154,9 +167,6 @@ void load_ucode_ap(void) + if (check_loader_disabled_ap()) + return; + +- if (!have_cpuid_p()) +- return; +- + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch new file mode 100644 index 00000000..b21b0f41 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch @@ -0,0 +1,117 @@ +From 9c30656e4da86d6c69ad832ed9cb3e549b939566 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 29 Jun 2017 08:53:15 -0700 +Subject: [PATCH 05/14] x86/mm: Give each mm TLB flush generation a unique ID + +commit f39681ed0f48498b80455095376f11535feea332 upstream. + +This adds two new variables to mmu_context_t: ctx_id and tlb_gen. +ctx_id uniquely identifies the mm_struct and will never be reused. +For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic +count of the number of times that a TLB flush has been requested. +The pair (ctx_id, tlb_gen) can be used as an identifier for TLB +flush actions and will be used in subsequent patches to reliably +determine whether all needed TLB flushes have occurred on a given +CPU. + +This patch is split out for ease of review. By itself, it has no +real effect other than creating and updating the new variables. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Reviewed-by: Nadav Amit <nadav.amit@gmail.com> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/mmu.h | 15 +++++++++++++-- + arch/x86/include/asm/mmu_context.h | 5 +++++ + arch/x86/mm/tlb.c | 2 ++ + 3 files changed, 20 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h +index 8b272a0..e2e0934 100644 +--- a/arch/x86/include/asm/mmu.h ++++ b/arch/x86/include/asm/mmu.h +@@ -3,12 +3,18 @@ + + #include <linux/spinlock.h> + #include <linux/mutex.h> ++#include <linux/atomic.h> + + /* +- * The x86 doesn't have a mmu context, but +- * we put the segment information here. ++ * x86 has arch-specific MMU state beyond what lives in mm_struct. + */ + typedef struct { ++ /* ++ * ctx_id uniquely identifies this mm_struct. A ctx_id will never ++ * be reused, and zero is not a valid ctx_id. 
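++ *
++ * (init_mm is statically assigned ctx_id 1 via INIT_MM_CONTEXT
++ * below; every other mm gets atomic64_inc_return(&last_mm_ctx_id)
++ * in init_new_context(), so dynamically allocated ids start at 2.)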
++ */ ++ u64 ctx_id; ++ + #ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + #endif +@@ -33,6 +39,11 @@ typedef struct { + #endif + } mm_context_t; + ++#define INIT_MM_CONTEXT(mm) \ ++ .context = { \ ++ .ctx_id = 1, \ ++ } ++ + void leave_mm(int cpu); + + #endif /* _ASM_X86_MMU_H */ +diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h +index 762d6c6..1ed17c92 100644 +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -12,6 +12,9 @@ + #include <asm/tlbflush.h> + #include <asm/paravirt.h> + #include <asm/mpx.h> ++ ++extern atomic64_t last_mm_ctx_id; ++ + #ifndef CONFIG_PARAVIRT + static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +@@ -106,6 +109,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) + { ++ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); ++ + #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { + /* pkey 0 is the default and always allocated */ +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 613d07e..146e842 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -29,6 +29,8 @@ + * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi + */ + ++atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); ++ + struct flush_tlb_info { + struct mm_struct *flush_mm; + unsigned long flush_start; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch new file mode 100644 index 00000000..e6531584 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch @@ -0,0 +1,148 @@ +From 891112052277801e900b37496ca8c260a5e7e7e1 Mon Sep 17 00:00:00 2001 +From: Ingo Molnar <mingo@kernel.org> +Date: Tue, 13 Feb 2018 09:03:08 +0100 +Subject: [PATCH 05/12] x86/speculation: Clean up various Spectre related + details + +commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream. + +Harmonize all the Spectre messages so that a: + + dmesg | grep -i spectre + +... gives us most Spectre related kernel boot messages. 
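+
+For example (taken verbatim from the bugs.c hunks below), messages
+gain a common, greppable prefix:
+
+   pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");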
+ +Also fix a few other details: + + - clarify a comment about firmware speculation control + + - s/KPTI/PTI + + - remove various line-breaks that made the code uglier + +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++----------------- + 1 file changed, 11 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 957ad44..b83e0c9 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -161,8 +161,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { +- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, +- sizeof(arg)); ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + +@@ -174,8 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + } + + if (i >= ARRAY_SIZE(mitigation_options)) { +- pr_err("unknown option (%s). Switching to AUTO select\n", +- mitigation_options[i].option); ++ pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + } +@@ -184,8 +182,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { +- pr_err("%s selected but not compiled in. Switching to AUTO select\n", +- mitigation_options[i].option); ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + +@@ -255,14 +252,14 @@ static void __init spectre_v2_select_mitigation(void) + goto retpoline_auto; + break; + } +- pr_err("kernel not compiled with retpoline; no mitigation available!"); ++ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); + return; + + retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { +- pr_err("LFENCE not serializing. Switching to generic retpoline\n"); ++ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : +@@ -280,7 +277,7 @@ static void __init spectre_v2_select_mitigation(void) + pr_info("%s\n", spectre_v2_strings[mode]); + + /* +- * If neither SMEP or KPTI are available, there is a risk of ++ * If neither SMEP nor PTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this fill + * the entire RSB, even when using IBRS. 
+@@ -294,21 +291,20 @@ static void __init spectre_v2_select_mitigation(void) + if ((!boot_cpu_has(X86_FEATURE_KAISER) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); +- pr_info("Filling RSB on context switch\n"); ++ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); + } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); +- pr_info("Enabling Indirect Branch Prediction Barrier\n"); ++ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + } + + #undef pr_fmt + + #ifdef CONFIG_SYSFS +-ssize_t cpu_show_meltdown(struct device *dev, +- struct device_attribute *attr, char *buf) ++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) + { + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); +@@ -317,16 +313,14 @@ ssize_t cpu_show_meltdown(struct device *dev, + return sprintf(buf, "Vulnerable\n"); + } + +-ssize_t cpu_show_spectre_v1(struct device *dev, +- struct device_attribute *attr, char *buf) ++ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) + { + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + } + +-ssize_t cpu_show_spectre_v2(struct device *dev, +- struct device_attribute *attr, char *buf) ++ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) + { + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch new file mode 100644 index 00000000..01f7310c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch @@ -0,0 +1,212 @@ +From 4e010256639fdd9c87743dc7c7ad6a53bc96c1af Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Tue, 5 Sep 2017 12:05:01 -0700 +Subject: [PATCH 006/102] kaiser: do not set _PAGE_NX on pgd_none + +native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must +avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry: +usually that just generated a warning on exit, but sometimes +more mysterious and damaging failures (our production machines +could not complete booting). + +The original fix to this just avoided adding _PAGE_NX to +an empty entry; but eventually more problems surfaced with kexec, +and EFI mapping expected to be a problem too. So now instead +change native_set_pgd() to update shadow only if _PAGE_USER: + +A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure) +use set_pgd() to set up a temporary internal virtual address space, with +physical pages remapped at what Kaiser regards as userspace addresses: +Kaiser then assumes a shadow pgd follows, which it will try to corrupt. + +This appears to be responsible for the recent kexec and kdump failures; +though it's unclear how those did not manifest as a problem before. +Ah, the shadow pgd will only be assumed to "follow" if the requested +pgd is on an even-numbered page: so I suppose it was going wrong 50% +of the time all along. 
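+
+(Editor's sketch of why it is 50%: native_get_shadow_pgd() below just
+ORs PAGE_SIZE into the pointer,
+
+	pgd_t *shadow = (pgd_t *)((unsigned long)pgdp | PAGE_SIZE);
+
+so on an even-numbered page the "shadow" write scribbles on whatever
+page follows, while on an odd-numbered page the OR is a no-op and the
+write harmlessly rewrites the entry itself.)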
+ +What we need is a flag to set_pgd(), to tell it we're dealing with +userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying? +Add a test for that. But we cannot do the same for pgd_clear() +(which may be called to clear corrupted entries - set aside the +question of "corrupt in which pgd?" until later), so there just +rely on pgd_clear() not being called in the problematic cases - +with a WARN_ON_ONCE() which should fire half the time if it is. + +But this is getting too big for an inline function: move it into +arch/x86/mm/kaiser.c (which then demands a boot/compressed mod); +and de-void and de-space native_get_shadow/normal_pgd() while here. + +Also make an unnecessary change to KASLR's init_trampoline(): it was +using set_pgd() to assign a pgd-value to a global variable (not in a +pg directory page), which was rather scary given Kaiser's previous +set_pgd() implementation: not a problem now, but too scary to leave +as was, it could easily blow up if we have to change set_pgd() again. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/boot/compressed/misc.h | 1 + + arch/x86/include/asm/pgtable_64.h | 51 ++++++++++----------------------------- + arch/x86/mm/kaiser.c | 42 ++++++++++++++++++++++++++++++++ + arch/x86/mm/kaslr.c | 4 +-- + 4 files changed, 58 insertions(+), 40 deletions(-) + +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index 1c8355e..cd80024 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -9,6 +9,7 @@ + */ + #undef CONFIG_PARAVIRT + #undef CONFIG_PARAVIRT_SPINLOCKS ++#undef CONFIG_KAISER + #undef CONFIG_KASAN + + #include <linux/linkage.h> +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index 000265c..177caf3 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud) + } + + #ifdef CONFIG_KAISER +-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); ++ ++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + { +- return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); ++ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); + } + +-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) + { +- return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); ++ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); + } + #else +-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) ++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) ++{ ++ return pgd; ++} ++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + { + BUILD_BUG_ON(1); + return NULL; + } +-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) ++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) + { + return pgdp; + } + #endif /* CONFIG_KAISER */ + +-/* +- * Page table pages are page-aligned. The lower half of the top +- * level is used for userspace and the top half for the kernel. +- * This returns true for user pages that need to get copied into +- * both the user and kernel copies of the page tables, and false +- * for kernel pages that should only be in the kernel copy. 
+- */ +-static inline bool is_userspace_pgd(void *__ptr) +-{ +- unsigned long ptr = (unsigned long)__ptr; +- +- return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); +-} +- + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { +-#ifdef CONFIG_KAISER +- pteval_t extra_kern_pgd_flags = 0; +- /* Do we need to also populate the shadow pgd? */ +- if (is_userspace_pgd(pgdp)) { +- native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; +- /* +- * Even if the entry is *mapping* userspace, ensure +- * that userspace can not use it. This way, if we +- * get out to userspace running on the kernel CR3, +- * userspace will crash instead of running. +- */ +- extra_kern_pgd_flags = _PAGE_NX; +- } +- pgdp->pgd = pgd.pgd; +- pgdp->pgd |= extra_kern_pgd_flags; +-#else /* CONFIG_KAISER */ +- *pgdp = pgd; +-#endif ++ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); + } + + static inline void native_pgd_clear(pgd_t *pgd) +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 7270a29..8d6061c 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) + unmap_pud_range_nofree(pgd, addr, end); + } + } ++ ++/* ++ * Page table pages are page-aligned. The lower half of the top ++ * level is used for userspace and the top half for the kernel. ++ * This returns true for user pages that need to get copied into ++ * both the user and kernel copies of the page tables, and false ++ * for kernel pages that should only be in the kernel copy. ++ */ ++static inline bool is_userspace_pgd(pgd_t *pgdp) ++{ ++ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2); ++} ++ ++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) ++{ ++ /* ++ * Do we need to also populate the shadow pgd? Check _PAGE_USER to ++ * skip cases like kexec and EFI which make temporary low mappings. ++ */ ++ if (pgd.pgd & _PAGE_USER) { ++ if (is_userspace_pgd(pgdp)) { ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ /* ++ * Even if the entry is *mapping* userspace, ensure ++ * that userspace can not use it. This way, if we ++ * get out to userspace running on the kernel CR3, ++ * userspace will crash instead of running. ++ */ ++ pgd.pgd |= _PAGE_NX; ++ } ++ } else if (!pgd.pgd) { ++ /* ++ * pgd_clear() cannot check _PAGE_USER, and is even used to ++ * clear corrupted pgd entries: so just rely on cases like ++ * kexec and EFI never to be using pgd_clear(). 
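++ * (Hence the WARN_ON_ONCE below: it trips when pgd_clear() is
++ * handed an odd-page pgd, i.e. about half of any problematic
++ * use, as the changelog says.)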
++ */ ++ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) && ++ is_userspace_pgd(pgdp)) ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ } ++ return pgd; ++} + #endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c +index aed2064..9284ec1 100644 +--- a/arch/x86/mm/kaslr.c ++++ b/arch/x86/mm/kaslr.c +@@ -189,6 +189,6 @@ void __meminit init_trampoline(void) + *pud_tramp = *pud; + } + +- set_pgd(&trampoline_pgd_entry, +- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); ++ /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ ++ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch new file mode 100644 index 00000000..e91992c0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch @@ -0,0 +1,63 @@ +From bd9bf4f96e31d86d230db1f5243608f3a500123d Mon Sep 17 00:00:00 2001 +From: Waiman Long <longman@redhat.com> +Date: Mon, 22 Jan 2018 17:09:34 -0500 +Subject: [PATCH 06/42] x86/retpoline: Remove the esp/rsp thunk + +(cherry picked from commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2) + +It doesn't make sense to have an indirect call thunk with esp/rsp as +retpoline code won't work correctly with the stack pointer register. +Removing it will help compiler writers to catch error in case such +a thunk call is emitted incorrectly. + +Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") +Suggested-by: Jeff Law <law@redhat.com> +Signed-off-by: Waiman Long <longman@redhat.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Kees Cook <keescook@google.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/asm-prototypes.h | 1 - + arch/x86/lib/retpoline.S | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index b15aa40..5a25ada 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -37,5 +37,4 @@ INDIRECT_THUNK(dx) + INDIRECT_THUNK(si) + INDIRECT_THUNK(di) + INDIRECT_THUNK(bp) +-INDIRECT_THUNK(sp) + #endif /* CONFIG_RETPOLINE */ +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index dfb2ba9..c909961 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) + GENERATE_THUNK(_ASM_SI) + GENERATE_THUNK(_ASM_DI) + GENERATE_THUNK(_ASM_BP) +-GENERATE_THUNK(_ASM_SP) + #ifdef CONFIG_64BIT + GENERATE_THUNK(r8) + GENERATE_THUNK(r9) +-- +2.7.4 + diff --git 
a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch new file mode 100644 index 00000000..8f996720 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch @@ -0,0 +1,39 @@ +From eeedd09281a09c8f0470c638939a5121ca753461 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Tue, 6 Feb 2018 18:22:40 -0800 +Subject: [PATCH 06/12] x86/speculation: Fix up array_index_nospec_mask() asm + constraint + +commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream. + +Allow the compiler to handle @size as an immediate value or memory +directly rather than allocating a register. + +Reported-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/barrier.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h +index 8575903..78d1c6a 100644 +--- a/arch/x86/include/asm/barrier.h ++++ b/arch/x86/include/asm/barrier.h +@@ -39,7 +39,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) +- :"r"(size),"r" (index) ++ :"g"(size),"r" (index) + :"cc"); + return mask; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch new file mode 100644 index 00000000..90877ac8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch @@ -0,0 +1,129 @@ +From b3ad1b7521b3f4aaddc02e93ce3835bcac48da35 Mon Sep 17 00:00:00 2001 +From: Tim Chen <tim.c.chen@linux.intel.com> +Date: Mon, 29 Jan 2018 22:04:47 +0000 +Subject: [PATCH 06/14] x86/speculation: Use Indirect Branch Prediction Barrier + in context switch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 upstream. + +Flush indirect branches when switching into a process that marked itself +non dumpable. This protects high value processes like gpg better, +without having too high performance overhead. + +If done naïvely, we could switch to a kernel idle thread and then back +to the original process, such as: + + process A -> idle -> process A + +In such scenario, we do not have to do IBPB here even though the process +is non-dumpable, as we are switching back to the same process after a +hiatus. + +To avoid the redundant IBPB, which is expensive, we track the last mm +user context ID. The cost is to have an extra u64 mm context id to track +the last mm we were using before switching to the init_mm used by idle. +Avoiding the extra IBPB is probably worth the extra memory for this +common scenario. 
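+
+In code, the context-switch check boils down to (quoted from the
+tlb.c hunk below):
+
+	if (tsk && tsk->mm &&
+	    tsk->mm->context.ctx_id != last_ctx_id &&
+	    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+		indirect_branch_prediction_barrier();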
+ +For those cases where tlb_defer_switch_to_init_mm() returns true (non +PCID), lazy tlb will defer switch to init_mm, so we will not be changing +the mm for the process A -> idle -> process A switch. So IBPB will be +skipped for this case. + +Thanks to the reviewers and Andy Lutomirski for the suggestion of +using ctx_id which got rid of the problem of mm pointer recycling. + +Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: ak@linux.intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: linux@dominikbrodowski.net +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: luto@kernel.org +Cc: pbonzini@redhat.com +Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 2 ++ + arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 94146f6..99185a0 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void) + struct tlb_state { + struct mm_struct *active_mm; + int state; ++ /* last user mm's ctx id */ ++ u64 last_ctx_id; + + /* + * Access to this CR4 shadow and to H/W CR4 is protected by +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 146e842..b1bf41b 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -10,6 +10,7 @@ + + #include <asm/tlbflush.h> + #include <asm/mmu_context.h> ++#include <asm/nospec-branch.h> + #include <asm/cache.h> + #include <asm/apic.h> + #include <asm/uv/uv.h> +@@ -106,6 +107,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { ++ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); ++ ++ /* ++ * Avoid user/user BTB poisoning by flushing the branch ++ * predictor when switching between processes. This stops ++ * one process from doing Spectre-v2 attacks on another. ++ * ++ * As an optimization, flush indirect branches only when ++ * switching into processes that disable dumping. This ++ * protects high value processes like gpg, without having ++ * too high performance overhead. IBPB is *expensive*! ++ * ++ * This will not flush branches when switching into kernel ++ * threads. It will also not flush if we switch to idle ++ * thread and back to the same process. It will flush if we ++ * switch to a different non-dumpable process. ++ */ ++ if (tsk && tsk->mm && ++ tsk->mm->context.ctx_id != last_ctx_id && ++ get_dumpable(tsk->mm) != SUID_DUMP_USER) ++ indirect_branch_prediction_barrier(); ++ + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* + * If our current stack is in vmalloc space and isn't +@@ -120,6 +143,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } + ++ /* ++ * Record last user mm's context id, so we can avoid ++ * flushing branch buffer with IBPB if we switch back ++ * to the same user. 
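++ * (init_mm is skipped so that a process -> idle -> same-process
++ * sequence still compares equal and needs no IBPB.)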
++ */ ++ if (next != &init_mm) ++ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); ++ + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + this_cpu_write(cpu_tlbstate.active_mm, next); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch new file mode 100644 index 00000000..69809c28 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch @@ -0,0 +1,183 @@ +From 8dfc905d7d2e3c68f31eca0178b6137b2e1fc7f9 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Thu, 8 Mar 2018 16:17:34 +0100 +Subject: [PATCH 07/14] bpf, x64: implement retpoline for tail call + +[ upstream commit a493a87f38cfa48caaa95c9347be2d914c6fdf29 ] + +Implement a retpoline [0] for the BPF tail call JIT'ing that converts +the indirect jump via jmp %rax that is used to make the long jump into +another JITed BPF image. Since this is subject to speculative execution, +we need to control the transient instruction sequence here as well +when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. +The latter aligns also with what gcc / clang emits (e.g. [1]). + +JIT dump after patch: + + # bpftool p d x i 1 + 0: (18) r2 = map[id:1] + 2: (b7) r3 = 0 + 3: (85) call bpf_tail_call#12 + 4: (b7) r0 = 2 + 5: (95) exit + +With CONFIG_RETPOLINE: + + # bpftool p d j i 1 + [...] + 33: cmp %edx,0x24(%rsi) + 36: jbe 0x0000000000000072 |* + 38: mov 0x24(%rbp),%eax + 3e: cmp $0x20,%eax + 41: ja 0x0000000000000072 | + 43: add $0x1,%eax + 46: mov %eax,0x24(%rbp) + 4c: mov 0x90(%rsi,%rdx,8),%rax + 54: test %rax,%rax + 57: je 0x0000000000000072 | + 59: mov 0x28(%rax),%rax + 5d: add $0x25,%rax + 61: callq 0x000000000000006d |+ + 66: pause | + 68: lfence | + 6b: jmp 0x0000000000000066 | + 6d: mov %rax,(%rsp) | + 71: retq | + 72: mov $0x2,%eax + [...] + + * relative fall-through jumps in error case + + retpoline for indirect jump + +Without CONFIG_RETPOLINE: + + # bpftool p d j i 1 + [...] + 33: cmp %edx,0x24(%rsi) + 36: jbe 0x0000000000000063 |* + 38: mov 0x24(%rbp),%eax + 3e: cmp $0x20,%eax + 41: ja 0x0000000000000063 | + 43: add $0x1,%eax + 46: mov %eax,0x24(%rbp) + 4c: mov 0x90(%rsi,%rdx,8),%rax + 54: test %rax,%rax + 57: je 0x0000000000000063 | + 59: mov 0x28(%rax),%rax + 5d: add $0x25,%rax + 61: jmpq *%rax |- + 63: mov $0x2,%eax + [...] 
+ + * relative fall-through jumps in error case + - plain indirect jump as before + + [0] https://support.google.com/faqs/answer/7625886 + [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b + +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 37 ++++++++++++++++++++++++++++++++++++ + arch/x86/net/bpf_jit_comp.c | 9 +++++---- + 2 files changed, 42 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 76b0585..81a1be3 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) + } + + #endif /* __ASSEMBLY__ */ ++ ++/* ++ * Below is used in the eBPF JIT compiler and emits the byte sequence ++ * for the following assembly: ++ * ++ * With retpolines configured: ++ * ++ * callq do_rop ++ * spec_trap: ++ * pause ++ * lfence ++ * jmp spec_trap ++ * do_rop: ++ * mov %rax,(%rsp) ++ * retq ++ * ++ * Without retpolines configured: ++ * ++ * jmp *%rax ++ */ ++#ifdef CONFIG_RETPOLINE ++# define RETPOLINE_RAX_BPF_JIT_SIZE 17 ++# define RETPOLINE_RAX_BPF_JIT() \ ++ EMIT1_off32(0xE8, 7); /* callq do_rop */ \ ++ /* spec_trap: */ \ ++ EMIT2(0xF3, 0x90); /* pause */ \ ++ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ ++ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ ++ /* do_rop: */ \ ++ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ ++ EMIT1(0xC3); /* retq */ ++#else ++# define RETPOLINE_RAX_BPF_JIT_SIZE 2 ++# define RETPOLINE_RAX_BPF_JIT() \ ++ EMIT2(0xFF, 0xE0); /* jmp *%rax */ ++#endif ++ + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 7840331..1f7ed2e 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -12,6 +12,7 @@ + #include <linux/filter.h> + #include <linux/if_vlan.h> + #include <asm/cacheflush.h> ++#include <asm/nospec-branch.h> + #include <linux/bpf.h> + + int bpf_jit_enable __read_mostly; +@@ -281,7 +282,7 @@ static void emit_bpf_tail_call(u8 **pprog) + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ + offsetof(struct bpf_array, map.max_entries)); +-#define OFFSET1 43 /* number of bytes to jump */ ++#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ + EMIT2(X86_JBE, OFFSET1); /* jbe out */ + label1 = cnt; + +@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) + */ + EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +-#define OFFSET2 32 ++#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) + EMIT2(X86_JA, OFFSET2); /* ja out */ + label2 = cnt; + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ +@@ -304,7 +305,7 @@ static void emit_bpf_tail_call(u8 **pprog) + * goto out; + */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ +-#define OFFSET3 10 ++#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) + EMIT2(X86_JE, OFFSET3); /* je out */ + label3 = cnt; + +@@ -317,7 +318,7 @@ static void emit_bpf_tail_call(u8 **pprog) + * rdi == ctx (1st arg) + * rax == prog->bpf_func + prologue_size + */ +- EMIT2(0xFF, 0xE0); /* jmp rax */ ++ RETPOLINE_RAX_BPF_JIT(); + + /* out: */ + BUILD_BUG_ON(cnt - label1 != 
OFFSET1); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch new file mode 100644 index 00000000..c34c59d8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch @@ -0,0 +1,145 @@ +From 4b7cba0c9e6cd74dd190b1d730d07f3c682cb1c9 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 18:57:03 -0700 +Subject: [PATCH 007/102] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE + +Kaiser only needs to map one page of the stack; and +kernel/fork.c did not build on powerpc (no __PAGE_KERNEL). +It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack() +and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's +kaiser_add_mapping() and kaiser_remove_mapping(). And use +linux/kaiser.h in init/main.c to avoid the #ifdefs there. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++------- + init/main.c | 6 +----- + kernel/fork.c | 7 ++----- + 3 files changed, 36 insertions(+), 17 deletions(-) + +diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h +index 9db5433..4a4d6d9 100644 +--- a/include/linux/kaiser.h ++++ b/include/linux/kaiser.h +@@ -1,26 +1,52 @@ +-#ifndef _INCLUDE_KAISER_H +-#define _INCLUDE_KAISER_H ++#ifndef _LINUX_KAISER_H ++#define _LINUX_KAISER_H + + #ifdef CONFIG_KAISER + #include <asm/kaiser.h> ++ ++static inline int kaiser_map_thread_stack(void *stack) ++{ ++ /* ++ * Map that page of kernel stack on which we enter from user context. ++ */ ++ return kaiser_add_mapping((unsigned long)stack + ++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL); ++} ++ ++static inline void kaiser_unmap_thread_stack(void *stack) ++{ ++ /* ++ * Note: may be called even when kaiser_map_thread_stack() failed. ++ */ ++ kaiser_remove_mapping((unsigned long)stack + ++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE); ++} + #else + + /* + * These stubs are used whenever CONFIG_KAISER is off, which +- * includes architectures that support KAISER, but have it +- * disabled. ++ * includes architectures that support KAISER, but have it disabled. 
+ */ + + static inline void kaiser_init(void) + { + } +-static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) ++static inline int kaiser_add_mapping(unsigned long addr, ++ unsigned long size, unsigned long flags) ++{ ++ return 0; ++} ++static inline void kaiser_remove_mapping(unsigned long start, ++ unsigned long size) + { + } +-static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) ++static inline int kaiser_map_thread_stack(void *stack) + { + return 0; + } ++static inline void kaiser_unmap_thread_stack(void *stack) ++{ ++} + + #endif /* !CONFIG_KAISER */ +-#endif /* _INCLUDE_KAISER_H */ ++#endif /* _LINUX_KAISER_H */ +diff --git a/init/main.c b/init/main.c +index d2c8c23..eb47369 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -81,15 +81,13 @@ + #include <linux/integrity.h> + #include <linux/proc_ns.h> + #include <linux/io.h> ++#include <linux/kaiser.h> + + #include <asm/io.h> + #include <asm/bugs.h> + #include <asm/setup.h> + #include <asm/sections.h> + #include <asm/cacheflush.h> +-#ifdef CONFIG_KAISER +-#include <asm/kaiser.h> +-#endif + + static int kernel_init(void *); + +@@ -477,9 +475,7 @@ static void __init mm_init(void) + pgtable_init(); + vmalloc_init(); + ioremap_huge_init(); +-#ifdef CONFIG_KAISER + kaiser_init(); +-#endif + } + + asmlinkage __visible void __init start_kernel(void) +diff --git a/kernel/fork.c b/kernel/fork.c +index 8013f22..6b0e8bd 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -212,12 +212,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + #endif + } + +-extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); + static inline void free_thread_stack(struct task_struct *tsk) + { +-#ifdef CONFIG_KAISER +- kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); +-#endif ++ kaiser_unmap_thread_stack(tsk->stack); + #ifdef CONFIG_VMAP_STACK + if (task_stack_vm_area(tsk)) { + unsigned long flags; +@@ -501,7 +498,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + */ + tsk->stack = stack; + +- err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); ++ err= kaiser_map_thread_stack(tsk->stack); + if (err) + goto free_stack; + #ifdef CONFIG_VMAP_STACK +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch new file mode 100644 index 00000000..be5712b6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch @@ -0,0 +1,159 @@ +From dabd9b2a92eda21c93aeee9f7bf8f369fed15833 Mon Sep 17 00:00:00 2001 +From: Andi Kleen <ak@linux.intel.com> +Date: Thu, 25 Jan 2018 15:50:28 -0800 +Subject: [PATCH 07/42] module/retpoline: Warn about missing retpoline in + module + +(cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8) + +There's a risk that a kernel which has full retpoline mitigations becomes +vulnerable when a module gets loaded that hasn't been compiled with the +right compiler or the right option. + +To enable detection of that mismatch at module load time, add a module info +string "retpoline" at build time when the module was compiled with +retpoline support. This only covers compiled C source, but assembler source +or prebuilt object files are not checked. 
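+
+Concretely, the marker is this fragment, emitted into each module's
+generated *.mod.c by the modpost.c hunk below (MODULE_INFO() stores it
+as the string "retpoline=Y" in the module's .modinfo section):
+
+  #ifdef RETPOLINE
+  MODULE_INFO(retpoline, "Y");
+  #endif
+
+At load time the kernel can then query that string via get_modinfo().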
+ +If a retpoline enabled kernel detects a non retpoline protected module at +load time, print a warning and report it in the sysfs vulnerability file. + +[ tglx: Massaged changelog ] + +Signed-off-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: gregkh@linuxfoundation.org +Cc: torvalds@linux-foundation.org +Cc: jeyu@kernel.org +Cc: arjan@linux.intel.com +Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- + include/linux/module.h | 9 +++++++++ + kernel/module.c | 11 +++++++++++ + scripts/mod/modpost.c | 9 +++++++++ + 4 files changed, 45 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 8cacf62..4cea7d4 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -10,6 +10,7 @@ + #include <linux/init.h> + #include <linux/utsname.h> + #include <linux/cpu.h> ++#include <linux/module.h> + + #include <asm/nospec-branch.h> + #include <asm/cmdline.h> +@@ -92,6 +93,19 @@ static const char *spectre_v2_strings[] = { + #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + + static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; ++static bool spectre_v2_bad_module; ++ ++#ifdef RETPOLINE ++bool retpoline_module_ok(bool has_retpoline) ++{ ++ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) ++ return true; ++ ++ pr_err("System may be vunerable to spectre v2\n"); ++ spectre_v2_bad_module = true; ++ return false; ++} ++#endif + + static void __init spec2_print_if_insecure(const char *reason) + { +@@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + +- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); ++ return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ spectre_v2_bad_module ? " - vulnerable module loaded" : ""); + } + #endif +diff --git a/include/linux/module.h b/include/linux/module.h +index 0c3207d..d2224a0 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, + static inline void module_bug_cleanup(struct module *mod) {} + #endif /* CONFIG_GENERIC_BUG */ + ++#ifdef RETPOLINE ++extern bool retpoline_module_ok(bool has_retpoline); ++#else ++static inline bool retpoline_module_ok(bool has_retpoline) ++{ ++ return true; ++} ++#endif ++ + #ifdef CONFIG_MODULE_SIG + static inline bool module_sig_ok(struct module *module) + { +diff --git a/kernel/module.c b/kernel/module.c +index 0e54d5b..07bfb99 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) + } + #endif /* CONFIG_LIVEPATCH */ + ++static void check_modinfo_retpoline(struct module *mod, struct load_info *info) ++{ ++ if (retpoline_module_ok(get_modinfo(info, "retpoline"))) ++ return; ++ ++ pr_warn("%s: loading module not compiled with retpoline compiler.\n", ++ mod->name); ++} ++ + /* Sets info->hdr and info->len. 
*/ + static int copy_module_from_user(const void __user *umod, unsigned long len, + struct load_info *info) +@@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) + add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); + } + ++ check_modinfo_retpoline(mod, info); ++ + if (get_modinfo(info, "staging")) { + add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); + pr_warn("%s: module is from the staging directory, the quality " +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 325f1af..96a8047 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) + buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); + } + ++/* Cannot check for assembler */ ++static void add_retpoline(struct buffer *b) ++{ ++ buf_printf(b, "\n#ifdef RETPOLINE\n"); ++ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); ++ buf_printf(b, "#endif\n"); ++} ++ + static void add_staging_flag(struct buffer *b, const char *name) + { + static const char *staging_dir = "drivers/staging"; +@@ -2474,6 +2482,7 @@ int main(int argc, char **argv) + + add_header(&buf, mod); + add_intree_flag(&buf, !external_module); ++ add_retpoline(&buf); + add_staging_flag(&buf, mod->name); + err |= add_versions(&buf, mod); + add_depends(&buf, mod, modules); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch new file mode 100644 index 00000000..abf0b6ba --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch @@ -0,0 +1,50 @@ +From ae5dca4c2f9a62ec120a32663609b3dabfeb8ae4 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 13 Feb 2018 14:28:19 +0100 +Subject: [PATCH 07/12] x86/speculation: Add <asm/msr-index.h> dependency + +commit ea00f301285ea2f07393678cd2b6057878320c9d upstream. + +Joe Konno reported a compile failure resulting from using an MSR +without inclusion of <asm/msr-index.h>, and while the current code builds +fine (by accident) this needs fixing for future patches. 
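+
+A minimal illustration of the latent breakage (ibpb_example() is a
+made-up name for this sketch; the real user is
+indirect_branch_prediction_barrier() in this same header):
+
+  #include <asm/msr-index.h>  /* defines MSR_IA32_PRED_CMD, PRED_CMD_IBPB */
+
+  static inline void ibpb_example(void)
+  {
+          /* both constants are undeclared without the include above */
+          wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+  }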
+ +Reported-by: Joe Konno <joe.konno@linux.intel.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: arjan@linux.intel.com +Cc: bp@alien8.de +Cc: dan.j.williams@intel.com +Cc: dave.hansen@linux.intel.com +Cc: dwmw2@infradead.org +Cc: dwmw@amazon.co.uk +Cc: gregkh@linuxfoundation.org +Cc: hpa@zytor.com +Cc: jpoimboe@redhat.com +Cc: linux-tip-commits@vger.kernel.org +Cc: luto@kernel.org +Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support") +Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 300cc15..76b0585 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -6,6 +6,7 @@ + #include <asm/alternative.h> + #include <asm/alternative-asm.h> + #include <asm/cpufeatures.h> ++#include <asm/msr-index.h> + + #ifdef __ASSEMBLY__ + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch new file mode 100644 index 00000000..6daaa525 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch @@ -0,0 +1,55 @@ +From fa16ed9c57a88f92dea098848d07f5ffd224a14d Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 17:09:44 -0700 +Subject: [PATCH 008/102] kaiser: fix build and FIXME in alloc_ldt_struct() + +Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without +CONFIG_KAISER. kaiser_add_mapping() does already return an error code, +so fix the FIXME. 
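+
+What makes the unconditional error check safe is the stub half of
+<linux/kaiser.h> from the previous patch; condensed sketch:
+
+  #ifdef CONFIG_KAISER
+  #include <asm/kaiser.h>     /* real kaiser_add_mapping() */
+  #else
+  static inline int kaiser_add_mapping(unsigned long addr,
+                                       unsigned long size,
+                                       unsigned long flags)
+  {
+          return 0;           /* no-op stub always "succeeds" */
+  }
+  #endif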
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/ldt.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c +index 3c2d55b..8331bad 100644 +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ -15,9 +15,9 @@ + #include <linux/slab.h> + #include <linux/vmalloc.h> + #include <linux/uaccess.h> ++#include <linux/kaiser.h> + + #include <asm/ldt.h> +-#include <asm/kaiser.h> + #include <asm/desc.h> + #include <asm/mmu_context.h> + #include <asm/syscalls.h> +@@ -48,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) + { + struct ldt_struct *new_ldt; + int alloc_size; +- int ret = 0; ++ int ret; + + if (size > LDT_ENTRIES) + return NULL; +@@ -76,10 +76,8 @@ static struct ldt_struct *alloc_ldt_struct(int size) + return NULL; + } + +- // FIXME: make kaiser_add_mapping() return an error code +- // when it fails +- kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, +- __PAGE_KERNEL); ++ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, ++ __PAGE_KERNEL); + if (ret) { + __free_ldt_struct(new_ldt); + return NULL; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch new file mode 100644 index 00000000..5dc0b927 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch @@ -0,0 +1,760 @@ +From 4ac936f6e6b191d2eac4083da651826a8bb7b03b Mon Sep 17 00:00:00 2001 +From: Jia Zhang <qianyue.zj@alibaba-inc.com> +Date: Mon, 1 Jan 2018 09:52:10 +0800 +Subject: [PATCH 08/12] x86/cpu: Rename cpu_data.x86_mask to + cpu_data.x86_stepping + +commit b399151cb48db30ad1e0e93dd40d68c6d007b637 upstream. + +x86_mask is a confusing name which is hard to associate with the +processor's stepping. + +Additionally, correct an indent issue in lib/cpu.c. + +Signed-off-by: Jia Zhang <qianyue.zj@alibaba-inc.com> +[ Updated it to more recent kernels. 
] +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bp@alien8.de +Cc: tony.luck@intel.com +Link: http://lkml.kernel.org/r/1514771530-70829-1-git-send-email-qianyue.zj@alibaba-inc.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/events/intel/core.c | 2 +- + arch/x86/events/intel/lbr.c | 2 +- + arch/x86/events/intel/p6.c | 2 +- + arch/x86/include/asm/acpi.h | 2 +- + arch/x86/include/asm/processor.h | 2 +- + arch/x86/kernel/amd_nb.c | 2 +- + arch/x86/kernel/asm-offsets_32.c | 2 +- + arch/x86/kernel/cpu/amd.c | 26 +++++++++++++------------- + arch/x86/kernel/cpu/centaur.c | 4 ++-- + arch/x86/kernel/cpu/common.c | 8 ++++---- + arch/x86/kernel/cpu/cyrix.c | 2 +- + arch/x86/kernel/cpu/intel.c | 18 +++++++++--------- + arch/x86/kernel/cpu/microcode/intel.c | 2 +- + arch/x86/kernel/cpu/mtrr/generic.c | 2 +- + arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- + arch/x86/kernel/cpu/proc.c | 4 ++-- + arch/x86/kernel/head_32.S | 4 ++-- + arch/x86/kernel/mpparse.c | 2 +- + arch/x86/lib/cpu.c | 2 +- + drivers/char/hw_random/via-rng.c | 2 +- + drivers/cpufreq/acpi-cpufreq.c | 2 +- + drivers/cpufreq/longhaul.c | 6 +++--- + drivers/cpufreq/p4-clockmod.c | 2 +- + drivers/cpufreq/powernow-k7.c | 2 +- + drivers/cpufreq/speedstep-centrino.c | 4 ++-- + drivers/cpufreq/speedstep-lib.c | 6 +++--- + drivers/crypto/padlock-aes.c | 2 +- + drivers/edac/amd64_edac.c | 2 +- + drivers/edac/mce_amd.c | 2 +- + drivers/hwmon/coretemp.c | 6 +++--- + drivers/hwmon/hwmon-vid.c | 2 +- + drivers/hwmon/k10temp.c | 2 +- + drivers/hwmon/k8temp.c | 2 +- + drivers/video/fbdev/geode/video_gx.c | 2 +- + 34 files changed, 68 insertions(+), 68 deletions(-) + +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index cb85222..6b251fcc 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -3360,7 +3360,7 @@ static int intel_snb_pebs_broken(int cpu) + break; + + case INTEL_FAM6_SANDYBRIDGE_X: +- switch (cpu_data(cpu).x86_mask) { ++ switch (cpu_data(cpu).x86_stepping) { + case 6: rev = 0x618; break; + case 7: rev = 0x70c; break; + } +diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c +index 81b321a..34ba350 100644 +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -1128,7 +1128,7 @@ void __init intel_pmu_lbr_init_atom(void) + * on PMU interrupt + */ + if (boot_cpu_data.x86_model == 28 +- && boot_cpu_data.x86_mask < 10) { ++ && boot_cpu_data.x86_stepping < 10) { + pr_cont("LBR disabled due to erratum"); + return; + } +diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c +index 1f5c47a..c5e441b 100644 +--- a/arch/x86/events/intel/p6.c ++++ b/arch/x86/events/intel/p6.c +@@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = { + + static __init void p6_pmu_rdpmc_quirk(void) + { +- if (boot_cpu_data.x86_mask < 9) { ++ if (boot_cpu_data.x86_stepping < 9) { + /* + * PPro erratum 26; fixed in stepping 9 and above. 
+ */ +diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h +index 5391b0a..d32bab6 100644 +--- a/arch/x86/include/asm/acpi.h ++++ b/arch/x86/include/asm/acpi.h +@@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) + if (boot_cpu_data.x86 == 0x0F && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86_model <= 0x05 && +- boot_cpu_data.x86_mask < 0x0A) ++ boot_cpu_data.x86_stepping < 0x0A) + return 1; + else if (amd_e400_c1e_detected) + return 1; +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index a781668..df29212 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -88,7 +88,7 @@ struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; +- __u8 x86_mask; ++ __u8 x86_stepping; + #ifdef CONFIG_X86_32 + char wp_works_ok; /* It doesn't on 386's */ + +diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c +index 458da85..8fe41c6 100644 +--- a/arch/x86/kernel/amd_nb.c ++++ b/arch/x86/kernel/amd_nb.c +@@ -231,7 +231,7 @@ int amd_cache_northbridges(void) + if (boot_cpu_data.x86 == 0x10 && + boot_cpu_data.x86_model >= 0x8 && + (boot_cpu_data.x86_model > 0x9 || +- boot_cpu_data.x86_mask >= 0x1)) ++ boot_cpu_data.x86_stepping >= 0x1)) + amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; + + if (boot_cpu_data.x86 == 0x15) +diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c +index 880aa09..36ebb6d 100644 +--- a/arch/x86/kernel/asm-offsets_32.c ++++ b/arch/x86/kernel/asm-offsets_32.c +@@ -20,7 +20,7 @@ void foo(void) + OFFSET(CPUINFO_x86, cpuinfo_x86, x86); + OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); + OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); +- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); ++ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); + OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); + OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); + OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 1b89f0c..c375bc6 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -118,7 +118,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) + return; + } + +- if (c->x86_model == 6 && c->x86_mask == 1) { ++ if (c->x86_model == 6 && c->x86_stepping == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); +@@ -147,7 +147,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || +- (c->x86_model == 8 && c->x86_mask < 8)) { ++ (c->x86_model == 8 && c->x86_stepping < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; +@@ -166,7 +166,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) + return; + } + +- if ((c->x86_model == 8 && c->x86_mask > 7) || ++ if ((c->x86_model == 8 && c->x86_stepping > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + +@@ -219,7 +219,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ +- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { ++ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + pr_info("CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", +@@ -239,12 +239,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) + * but they are not certified as MP capable. + */ + /* Athlon 660/661 is valid. */ +- if ((c->x86_model == 6) && ((c->x86_mask == 0) || +- (c->x86_mask == 1))) ++ if ((c->x86_model == 6) && ((c->x86_stepping == 0) || ++ (c->x86_stepping == 1))) + return; + + /* Duron 670 is valid */ +- if ((c->x86_model == 7) && (c->x86_mask == 0)) ++ if ((c->x86_model == 7) && (c->x86_stepping == 0)) + return; + + /* +@@ -254,8 +254,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ +- if (((c->x86_model == 6) && (c->x86_mask >= 2)) || +- ((c->x86_model == 7) && (c->x86_mask >= 1)) || ++ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || ++ ((c->x86_model == 7) && (c->x86_stepping >= 1)) || + (c->x86_model > 7)) + if (cpu_has(c, X86_FEATURE_MP)) + return; +@@ -569,7 +569,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || +- (c->x86_model == 8 && c->x86_mask >= 8)) ++ (c->x86_model == 8 && c->x86_stepping >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); + #endif + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) +@@ -834,11 +834,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + /* Duron Rev A0 */ +- if (c->x86_model == 3 && c->x86_mask == 0) ++ if (c->x86_model == 3 && c->x86_stepping == 0) + size = 64; + /* Tbird rev A1/A2 */ + if (c->x86_model == 4 && +- (c->x86_mask == 0 || c->x86_mask == 1)) ++ (c->x86_stepping == 0 || c->x86_stepping == 1)) + size = 256; + } + return size; +@@ -975,7 +975,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) + } + + /* OSVW unavailable or ID unknown, match family-model-stepping range */ +- ms = (cpu->x86_model << 4) | cpu->x86_mask; ++ ms = (cpu->x86_model << 4) | cpu->x86_stepping; + while ((range = *erratum++)) + if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && + (ms >= AMD_MODEL_RANGE_START(range)) && +diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c +index 1661d8e..4d2f61f 100644 +--- a/arch/x86/kernel/cpu/centaur.c ++++ b/arch/x86/kernel/cpu/centaur.c +@@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c) + clear_cpu_cap(c, X86_FEATURE_TSC); + break; + case 8: +- switch (c->x86_mask) { ++ switch (c->x86_stepping) { + default: + name = "2"; + break; +@@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) + * - Note, it seems this may only be in engineering samples. 
+ */ + if ((c->x86 == 6) && (c->x86_model == 9) && +- (c->x86_mask == 1) && (size == 65)) ++ (c->x86_stepping == 1) && (size == 65)) + size -= 1; + return size; + } +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 08e89ed..96b2c83 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -699,7 +699,7 @@ void cpu_detect(struct cpuinfo_x86 *c) + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = x86_family(tfms); + c->x86_model = x86_model(tfms); +- c->x86_mask = x86_stepping(tfms); ++ c->x86_stepping = x86_stepping(tfms); + + if (cap0 & (1<<19)) { + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; +@@ -1146,7 +1146,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; +- c->x86_model = c->x86_mask = 0; /* So far unknown... */ ++ c->x86_model = c->x86_stepping = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; +@@ -1391,8 +1391,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) + + pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); + +- if (c->x86_mask || c->cpuid_level >= 0) +- pr_cont(", stepping: 0x%x)\n", c->x86_mask); ++ if (c->x86_stepping || c->cpuid_level >= 0) ++ pr_cont(", stepping: 0x%x)\n", c->x86_stepping); + else + pr_cont(")\n"); + +diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c +index bd9dcd6..455d8ad 100644 +--- a/arch/x86/kernel/cpu/cyrix.c ++++ b/arch/x86/kernel/cpu/cyrix.c +@@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) + + /* common case step number/rev -- exceptions handled below */ + c->x86_model = (dir1 >> 4) + 1; +- c->x86_mask = dir1 & 0xf; ++ c->x86_stepping = dir1 & 0xf; + + /* Now cook; the original recipe is by Channing Corn, from Cyrix. + * We do the same thing for each generation: we work out +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 02cb2e3..6ed206b 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -105,7 +105,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && +- c->x86_mask == spectre_bad_microcodes[i].stepping) ++ c->x86_stepping == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +@@ -158,7 +158,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + * need the microcode to have already been loaded... so if it is + * not, recommend a BIOS update and disable large pages. 
+ */ +- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && ++ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && + c->microcode < 0x20e) { + pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); + clear_cpu_cap(c, X86_FEATURE_PSE); +@@ -174,7 +174,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + + /* CPUID workaround for 0F33/0F34 CPU */ + if (c->x86 == 0xF && c->x86_model == 0x3 +- && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) ++ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) + c->x86_phys_bits = 36; + + /* +@@ -289,7 +289,7 @@ int ppro_with_ram_bug(void) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && +- boot_cpu_data.x86_mask < 8) { ++ boot_cpu_data.x86_stepping < 8) { + pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + return 1; + } +@@ -306,7 +306,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86 == 5 && +- c->x86_mask >= 1 && c->x86_mask <= 4 && ++ c->x86_stepping >= 1 && c->x86_stepping <= 4 && + c->x86_model <= 3) { + /* + * Remember we have B step Pentia with bugs +@@ -349,7 +349,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) + * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until + * model 3 mask 3 + */ +- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) ++ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* +@@ -367,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) + * P4 Xeon erratum 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { ++ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { + if (msr_set_bit(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { + pr_info("CPU: C0 stepping P4 Xeon detected.\n"); +@@ -382,7 +382,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) + * Specification Update"). 
+ */ + if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && +- (c->x86_mask < 0x6 || c->x86_mask == 0xb)) ++ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) + set_cpu_bug(c, X86_BUG_11AP); + + +@@ -601,7 +601,7 @@ static void init_intel(struct cpuinfo_x86 *c) + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; +- else if (c->x86_mask == 0 || c->x86_mask == 5) ++ else if (c->x86_stepping == 0 || c->x86_stepping == 5) + p = "Celeron-A"; + break; + +diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c +index cdc0dea..5d346c0 100644 +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -1055,7 +1055,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, + enum ucode_state ret; + + sprintf(name, "intel-ucode/%02x-%02x-%02x", +- c->x86, c->x86_model, c->x86_mask); ++ c->x86, c->x86_model, c->x86_stepping); + + if (request_firmware_direct(&firmware, name, device)) { + pr_debug("data file %s load failed\n", name); +diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c +index fdc5521..e12ee86 100644 +--- a/arch/x86/kernel/cpu/mtrr/generic.c ++++ b/arch/x86/kernel/cpu/mtrr/generic.c +@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, + */ + if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && +- boot_cpu_data.x86_mask <= 7) { ++ boot_cpu_data.x86_stepping <= 7) { + if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + return -EINVAL; +diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c +index 24e87e7..fae740c 100644 +--- a/arch/x86/kernel/cpu/mtrr/main.c ++++ b/arch/x86/kernel/cpu/mtrr/main.c +@@ -699,8 +699,8 @@ void __init mtrr_bp_init(void) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && +- (boot_cpu_data.x86_mask == 0x3 || +- boot_cpu_data.x86_mask == 0x4)) ++ (boot_cpu_data.x86_stepping == 0x3 || ++ boot_cpu_data.x86_stepping == 0x4)) + phys_addr = 36; + + size_or_mask = SIZE_OR_MASK_BITS(phys_addr); +diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c +index 18ca99f..9e817f2 100644 +--- a/arch/x86/kernel/cpu/proc.c ++++ b/arch/x86/kernel/cpu/proc.c +@@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + +- if (c->x86_mask || c->cpuid_level >= 0) +- seq_printf(m, "stepping\t: %d\n", c->x86_mask); ++ if (c->x86_stepping || c->cpuid_level >= 0) ++ seq_printf(m, "stepping\t: %d\n", c->x86_stepping); + else + seq_puts(m, "stepping\t: unknown\n"); + if (c->microcode) +diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S +index 2dabea4..82155d0 100644 +--- a/arch/x86/kernel/head_32.S ++++ b/arch/x86/kernel/head_32.S +@@ -35,7 +35,7 @@ + #define X86 new_cpu_data+CPUINFO_x86 + #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor + #define X86_MODEL new_cpu_data+CPUINFO_x86_model +-#define X86_MASK new_cpu_data+CPUINFO_x86_mask ++#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping + #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math + #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level + #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability +@@ -441,7 +441,7 @@ enable_paging: + shrb $4,%al + movb %al,X86_MODEL + andb $0x0f,%cl # mask mask revision +- movb %cl,X86_MASK ++ movb %cl,X86_STEPPING + movl %edx,X86_CAPABILITY + + is486: +diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c +index 0f8d204..d0fb941 100644 +--- a/arch/x86/kernel/mpparse.c ++++ b/arch/x86/kernel/mpparse.c +@@ -406,7 +406,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) + processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.cpuflag = CPU_ENABLED; + processor.cpufeature = (boot_cpu_data.x86 << 8) | +- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; ++ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; + processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; + processor.reserved[0] = 0; + processor.reserved[1] = 0; +diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c +index d6f848d..2dd1fe13 100644 +--- a/arch/x86/lib/cpu.c ++++ b/arch/x86/lib/cpu.c +@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) + { + unsigned int fam, model; + +- fam = x86_family(sig); ++ fam = x86_family(sig); + + model = (sig >> 4) & 0xf; + +diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c +index 44ce806..e278125 100644 +--- a/drivers/char/hw_random/via-rng.c ++++ b/drivers/char/hw_random/via-rng.c +@@ -166,7 +166,7 @@ static int via_rng_init(struct hwrng *rng) + /* Enable secondary noise source on CPUs where it is present. 
*/ + + /* Nehemiah stepping 8 and higher */ +- if ((c->x86_model == 9) && (c->x86_mask > 7)) ++ if ((c->x86_model == 9) && (c->x86_stepping > 7)) + lo |= VIA_NOISESRC2; + + /* Esther */ +diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c +index 297e912..1ee3674 100644 +--- a/drivers/cpufreq/acpi-cpufreq.c ++++ b/drivers/cpufreq/acpi-cpufreq.c +@@ -648,7 +648,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) + if (c->x86_vendor == X86_VENDOR_INTEL) { + if ((c->x86 == 15) && + (c->x86_model == 6) && +- (c->x86_mask == 8)) { ++ (c->x86_stepping == 8)) { + pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); + return -ENODEV; + } +diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c +index c46a12d..d5e27bc 100644 +--- a/drivers/cpufreq/longhaul.c ++++ b/drivers/cpufreq/longhaul.c +@@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) + break; + + case 7: +- switch (c->x86_mask) { ++ switch (c->x86_stepping) { + case 0: + longhaul_version = TYPE_LONGHAUL_V1; + cpu_model = CPU_SAMUEL2; +@@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) + break; + case 1 ... 15: + longhaul_version = TYPE_LONGHAUL_V2; +- if (c->x86_mask < 8) { ++ if (c->x86_stepping < 8) { + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + } else { +@@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) + numscales = 32; + memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); + memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); +- switch (c->x86_mask) { ++ switch (c->x86_stepping) { + case 0 ... 1: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah A' [C5XLOE]"; +diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c +index fd77812..a25741b 100644 +--- a/drivers/cpufreq/p4-clockmod.c ++++ b/drivers/cpufreq/p4-clockmod.c +@@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) + #endif + + /* Errata workaround */ +- cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; ++ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping; + switch (cpuid) { + case 0x0f07: + case 0x0f0a: +diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c +index 9f013ed..ef276f6 100644 +--- a/drivers/cpufreq/powernow-k7.c ++++ b/drivers/cpufreq/powernow-k7.c +@@ -131,7 +131,7 @@ static int check_powernow(void) + return 0; + } + +- if ((c->x86_model == 6) && (c->x86_mask == 0)) { ++ if ((c->x86_model == 6) && (c->x86_stepping == 0)) { + pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); + have_a0 = 1; + } +diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c +index 41bc539..4fa5adf 100644 +--- a/drivers/cpufreq/speedstep-centrino.c ++++ b/drivers/cpufreq/speedstep-centrino.c +@@ -37,7 +37,7 @@ struct cpu_id + { + __u8 x86; /* CPU family */ + __u8 x86_model; /* model */ +- __u8 x86_mask; /* stepping */ ++ __u8 x86_stepping; /* stepping */ + }; + + enum { +@@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, + { + if ((c->x86 == x->x86) && + (c->x86_model == x->x86_model) && +- (c->x86_mask == x->x86_mask)) ++ (c->x86_stepping == x->x86_stepping)) + return 1; + return 0; + } +diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c +index 1b80621..ade98a2 100644 +--- a/drivers/cpufreq/speedstep-lib.c ++++ b/drivers/cpufreq/speedstep-lib.c +@@ -272,9 +272,9 @@ 
unsigned int speedstep_detect_processor(void) + ebx = cpuid_ebx(0x00000001); + ebx &= 0x000000FF; + +- pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); ++ pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping); + +- switch (c->x86_mask) { ++ switch (c->x86_stepping) { + case 4: + /* + * B-stepping [M-P4-M] +@@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void) + msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && + (relaxed_check ? 1 : (msr_hi & (3<<24)))) { +- if (c->x86_mask == 0x01) { ++ if (c->x86_stepping == 0x01) { + pr_debug("early PIII version\n"); + return SPEEDSTEP_CPU_PIII_C_EARLY; + } else +diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c +index 441e86b..9126627 100644 +--- a/drivers/crypto/padlock-aes.c ++++ b/drivers/crypto/padlock-aes.c +@@ -531,7 +531,7 @@ static int __init padlock_init(void) + + printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); + +- if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { ++ if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) { + ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; + cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; + printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); +diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c +index 82dab16..3cb3e8b 100644 +--- a/drivers/edac/amd64_edac.c ++++ b/drivers/edac/amd64_edac.c +@@ -3150,7 +3150,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) + struct amd64_family_type *fam_type = NULL; + + pvt->ext_model = boot_cpu_data.x86_model >> 4; +- pvt->stepping = boot_cpu_data.x86_mask; ++ pvt->stepping = boot_cpu_data.x86_stepping; + pvt->model = boot_cpu_data.x86_model; + pvt->fam = boot_cpu_data.x86; + +diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c +index 3af92fc..3d5436f 100644 +--- a/drivers/edac/mce_amd.c ++++ b/drivers/edac/mce_amd.c +@@ -949,7 +949,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, +- c->x86, c->x86_model, c->x86_mask, ++ c->x86, c->x86_model, c->x86_stepping, + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c +index 6a27eb2..be1e380 100644 +--- a/drivers/hwmon/coretemp.c ++++ b/drivers/hwmon/coretemp.c +@@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) + for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { + const struct tjmax_model *tm = &tjmax_model_table[i]; + if (c->x86_model == tm->model && +- (tm->mask == ANY || c->x86_mask == tm->mask)) ++ (tm->mask == ANY || c->x86_stepping == tm->mask)) + return tm->tjmax; + } + + /* Early chips have no MSR for TjMax */ + +- if (c->x86_model == 0xf && c->x86_mask < 4) ++ if (c->x86_model == 0xf && c->x86_stepping < 4) + usemsr_ee = 0; + + if (c->x86_model > 0xe && usemsr_ee) { +@@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu) + * Readings might stop update when processor visited too deep sleep, + * fixed for stepping D0 (6EC). 
+ */ +- if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { ++ if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { + pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); + return -ENODEV; + } +diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c +index ef91b8a..84e9128 100644 +--- a/drivers/hwmon/hwmon-vid.c ++++ b/drivers/hwmon/hwmon-vid.c +@@ -293,7 +293,7 @@ u8 vid_which_vrm(void) + if (c->x86 < 6) /* Any CPU with family lower than 6 */ + return 0; /* doesn't have VID */ + +- vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); ++ vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor); + if (vrm_ret == 134) + vrm_ret = get_via_model_d_vrm(); + if (vrm_ret == 0) +diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c +index 9cdfde6..0124584 100644 +--- a/drivers/hwmon/k10temp.c ++++ b/drivers/hwmon/k10temp.c +@@ -179,7 +179,7 @@ static bool has_erratum_319(struct pci_dev *pdev) + * and AM3 formats, but that's the best we can do. + */ + return boot_cpu_data.x86_model < 4 || +- (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); ++ (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); + } + + static int k10temp_probe(struct pci_dev *pdev, +diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c +index 734d55d..4865027 100644 +--- a/drivers/hwmon/k8temp.c ++++ b/drivers/hwmon/k8temp.c +@@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev, + return -ENOMEM; + + model = boot_cpu_data.x86_model; +- stepping = boot_cpu_data.x86_mask; ++ stepping = boot_cpu_data.x86_stepping; + + /* feature available since SH-C0, exclude older revisions */ + if ((model == 4 && stepping == 0) || +diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c +index 6082f65..67773e8 100644 +--- a/drivers/video/fbdev/geode/video_gx.c ++++ b/drivers/video/fbdev/geode/video_gx.c +@@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info) + int timeout = 1000; + + /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ +- if (cpu_data(0).x86_mask == 1) { ++ if (cpu_data(0).x86_stepping == 1) { + pll_table = gx_pll_table_14MHz; + pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); + } else { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch new file mode 100644 index 00000000..147b2675 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch @@ -0,0 +1,162 @@ +From e187253b583696b67f207047bab1360cabd461c8 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:09 +0000 +Subject: [PATCH 08/42] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf + +(cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) + +This is a pure feature bits leaf. There are two AVX512 feature bits in it +already which were handled as scattered bits, and three more from this leaf +are going to be added for speculation control features. 
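+
+A sketch of how the new word gets filled in (this mirrors the
+get_cpu_cap() hunk below; the cpuid_level guard is assumed from the
+usual shape of that function):
+
+  if (c->cpuid_level >= 0x00000007) {
+          u32 eax, ebx, ecx, edx;
+
+          cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+          c->x86_capability[CPUID_7_EDX] = edx;   /* word 18 */
+  }
+
+After this, cpu_has(c, X86_FEATURE_AVX512_4VNNIW) tests bit 2 of word
+18 directly instead of going through the scattered-bits table.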
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeature.h | 7 +++++-- + arch/x86/include/asm/cpufeatures.h | 10 ++++++---- + arch/x86/include/asm/disabled-features.h | 3 ++- + arch/x86/include/asm/required-features.h | 3 ++- + arch/x86/kernel/cpu/common.c | 1 + + arch/x86/kernel/cpu/scattered.c | 2 -- + 6 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index 9ea67a0..8c10157 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -28,6 +28,7 @@ enum cpuid_leafs + CPUID_8000_000A_EDX, + CPUID_7_ECX, + CPUID_8000_0007_EBX, ++ CPUID_7_EDX, + }; + + #ifdef CONFIG_X86_FEATURE_NAMES +@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + REQUIRED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + + #define DISABLED_MASK_BIT_SET(feature_bit) \ + ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ +@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + DISABLED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + + #define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 8537a21..9d4a422 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -12,7 +12,7 @@ + /* + * Defines x86 CPU feature bits + */ +-#define NCAPINTS 18 /* N 32-bit words worth of info */ ++#define NCAPINTS 19 /* N 32-bit words worth of info */ + #define NBUGINTS 1 /* N 32-bit bug flags */ + + /* +@@ -197,9 +197,7 @@ + #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ + +-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ ++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ + + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ + #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ +@@ -295,6 +293,10 @@ + #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ + #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + ++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ ++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ ++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ ++ + /* + * BUG word(s) + */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 85599ad..8b45e08 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -57,6 +57,7 @@ + #define DISABLED_MASK15 0 + #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) + #define DISABLED_MASK17 0 +-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) ++#define DISABLED_MASK18 0 ++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) + + #endif /* _ASM_X86_DISABLED_FEATURES_H */ +diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h +index fac9a5c..6847d85 100644 +--- a/arch/x86/include/asm/required-features.h ++++ b/arch/x86/include/asm/required-features.h +@@ -100,6 +100,7 @@ + #define REQUIRED_MASK15 0 + #define REQUIRED_MASK16 0 + #define REQUIRED_MASK17 0 +-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) ++#define REQUIRED_MASK18 0 ++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) + + #endif /* _ASM_X86_REQUIRED_FEATURES_H */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index d198ae0..4267273 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_0_EBX] = ebx; + c->x86_capability[CPUID_7_ECX] = ecx; ++ c->x86_capability[CPUID_7_EDX] = edx; + } + + /* Extended state features: level 0x0000000d */ +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index b0dd9ae..afbb525 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { +- { 
X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, +- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch new file mode 100644 index 00000000..0f35decd --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch @@ -0,0 +1,60 @@ +From 03a686fb1ba599b2ed6b0bb256fa364f629ed2c7 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 26 Feb 2018 09:35:01 -0500 +Subject: [PATCH 08/14] x86/spectre_v2: Don't check microcode versions when + running under hypervisors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 36268223c1e9981d6cfc33aff8520b3bde4b8114 upstream. + +As: + + 1) It's known that hypervisors lie about the environment anyhow (host + mismatch) + + 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid + "correct" value, it all gets to be very murky when migration happens + (do you provide the "new" microcode of the machine?). + +And in reality the cloud vendors are the ones that should make sure that +the microcode that is running is correct and we should just sing lalalala +and trust them. + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Wanpeng Li <kernellwp@gmail.com> +Cc: kvm <kvm@vger.kernel.org> +Cc: Krčmář <rkrcmar@redhat.com> +Cc: Borislav Petkov <bp@alien8.de> +CC: "H. Peter Anvin" <hpa@zytor.com> +CC: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/intel.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 6ed206b..7680425 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -103,6 +103,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) + { + int i; + ++ /* ++ * We know that the hypervisor lie to us on the microcode version so ++ * we may as well hope that it is running the correct version. 
++ */ ++ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) ++ return false; ++ + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_stepping == spectre_bad_microcodes[i].stepping) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch new file mode 100644 index 00000000..19dfa3a4 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch @@ -0,0 +1,263 @@ +From d901d344ca4172a49bab9852e993e5a2c47a7fde Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Mon, 19 Feb 2018 10:50:56 +0000 +Subject: [PATCH 09/14] Revert "x86/retpoline: Simplify vmexit_fill_RSB()" + +commit d1c99108af3c5992640aa2afa7d2e88c3775c06e upstream. + +This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting +the RSB filling out of line and calling it, we waste one RSB slot for +returning from the function itself, which means one fewer actual function +call we can make if we're doing the Skylake abomination of call-depth +counting. + +It also changed the number of RSB stuffings we do on vmexit from 32, +which was correct, to 16. Let's just stop with the bikeshedding; it +didn't actually *fix* anything anyway. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: arjan.van.de.ven@intel.com +Cc: bp@alien8.de +Cc: dave.hansen@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_32.S | 3 +- + arch/x86/entry/entry_64.S | 3 +- + arch/x86/include/asm/asm-prototypes.h | 3 -- + arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++---- + arch/x86/lib/Makefile | 1 - + arch/x86/lib/retpoline.S | 56 ---------------------------- + 6 files changed, 65 insertions(+), 71 deletions(-) + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index f5434b4..a76dc73 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -237,8 +237,7 @@ ENTRY(__switch_to_asm) + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ +- /* Clobbers %ebx */ +- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW ++ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + #endif + + /* restore callee-saved registers */ +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index e9120d4..caf79e3 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -331,8 +331,7 @@ ENTRY(__switch_to_asm) + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. 
+ */ +- /* Clobbers %rbx */ +- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW ++ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + #endif + + /* restore callee-saved registers */ +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index 1666542..5a25ada 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -37,7 +37,4 @@ INDIRECT_THUNK(dx) + INDIRECT_THUNK(si) + INDIRECT_THUNK(di) + INDIRECT_THUNK(bp) +-asmlinkage void __fill_rsb(void); +-asmlinkage void __clear_rsb(void); +- + #endif /* CONFIG_RETPOLINE */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 81a1be3..dace2de 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -8,6 +8,50 @@ + #include <asm/cpufeatures.h> + #include <asm/msr-index.h> + ++/* ++ * Fill the CPU return stack buffer. ++ * ++ * Each entry in the RSB, if used for a speculative 'ret', contains an ++ * infinite 'pause; lfence; jmp' loop to capture speculative execution. ++ * ++ * This is required in various cases for retpoline and IBRS-based ++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to ++ * eliminate potentially bogus entries from the RSB, and sometimes ++ * purely to ensure that it doesn't get empty, which on some CPUs would ++ * allow predictions from other (unwanted!) sources to be used. ++ * ++ * We define a CPP macro such that it can be used from both .S files and ++ * inline assembly. It's possible to do a .macro and then include that ++ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. ++ */ ++ ++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ ++#define RSB_FILL_LOOPS 16 /* To avoid underflow */ ++ ++/* ++ * Google experimented with loop-unrolling and this turned out to be ++ * the optimal version — two calls, each with their own speculation ++ * trap should their return address end up getting used, in a loop. ++ */ ++#define __FILL_RETURN_BUFFER(reg, nr, sp) \ ++ mov $(nr/2), reg; \ ++771: \ ++ call 772f; \ ++773: /* speculation trap */ \ ++ pause; \ ++ lfence; \ ++ jmp 773b; \ ++772: \ ++ call 774f; \ ++775: /* speculation trap */ \ ++ pause; \ ++ lfence; \ ++ jmp 775b; \ ++774: \ ++ dec reg; \ ++ jnz 771b; \ ++ add $(BITS_PER_LONG/8) * nr, sp; ++ + #ifdef __ASSEMBLY__ + + /* +@@ -78,10 +122,17 @@ + #endif + .endm + +-/* This clobbers the BX register */ +-.macro FILL_RETURN_BUFFER nr:req ftr:req ++ /* ++ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP ++ * monstrosity above, manually. 
++ */ ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req + #ifdef CONFIG_RETPOLINE +- ALTERNATIVE "", "call __clear_rsb", \ftr ++ ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE "jmp .Lskip_rsb_\@", \ ++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ ++ \ftr ++.Lskip_rsb_\@: + #endif + .endm + +@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[]; + static inline void vmexit_fill_RSB(void) + { + #ifdef CONFIG_RETPOLINE +- alternative_input("", +- "call __fill_rsb", +- X86_FEATURE_RETPOLINE, +- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); ++ unsigned long loops; ++ ++ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE("jmp 910f", ++ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), ++ X86_FEATURE_RETPOLINE) ++ "910:" ++ : "=r" (loops), ASM_CALL_CONSTRAINT ++ : : "memory" ); + #endif + } + +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index 4ad7c4d..6bf1898 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -26,7 +26,6 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o + lib-$(CONFIG_RETPOLINE) += retpoline.o +-OBJECT_FILES_NON_STANDARD_retpoline.o :=y + + obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index 480edc3..c909961 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -7,7 +7,6 @@ + #include <asm/alternative-asm.h> + #include <asm/export.h> + #include <asm/nospec-branch.h> +-#include <asm/bitsperlong.h> + + .macro THUNK reg + .section .text.__x86.indirect_thunk +@@ -47,58 +46,3 @@ GENERATE_THUNK(r13) + GENERATE_THUNK(r14) + GENERATE_THUNK(r15) + #endif +- +-/* +- * Fill the CPU return stack buffer. +- * +- * Each entry in the RSB, if used for a speculative 'ret', contains an +- * infinite 'pause; lfence; jmp' loop to capture speculative execution. +- * +- * This is required in various cases for retpoline and IBRS-based +- * mitigations for the Spectre variant 2 vulnerability. Sometimes to +- * eliminate potentially bogus entries from the RSB, and sometimes +- * purely to ensure that it doesn't get empty, which on some CPUs would +- * allow predictions from other (unwanted!) sources to be used. +- * +- * Google experimented with loop-unrolling and this turned out to be +- * the optimal version - two calls, each with their own speculation +- * trap should their return address end up getting used, in a loop. 
+- */ +-.macro STUFF_RSB nr:req sp:req +- mov $(\nr / 2), %_ASM_BX +- .align 16 +-771: +- call 772f +-773: /* speculation trap */ +- pause +- lfence +- jmp 773b +- .align 16 +-772: +- call 774f +-775: /* speculation trap */ +- pause +- lfence +- jmp 775b +- .align 16 +-774: +- dec %_ASM_BX +- jnz 771b +- add $((BITS_PER_LONG/8) * \nr), \sp +-.endm +- +-#define RSB_FILL_LOOPS 16 /* To avoid underflow */ +- +-ENTRY(__fill_rsb) +- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP +- ret +-END(__fill_rsb) +-EXPORT_SYMBOL_GPL(__fill_rsb) +- +-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +- +-ENTRY(__clear_rsb) +- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP +- ret +-END(__clear_rsb) +-EXPORT_SYMBOL_GPL(__clear_rsb) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch new file mode 100644 index 00000000..2bdab698 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch @@ -0,0 +1,56 @@ +From 8b458f1e8f957c6bdf2674f65ac76234ef8bb018 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Wed, 13 Sep 2017 14:03:10 -0700 +Subject: [PATCH 009/102] kaiser: KAISER depends on SMP + +It is absurd that KAISER should depend on SMP, but apparently nobody +has tried a UP build before: which breaks on implicit declaration of +function 'per_cpu_offset' in arch/x86/mm/kaiser.c. + +Now, you would expect that to be trivially fixed up; but looking at +the System.map when that block is #ifdef'ed out of kaiser_init(), +I see that in a UP build __per_cpu_user_mapped_end is precisely at +__per_cpu_user_mapped_start, and the items carefully gathered into +that section for user-mapping on SMP, dispersed elsewhere on UP. + +So, some other kind of section assignment will be needed on UP, +but implementing that is not a priority: just make KAISER depend +on SMP for now. + +Also inserted a blank line before the option, tidied up the +brief Kconfig help message, and added an "If unsure, Y". + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + security/Kconfig | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/security/Kconfig b/security/Kconfig +index 334d2e8..dc78671 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -30,14 +30,16 @@ config SECURITY + model will be used. + + If you are unsure how to answer this question, answer N. ++ + config KAISER + bool "Remove the kernel mapping in user mode" + default y +- depends on X86_64 +- depends on !PARAVIRT ++ depends on X86_64 && SMP && !PARAVIRT + help +- This enforces a strict kernel and user space isolation in order to close +- hardware side channels on kernel address information. ++ This enforces a strict kernel and user space isolation, in order ++ to close hardware side channels on kernel address information. ++ ++ If you are unsure how to answer this question, answer Y. 
+ + config KAISER_REAL_SWITCH + bool "KAISER: actually switch page tables" +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch new file mode 100644 index 00000000..1de4e886 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch @@ -0,0 +1,51 @@ +From a56ed550fd79c3bab8aa9d0f136086314dc377f5 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:10 +0000 +Subject: [PATCH 09/42] x86/cpufeatures: Add Intel feature bits for Speculation + Control + +(cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61) + +Add three feature bits exposed by new microcode on Intel CPUs for +speculation control. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 9d4a422..1f03888 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -296,6 +296,9 @@ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ + #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ ++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ ++#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + + /* + * BUG word(s) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch new file mode 100644 index 00000000..b3f35a95 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch @@ -0,0 +1,44 @@ +From 6893aed64644e59c2aec9a347e6a324233b81dd7 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter <dan.carpenter@oracle.com> +Date: Wed, 14 Feb 2018 10:14:17 +0300 +Subject: [PATCH 09/12] x86/spectre: Fix an error message + +commit 9de29eac8d2189424d81c0d840cd0469aa3d41c8 upstream. + +If i == ARRAY_SIZE(mitigation_options) then we accidentally print +garbage from one space beyond the end of the mitigation_options[] array. 
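A reduced, standalone illustration of the off-by-one being fixed (simplified
names and options; not the kernel code itself):

    #include <stdio.h>
    #include <string.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const char *options[] = { "off", "on", "retpoline", "auto" };

    int main(void)
    {
            const char *arg = "bogus";
            unsigned int i;

            for (i = 0; i < ARRAY_SIZE(options); i++) {
                    if (!strcmp(arg, options[i]))
                            break;
            }
            /*
             * When nothing matched, i == ARRAY_SIZE(options), so reading
             * options[i] here would be one element past the end of the
             * array. Printing the user-supplied arg instead, as the hunk
             * below does, avoids the out-of-bounds read.
             */
            if (i >= ARRAY_SIZE(options))
                    printf("unknown option (%s)\n", arg);
            return 0;
    }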
+ +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@suse.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: KarimAllah Ahmed <karahmed@amazon.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: kernel-janitors@vger.kernel.org +Fixes: 9005c6834c0f ("x86/spectre: Simplify spectre_v2 command line parsing") +Link: http://lkml.kernel.org/r/20180214071416.GA26677@mwanda +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index b83e0c9..baddc9e 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -173,7 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + } + + if (i >= ARRAY_SIZE(mitigation_options)) { +- pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); ++ pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; + } + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch new file mode 100644 index 00000000..f8e7874d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch @@ -0,0 +1,74 @@ +From 3d8ca014b31b43c78d3240b2574670f2ab38519c Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Thu, 21 Sep 2017 20:39:56 -0700 +Subject: [PATCH 010/102] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER + +pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi() +adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly +when not CONFIG_KAISER. + +Although the minimal change is to add an #ifdef CONFIG_KAISER around +the addq line, that looks cluttered, and I prefer how the first call +to do_nmi() handled it: prepare args in %rdi and %rsi before getting +into the CONFIG_KAISER block, since it does not touch them at all. + +And while we're here, place the "#ifdef CONFIG_KAISER" that follows +each, to enclose the "Unconditionally restore CR3" comment: matching +how the "Unconditionally use kernel CR3" comment above is enclosed. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index d84e3a7..57f7993 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1321,12 +1321,13 @@ ENTRY(nmi) + movq %rax, %cr3 + #endif + call do_nmi ++ ++#ifdef CONFIG_KAISER + /* + * Unconditionally restore CR3. I know we return to + * kernel code that needs user CR3, but do we ever return + * to "user mode" where we need the kernel CR3? 
+ */ +-#ifdef CONFIG_KAISER + popq %rax + mov %rax, %cr3 + #endif +@@ -1550,6 +1551,8 @@ end_repeat_nmi: + SWAPGS + xorl %ebx, %ebx + 1: ++ movq %rsp, %rdi ++ movq $-1, %rsi + #ifdef CONFIG_KAISER + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ +@@ -1562,16 +1565,14 @@ end_repeat_nmi: + #endif + + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ +- movq %rsp, %rdi +- addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ +- movq $-1, %rsi + call do_nmi ++ ++#ifdef CONFIG_KAISER + /* + * Unconditionally restore CR3. We might be returning to + * kernel code that needs user CR3, like just just before + * a sysret. + */ +-#ifdef CONFIG_KAISER + popq %rax + mov %rax, %cr3 + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch new file mode 100644 index 00000000..68e82a01 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch @@ -0,0 +1,72 @@ +From 5d671cb212c75a4adebb52863b5e9d370c8c23c1 Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" <garsilva@embeddedor.com> +Date: Tue, 13 Feb 2018 13:22:08 -0600 +Subject: [PATCH 10/12] x86/cpu: Change type of x86_cache_size variable to + unsigned int + +commit 24dbc6000f4b9b0ef5a9daecb161f1907733765a upstream. + +Currently, x86_cache_size is of type int, which makes no sense as we +will never have a valid cache size equal or less than 0. So instead of +initializing this variable to -1, it can perfectly be initialized to 0 +and use it as an unsigned variable instead. + +Suggested-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Addresses-Coverity-ID: 1464429 +Link: http://lkml.kernel.org/r/20180213192208.GA26414@embeddedor.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/processor.h | 2 +- + arch/x86/kernel/cpu/common.c | 2 +- + arch/x86/kernel/cpu/proc.c | 4 ++-- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index df29212..d51e679 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -113,7 +113,7 @@ struct cpuinfo_x86 { + char x86_vendor_id[16]; + char x86_model_id[64]; + /* in KB - valid for CPUS which support this call: */ +- int x86_cache_size; ++ unsigned int x86_cache_size; + int x86_cache_alignment; /* In bytes */ + /* Cache QoS architectural values: */ + int x86_cache_max_rmid; /* max index */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 96b2c83..301bbd1 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1144,7 +1144,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) + int i; + + c->loops_per_jiffy = loops_per_jiffy; +- c->x86_cache_size = -1; ++ c->x86_cache_size = 0; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_stepping = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ +diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c +index 9e817f2..c4f772d 100644 +--- a/arch/x86/kernel/cpu/proc.c ++++ b/arch/x86/kernel/cpu/proc.c +@@ -87,8 +87,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) + } + + /* Cache size */ +- if (c->x86_cache_size >= 0) +- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); ++ if (c->x86_cache_size) ++ seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); + + show_cpuinfo_core(m, c, cpu); + show_cpuinfo_misc(m, c); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch new file mode 100644 index 00000000..9417a4ec --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch @@ -0,0 +1,51 @@ +From 3a855b66f0fb7388b32ed33a536b4f68cd09afc3 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:11 +0000 +Subject: [PATCH 10/42] x86/cpufeatures: Add AMD feature bits for Speculation + Control + +(cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7) + +AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. +See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 1f03888..c4d03e7 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -258,6 +258,9 @@ + /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ + #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ ++#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ ++#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ ++#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch new file mode 100644 index 00000000..d5bd585e --- /dev/null +++ 
b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch @@ -0,0 +1,232 @@ +From d65c0b72013dac24f4e2d0b031ed8bc6b71bfcca Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Mon, 19 Feb 2018 10:50:54 +0000 +Subject: [PATCH 10/14] x86/speculation: Use IBRS if available before calling + into firmware + +commit dd84441a797150dcc49298ec95c459a8891d8bb1 upstream. + +Retpoline means the kernel is safe because it has no indirect branches. +But firmware isn't, so use IBRS for firmware calls if it's available. + +Block preemption while IBRS is set, although in practice the call sites +already had to be doing that. + +Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware +code, from an NMI handler. I don't want to touch that with a bargepole. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: arjan.van.de.ven@intel.com +Cc: bp@alien8.de +Cc: dave.hansen@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/apm.h | 6 ++++++ + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/efi.h | 17 ++++++++++++++-- + arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- + arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- + 5 files changed, 63 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h +index 93eebc63..46e40ae 100644 +--- a/arch/x86/include/asm/apm.h ++++ b/arch/x86/include/asm/apm.h +@@ -6,6 +6,8 @@ + #ifndef _ASM_X86_MACH_DEFAULT_APM_H + #define _ASM_X86_MACH_DEFAULT_APM_H + ++#include <asm/nospec-branch.h> ++ + #ifdef APM_ZERO_SEGS + # define APM_DO_ZERO_SEGS \ + "pushl %%ds\n\t" \ +@@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ ++ firmware_restrict_branch_speculation_start(); + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +@@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + "=S" (*esi) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); ++ firmware_restrict_branch_speculation_end(); + } + + static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, +@@ -55,6 +59,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. 
+ */ ++ firmware_restrict_branch_speculation_start(); + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" +@@ -67,6 +72,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + "=S" (si) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); ++ firmware_restrict_branch_speculation_end(); + return error; + } + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 8eb23f5..ed7a1d2 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,6 +203,7 @@ + #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ ++#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h +index 389d700..9df22bb 100644 +--- a/arch/x86/include/asm/efi.h ++++ b/arch/x86/include/asm/efi.h +@@ -5,6 +5,7 @@ + #include <asm/pgtable.h> + #include <asm/processor-flags.h> + #include <asm/tlb.h> ++#include <asm/nospec-branch.h> + + /* + * We map the EFI regions needed for runtime services non-contiguously, +@@ -35,8 +36,18 @@ + + extern unsigned long asmlinkage efi_call_phys(void *, ...); + +-#define arch_efi_call_virt_setup() kernel_fpu_begin() +-#define arch_efi_call_virt_teardown() kernel_fpu_end() ++#define arch_efi_call_virt_setup() \ ++({ \ ++ kernel_fpu_begin(); \ ++ firmware_restrict_branch_speculation_start(); \ ++}) ++ ++#define arch_efi_call_virt_teardown() \ ++({ \ ++ firmware_restrict_branch_speculation_end(); \ ++ kernel_fpu_end(); \ ++}) ++ + + /* + * Wrap all the virtual calls in a way that forces the parameters on the stack. +@@ -72,6 +83,7 @@ struct efi_scratch { + efi_sync_low_kernel_mappings(); \ + preempt_disable(); \ + __kernel_fpu_begin(); \ ++ firmware_restrict_branch_speculation_start(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ +@@ -90,6 +102,7 @@ struct efi_scratch { + __flush_tlb_all(); \ + } \ + \ ++ firmware_restrict_branch_speculation_end(); \ + __kernel_fpu_end(); \ + preempt_enable(); \ + }) +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index dace2de..031840a 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) + #endif + } + ++#define alternative_msr_write(_msr, _val, _feature) \ ++ asm volatile(ALTERNATIVE("", \ ++ "movl %[msr], %%ecx\n\t" \ ++ "movl %[val], %%eax\n\t" \ ++ "movl $0, %%edx\n\t" \ ++ "wrmsr", \ ++ _feature) \ ++ : : [msr] "i" (_msr), [val] "i" (_val) \ ++ : "eax", "ecx", "edx", "memory") ++ + static inline void indirect_branch_prediction_barrier(void) + { +- asm volatile(ALTERNATIVE("", +- "movl %[msr], %%ecx\n\t" +- "movl %[val], %%eax\n\t" +- "movl $0, %%edx\n\t" +- "wrmsr", +- X86_FEATURE_USE_IBPB) +- : : [msr] "i" (MSR_IA32_PRED_CMD), +- [val] "i" (PRED_CMD_IBPB) +- : "eax", "ecx", "edx", "memory"); ++ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, ++ X86_FEATURE_USE_IBPB); ++} ++ ++/* ++ * With retpoline, we must use IBRS to restrict branch prediction ++ * before calling into firmware. 
++ */ ++static inline void firmware_restrict_branch_speculation_start(void) ++{ ++ preempt_disable(); ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, ++ X86_FEATURE_USE_IBRS_FW); ++} ++ ++static inline void firmware_restrict_branch_speculation_end(void) ++{ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, ++ X86_FEATURE_USE_IBRS_FW); ++ preempt_enable(); + } + + #endif /* __ASSEMBLY__ */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index baddc9e..b8b0b6e 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -299,6 +299,15 @@ static void __init spectre_v2_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } ++ ++ /* ++ * Retpoline means the kernel is safe because it has no indirect ++ * branches. But firmware isn't, so use IBRS to protect that. ++ */ ++ if (boot_cpu_has(X86_FEATURE_IBRS)) { ++ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); ++ pr_info("Enabling Restricted Speculation for firmware calls\n"); ++ } + } + + #undef pr_fmt +@@ -325,8 +334,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + +- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", ++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + } + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch new file mode 100644 index 00000000..98431ecb --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch @@ -0,0 +1,152 @@ +From 0a7605cc230a1eec0f773bd4312320a8e63b443c Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Wed, 23 Aug 2017 14:21:14 -0700 +Subject: [PATCH 011/102] kaiser: fix perf crashes + +Avoid perf crashes: place debug_store in the user-mapped per-cpu area +instead of allocating, and use page allocator plus kaiser_add_mapping() +to keep the BTS and PEBS buffers user-mapped (that is, present in the +user mapping, though visible only to kernel and hardware). The PEBS +fixup buffer does not need this treatment. + +The need for a user-mapped struct debug_store showed up before doing +any conscious perf testing: in a couple of kernel paging oopses on +Westmere, implicating the debug_store offset of the per-cpu area. 
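The allocation pattern, condensed from the dsalloc()/dsfree() helpers added
below (names here are illustrative; the patch calls them dsalloc() and
dsfree()): KAISER can only map and unmap at page granularity, so the buffers
must come from the page allocator rather than kmalloc(), whose allocations
may share a page with unrelated data, and must be removed from the shadow
tables again before the pages are freed:

    static void *user_mapped_alloc(size_t size, gfp_t flags, int node)
    {
            unsigned int order = get_order(size);
            struct page *page;

            page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
            if (!page)
                    return NULL;
            /* Make the buffer visible in the shadow (user) page tables. */
            if (kaiser_add_mapping((unsigned long)page_address(page), size,
                                   __PAGE_KERNEL) < 0) {
                    __free_pages(page, order);
                    return NULL;
            }
            return page_address(page);
    }

    static void user_mapped_free(const void *buf, size_t size)
    {
            if (!buf)
                    return;
            /* Unmap from the shadow tables before the pages are reused. */
            kaiser_remove_mapping((unsigned long)buf, size);
            free_pages((unsigned long)buf, get_order(size));
    }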
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/events/intel/ds.c | 57 ++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 45 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index be20239..c2e4ae2 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -2,11 +2,15 @@ + #include <linux/types.h> + #include <linux/slab.h> + ++#include <asm/kaiser.h> + #include <asm/perf_event.h> + #include <asm/insn.h> + + #include "../perf_event.h" + ++static ++DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store); ++ + /* The size of a BTS record in bytes: */ + #define BTS_RECORD_SIZE 24 + +@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu) + + static DEFINE_PER_CPU(void *, insn_buffer); + ++static void *dsalloc(size_t size, gfp_t flags, int node) ++{ ++#ifdef CONFIG_KAISER ++ unsigned int order = get_order(size); ++ struct page *page; ++ unsigned long addr; ++ ++ page = __alloc_pages_node(node, flags | __GFP_ZERO, order); ++ if (!page) ++ return NULL; ++ addr = (unsigned long)page_address(page); ++ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) { ++ __free_pages(page, order); ++ addr = 0; ++ } ++ return (void *)addr; ++#else ++ return kmalloc_node(size, flags | __GFP_ZERO, node); ++#endif ++} ++ ++static void dsfree(const void *buffer, size_t size) ++{ ++#ifdef CONFIG_KAISER ++ if (!buffer) ++ return; ++ kaiser_remove_mapping((unsigned long)buffer, size); ++ free_pages((unsigned long)buffer, get_order(size)); ++#else ++ kfree(buffer); ++#endif ++} ++ + static int alloc_pebs_buffer(int cpu) + { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; +@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu) + if (!x86_pmu.pebs) + return 0; + +- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); ++ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + if (unlikely(!buffer)) + return -ENOMEM; + +@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu) + if (x86_pmu.intel_cap.pebs_format < 2) { + ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!ibuffer) { +- kfree(buffer); ++ dsfree(buffer, x86_pmu.pebs_buffer_size); + return -ENOMEM; + } + per_cpu(insn_buffer, cpu) = ibuffer; +@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu) + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; + +- kfree((void *)(unsigned long)ds->pebs_buffer_base); ++ dsfree((void *)(unsigned long)ds->pebs_buffer_base, ++ x86_pmu.pebs_buffer_size); + ds->pebs_buffer_base = 0; + } + +@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu) + if (!x86_pmu.bts) + return 0; + +- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); ++ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); + return -ENOMEM; +@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu) + if (!ds || !x86_pmu.bts) + return; + +- kfree((void *)(unsigned long)ds->bts_buffer_base); ++ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE); + ds->bts_buffer_base = 0; + } + + static int alloc_ds_buffer(int cpu) + { +- int node = cpu_to_node(cpu); +- struct debug_store *ds; +- +- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); +- if (unlikely(!ds)) +- return -ENOMEM; ++ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu); + ++ memset(ds, 
0, sizeof(*ds)); + per_cpu(cpu_hw_events, cpu).ds = ds; + + return 0; +@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu) + return; + + per_cpu(cpu_hw_events, cpu).ds = NULL; +- kfree(ds); + } + + void release_ds_buffers(void) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch new file mode 100644 index 00000000..00297c34 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch @@ -0,0 +1,133 @@ +From df2f7e0d21ca37bdbdf3fc5b6fa42a9b0bc6fbd6 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Mon, 19 Feb 2018 11:13:28 +0100 +Subject: [PATCH 11/12] x86/microcode/AMD: Change load_microcode_amd()'s param + to bool to fix preemptibility bug + +commit dac6ca243c4c49a9ca7507d3d66140ebfac8b04b upstream. + +With CONFIG_DEBUG_PREEMPT enabled, I get: + + BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 + caller is debug_smp_processor_id + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc2+ #2 + Call Trace: + dump_stack + check_preemption_disabled + debug_smp_processor_id + save_microcode_in_initrd_amd + ? microcode_init + save_microcode_in_initrd + ... + +because, well, it says it above, we're using smp_processor_id() in +preemptible code. + +But passing the CPU number is not really needed. It is only used to +determine whether we're on the BSP, and, if so, to save the microcode +patch for early loading. + + [ We don't absolutely need to do it on the BSP but we do that + customarily there. ] + +Instead, convert that function parameter to a boolean which denotes +whether the patch should be saved or not, thereby avoiding the use of +smp_processor_id() in preemptible code. 
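In reduced form, the interface change (taken from the hunks below):

    /*
     * Before: the caller passed a CPU number obtained via
     * smp_processor_id() in preemptible code, which is what triggers
     * the CONFIG_DEBUG_PREEMPT splat quoted above.
     */
    enum ucode_state load_microcode_amd(int cpu, u8 family,
                                        const u8 *data, size_t size);

    /*
     * After: the caller decides up front whether the patch should be
     * saved for early loading, so the function needs no CPU number
     * and therefore no smp_processor_id().
     */
    static enum ucode_state
    load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);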
+ +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20170528200414.31305-1-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +[arnd: rebased to 4.9, after running into warning: + arch/x86/kernel/cpu/microcode/amd.c:881:30: self-comparison always evaluates to true] +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/microcode_amd.h | 1 - + arch/x86/kernel/cpu/microcode/amd.c | 17 +++++++++++------ + 2 files changed, 11 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h +index 15eb754..98ccbd1 100644 +--- a/arch/x86/include/asm/microcode_amd.h ++++ b/arch/x86/include/asm/microcode_amd.h +@@ -59,7 +59,6 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, + + extern int __apply_microcode_amd(struct microcode_amd *mc_amd); + extern int apply_microcode_amd(int cpu); +-extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); + + #define PATCH_MAX_SIZE PAGE_SIZE + +diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c +index 017bda1..aaab28a 100644 +--- a/arch/x86/kernel/cpu/microcode/amd.c ++++ b/arch/x86/kernel/cpu/microcode/amd.c +@@ -135,6 +135,9 @@ static size_t compute_container_size(u8 *data, u32 total_size) + return size; + } + ++static enum ucode_state ++load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); ++ + /* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a +@@ -451,7 +454,7 @@ int __init save_microcode_in_initrd_amd(void) + eax = cpuid_eax(0x00000001); + eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + +- ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); ++ ret = load_microcode_amd(true, eax, container, container_size); + if (ret != UCODE_OK) + retval = -EINVAL; + +@@ -860,7 +863,8 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, + return UCODE_OK; + } + +-enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) ++static enum ucode_state ++load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) + { + enum ucode_state ret; + +@@ -874,8 +878,8 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s + + #ifdef CONFIG_X86_32 + /* save BSP's matching patch for early load */ +- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { +- struct ucode_patch *p = find_patch(cpu); ++ if (save) { ++ struct ucode_patch *p = find_patch(0); + if (p) { + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), +@@ -907,11 +911,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, + { + char fw_name[36] = "amd-ucode/microcode_amd.bin"; + struct cpuinfo_x86 *c = &cpu_data(cpu); ++ bool bsp = c->cpu_index == boot_cpu_data.cpu_index; + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ +- if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) ++ if (!refresh_fw || !bsp) + return UCODE_OK; + + if (c->x86 >= 0x15) +@@ -928,7 +933,7 @@ static enum ucode_state request_microcode_amd(int cpu, 
struct device *device, + goto fw_release; + } + +- ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); ++ ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); + + fw_release: + release_firmware(fw); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch new file mode 100644 index 00000000..311c2e85 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch @@ -0,0 +1,67 @@ +From b733a28baec38d991f253a8587a94e9b2948a7d0 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:12 +0000 +Subject: [PATCH 11/42] x86/msr: Add definitions for new speculation control + MSRs + +(cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410) + +Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. + +See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/msr-index.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 4eeaa36..0e4da8e 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -37,6 +37,13 @@ + #define EFER_FFXSR (1<<_EFER_FFXSR) + + /* Intel MSRs. 
Some also available on other CPUs */ ++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ ++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ ++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ ++ ++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ ++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ ++ + #define MSR_IA32_PERFCTR0 0x000000c1 + #define MSR_IA32_PERFCTR1 0x000000c2 + #define MSR_FSB_FREQ 0x000000cd +@@ -50,6 +57,11 @@ + #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + #define MSR_MTRRcap 0x000000fe ++ ++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a ++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ ++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ ++ + #define MSR_IA32_BBL_CR_CTL 0x00000119 + #define MSR_IA32_BBL_CR_CTL3 0x0000011e + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch new file mode 100644 index 00000000..6caed4a9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch @@ -0,0 +1,103 @@ +From 3de13a223fa7e5d0dc5bb20d87be73f686768daf Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Mon, 19 Feb 2018 10:50:57 +0000 +Subject: [PATCH 11/14] x86/retpoline: Support retpoline builds with Clang + +commit 87358710c1fb4f1bf96bbe2349975ff9953fc9b2 upstream. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: arjan.van.de.ven@intel.com +Cc: bp@alien8.de +Cc: dave.hansen@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Link: http://lkml.kernel.org/r/1519037457-7643-5-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/Makefile | 5 ++++- + include/linux/compiler-clang.h | 5 +++++ + include/linux/compiler-gcc.h | 4 ++++ + include/linux/init.h | 8 ++++---- + 4 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index cd22cb8..b609961 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -184,7 +184,10 @@ KBUILD_AFLAGS += $(mflags-y) + + # Avoid indirect branches in kernel to deal with Spectre + ifdef CONFIG_RETPOLINE +- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ++ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register ++ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk ++ ++ RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + endif +diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h +index de17999..01225b0 100644 +--- a/include/linux/compiler-clang.h ++++ b/include/linux/compiler-clang.h +@@ -15,3 +15,8 @@ + * with any version that can compile the kernel + */ + #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) ++ ++/* Clang doesn't have a way to turn it off per-function, yet. 
*/ ++#ifdef __noretpoline ++#undef __noretpoline ++#endif +diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h +index 928e5ca..362a1e17 100644 +--- a/include/linux/compiler-gcc.h ++++ b/include/linux/compiler-gcc.h +@@ -88,6 +88,10 @@ + #define __weak __attribute__((weak)) + #define __alias(symbol) __attribute__((alias(#symbol))) + ++#ifdef RETPOLINE ++#define __noretpoline __attribute__((indirect_branch("keep"))) ++#endif ++ + /* + * it doesn't make sense on ARM (currently the only user of __naked) + * to trace naked functions because then mcount is called without +diff --git a/include/linux/init.h b/include/linux/init.h +index 8e346d1..683508f 100644 +--- a/include/linux/init.h ++++ b/include/linux/init.h +@@ -5,10 +5,10 @@ + #include <linux/types.h> + + /* Built-in __init functions needn't be compiled with retpoline */ +-#if defined(RETPOLINE) && !defined(MODULE) +-#define __noretpoline __attribute__((indirect_branch("keep"))) ++#if defined(__noretpoline) && !defined(MODULE) ++#define __noinitretpoline __noretpoline + #else +-#define __noretpoline ++#define __noinitretpoline + #endif + + /* These macros are used to mark some functions or +@@ -46,7 +46,7 @@ + + /* These are for everybody (although not all archs will actually + discard it in modules) */ +-#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline ++#define __init __section(.init.text) __cold notrace __latent_entropy __noinitretpoline + #define __initdata __section(.init.data) + #define __initconst __section(.init.rodata) + #define __exitdata __section(.exit.data) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch new file mode 100644 index 00000000..cab52bd6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch @@ -0,0 +1,54 @@ +From f88c0c3498392939c54bd0bd0050029ac20ddd0e Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 18:48:02 -0700 +Subject: [PATCH 012/102] kaiser: ENOMEM if kaiser_pagetable_walk() NULL + +kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed. +And avoid its might_sleep() when atomic (though atomic at present unused). 
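Both changes, condensed (the full hunks follow):

    /* Only assert sleepability on the path that can actually sleep. */
    if (is_atomic) {
            gfp &= ~GFP_KERNEL;
            gfp |= __GFP_HIGH | __GFP_ATOMIC;
    } else
            might_sleep();

    /* Propagate allocation failure instead of dereferencing NULL. */
    pte = kaiser_pagetable_walk(address, false);
    if (!pte) {
            ret = -ENOMEM;
            break;
    }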
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 8d6061c..ba6fc2c 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + +- might_sleep(); + if (is_atomic) { + gfp &= ~GFP_KERNEL; + gfp |= __GFP_HIGH | __GFP_ATOMIC; +- } ++ } else ++ might_sleep(); + + if (pgd_none(*pgd)) { + WARN_ONCE(1, "All shadow pgds should have been populated"); +@@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long end_addr = PAGE_ALIGN(start_addr + size); + unsigned long target_address; + +- for (;address < end_addr; address += PAGE_SIZE) { ++ for (; address < end_addr; address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); + if (target_address == -1) { + ret = -EIO; + break; + } + pte = kaiser_pagetable_walk(address, false); ++ if (!pte) { ++ ret = -ENOMEM; ++ break; ++ } + if (pte_none(*pte)) { + set_pte(pte, __pte(flags | target_address)); + } else { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch new file mode 100644 index 00000000..f8e4bda9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch @@ -0,0 +1,79 @@ +From c8c45aa51a96245b04ac18e6f3475d66bc90d4e3 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Fri, 23 Feb 2018 14:06:21 -0800 +Subject: [PATCH 12/12] x86/entry/64: Clear extra registers beyond syscall + arguments, to reduce speculation attack surface + +commit 8e1eb3fa009aa7c0b944b3c8b26b07de0efb3200 upstream. + +At entry userspace may have (maliciously) populated the extra registers +outside the syscall calling convention with arbitrary values that could +be useful in a speculative execution (Spectre style) attack. + +Clear these registers to minimize the kernel's attack surface. + +Note, this only clears the extra registers and not the unused +registers for syscalls less than 6 arguments, since those registers are +likely to be clobbered well before their values could be put to use +under speculation. + +Note, Linus found that the XOR instructions can be executed with +minimized cost if interleaved with the PUSH instructions, and Ingo's +analysis found that R10 and R11 should be included in the register +clearing beyond the typical 'extra' syscall calling convention +registers. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Reported-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Cc: <stable@vger.kernel.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/151787988577.7847.16733592218894189003.stgit@dwillia2-desk3.amr.corp.intel.com +[ Made small improvements to the changelog and the code comments. ] +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index c915eeb..e9120d4 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -176,13 +176,26 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ ++ /* ++ * Clear extra registers that a speculation attack might ++ * otherwise want to exploit. Interleave XOR with PUSH ++ * for better uop scheduling: ++ */ ++ xorq %r10, %r10 /* nospec r10 */ + pushq %r11 /* pt_regs->r11 */ ++ xorq %r11, %r11 /* nospec r11 */ + pushq %rbx /* pt_regs->rbx */ ++ xorl %ebx, %ebx /* nospec rbx */ + pushq %rbp /* pt_regs->rbp */ ++ xorl %ebp, %ebp /* nospec rbp */ + pushq %r12 /* pt_regs->r12 */ ++ xorq %r12, %r12 /* nospec r12 */ + pushq %r13 /* pt_regs->r13 */ ++ xorq %r13, %r13 /* nospec r13 */ + pushq %r14 /* pt_regs->r14 */ ++ xorq %r14, %r14 /* nospec r14 */ + pushq %r15 /* pt_regs->r15 */ ++ xorq %r15, %r15 /* nospec r15 */ + + /* IRQs are off. */ + movq %rsp, %rdi +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch new file mode 100644 index 00000000..b1f180c1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch @@ -0,0 +1,116 @@ +From 50f378f14484a86ee783e0e4da697e32295c6694 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:13 +0000 +Subject: [PATCH 12/42] x86/pti: Do not enable PTI on CPUs which are not + vulnerable to Meltdown + +(cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621) + +Also, for CPUs which don't speculate at all, don't report that they're +vulnerable to the Spectre variants either. + +Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it +for now, even though that could be done with a simple comparison, on the +assumption that we'll have more to add. + +Based on suggestions from Dave Hansen and Alan Cox. 
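The resulting policy, condensed from the helper added below: a CPU is treated
as vulnerable to Meltdown unless it matches the whitelist or its
IA32_ARCH_CAPABILITIES MSR advertises RDCL_NO (the return expression here
folds the patch's final if/return pair into one line):

    static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
    {
            u64 ia32_cap = 0;

            if (x86_match_cpu(cpu_no_meltdown))
                    return false;   /* e.g. any AMD CPU, per the table */

            if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
                    rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

            /* Rogue Data Cache Load impossible? Then not vulnerable. */
            return !(ia32_cap & ARCH_CAP_RDCL_NO);
    }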
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Reviewed-by: Borislav Petkov <bp@suse.de> +Acked-by: Dave Hansen <dave.hansen@intel.com> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 48 +++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 43 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4267273..cfa026f 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -44,6 +44,8 @@ + #include <asm/pat.h> + #include <asm/microcode.h> + #include <asm/microcode_intel.h> ++#include <asm/intel-family.h> ++#include <asm/cpu_device_id.h> + + #ifdef CONFIG_X86_LOCAL_APIC + #include <asm/uv/uv.h> +@@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #endif + } + ++static const __initdata struct x86_cpu_id cpu_no_speculation[] = { ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, ++ { X86_VENDOR_CENTAUR, 5 }, ++ { X86_VENDOR_INTEL, 5 }, ++ { X86_VENDOR_NSC, 5 }, ++ { X86_VENDOR_ANY, 4 }, ++ {} ++}; ++ ++static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { ++ { X86_VENDOR_AMD }, ++ {} ++}; ++ ++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) ++{ ++ u64 ia32_cap = 0; ++ ++ if (x86_match_cpu(cpu_no_meltdown)) ++ return false; ++ ++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ /* Rogue Data Cache Load? No! */ ++ if (ia32_cap & ARCH_CAP_RDCL_NO) ++ return false; ++ ++ return true; ++} ++ + /* + * Do minimum CPU detection early. 
+ * Fields really needed: vendor, cpuid_level, family, model, mask, +@@ -884,11 +921,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + + setup_force_cpu_cap(X86_FEATURE_ALWAYS); + +- if (c->x86_vendor != X86_VENDOR_AMD) +- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); +- +- setup_force_cpu_bug(X86_BUG_SPECTRE_V1); +- setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ if (!x86_match_cpu(cpu_no_speculation)) { ++ if (cpu_vulnerable_to_meltdown(c)) ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ } + + fpu__init_system(c); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch new file mode 100644 index 00000000..62777941 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch @@ -0,0 +1,101 @@ +From 05395f5046a3ff9280cde5804ff4505bbd42b115 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Wed, 17 Jan 2018 22:34:34 +0100 +Subject: [PATCH 12/14] x86/speculation, objtool: Annotate indirect calls/jumps + for objtool + +commit 9e0e3c5130e949c389caabc8033e9799b129e429 upstream. + +Annotate the indirect calls/jumps in the CALL_NOSPEC/JUMP_NOSPEC +alternatives. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 031840a..29e8f30 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -68,6 +68,18 @@ + .endm + + /* ++ * This should be used immediately before an indirect jump/call. It tells ++ * objtool the subsequent indirect jump/call is vouched safe for retpoline ++ * builds. ++ */ ++.macro ANNOTATE_RETPOLINE_SAFE ++ .Lannotate_\@: ++ .pushsection .discard.retpoline_safe ++ _ASM_PTR .Lannotate_\@ ++ .popsection ++.endm ++ ++/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. 
+@@ -103,9 +115,9 @@ + .macro JMP_NOSPEC reg:req + #ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE +- ALTERNATIVE_2 __stringify(jmp *\reg), \ ++ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + #else + jmp *\reg + #endif +@@ -114,9 +126,9 @@ + .macro CALL_NOSPEC reg:req + #ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE +- ALTERNATIVE_2 __stringify(call *\reg), \ ++ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ +- __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + #else + call *\reg + #endif +@@ -144,6 +156,12 @@ + ".long 999b - .\n\t" \ + ".popsection\n\t" + ++#define ANNOTATE_RETPOLINE_SAFE \ ++ "999:\n\t" \ ++ ".pushsection .discard.retpoline_safe\n\t" \ ++ _ASM_PTR " 999b\n\t" \ ++ ".popsection\n\t" ++ + #if defined(CONFIG_X86_64) && defined(RETPOLINE) + + /* +@@ -153,6 +171,7 @@ + # define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ ++ ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch new file mode 100644 index 00000000..37eb1c7b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch @@ -0,0 +1,107 @@ +From e1c87b1c70f8cb5678116e407be3f6164ee52b0f Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 19:18:07 -0700 +Subject: [PATCH 013/102] kaiser: tidied up asm/kaiser.h somewhat + +Mainly deleting a surfeit of blank lines, and reflowing header comment. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kaiser.h | 32 +++++++++++++------------------- + 1 file changed, 13 insertions(+), 19 deletions(-) + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 0703f48..7394ba9 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -1,15 +1,17 @@ + #ifndef _ASM_X86_KAISER_H + #define _ASM_X86_KAISER_H +- +-/* This file includes the definitions for the KAISER feature. +- * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. +- * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, +- * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, +- * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. +- * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. ++/* ++ * This file includes the definitions for the KAISER feature. ++ * KAISER is a counter measure against x86_64 side channel attacks on ++ * the kernel virtual memory. It has a shadow pgd for every process: the ++ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole ++ * user memory. 
Within a kernel context switch, or when an interrupt is handled, ++ * the pgd is switched to the normal one. When the system switches to user mode, ++ * the shadow pgd is enabled. By this, the virtual memory caches are freed, ++ * and the user may not attack the whole kernel memory. + * +- * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions +- * of the user space, or the stacks. ++ * A minimalistic kernel mapping holds the parts needed to be mapped in user ++ * mode, such as the entry/exit functions of the user space, or the stacks. + */ + #ifdef __ASSEMBLY__ + #ifdef CONFIG_KAISER +@@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax + movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + .endm + +- + .macro SWITCH_USER_CR3_NO_STACK +- + movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) + _SWITCH_TO_USER_CR3 %rax + movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +- + .endm + + #else /* CONFIG_KAISER */ +@@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + + #else /* __ASSEMBLY__ */ + +- + #ifdef CONFIG_KAISER + /* + * Upon kernel/user mode switch, it may happen that the address +@@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + * stored. To change the address space, another register is + * needed. A register therefore has to be stored/restored. + */ +- + DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + + /** +@@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + */ + extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + +- + /** + * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping + * @addr: the start address of the range +@@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned l + extern void kaiser_remove_mapping(unsigned long start, unsigned long size); + + /** +- * kaiser_initialize_mapping - Initalize the shadow mapping ++ * kaiser_init - Initialize the shadow mapping + * + * Most parts of the shadow mapping can be mapped upon boot + * time. Only per-process things like the thread stacks + * or a new LDT have to be mapped at runtime. These boot- +- * time mappings are permanent and nevertunmapped. ++ * time mappings are permanent and never unmapped. + */ + extern void kaiser_init(void); + +@@ -117,6 +113,4 @@ extern void kaiser_init(void); + + #endif /* __ASSEMBLY */ + +- +- + #endif /* _ASM_X86_KAISER_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch new file mode 100644 index 00000000..7fa185ec --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch @@ -0,0 +1,54 @@ +From 8642e6bac57983a63f16725873f6df03a16c5e14 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 16 Jan 2018 10:38:09 +0100 +Subject: [PATCH 13/14] x86/boot, objtool: Annotate indirect jump in + secondary_startup_64() + +commit bd89004f6305cbf7352238f61da093207ee518d6 upstream. + +The objtool retpoline validation found this indirect jump. Seeing how +it's on CPU bringup before we run userspace it should be safe, annotate +it. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/head_64.S | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index 67cd7c1..9d72cf5 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -22,6 +22,7 @@ + #include <asm/nops.h> + #include "../entry/calling.h" + #include <asm/export.h> ++#include <asm/nospec-branch.h> + + #ifdef CONFIG_PARAVIRT + #include <asm/asm-offsets.h> +@@ -200,6 +201,7 @@ ENTRY(secondary_startup_64) + + /* Ensure I am executing from virtual addresses */ + movq $1f, %rax ++ ANNOTATE_RETPOLINE_SAFE + jmp *%rax + 1: + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch new file mode 100644 index 00000000..7377d2cd --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch @@ -0,0 +1,173 @@ +From ba3461b1d9bf51d9719e001f3095a2f4b9b7031d Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:14 +0000 +Subject: [PATCH 13/42] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early + Spectre v2 microcodes + +(cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035) + +This doesn't refuse to load the affected microcodes; it just refuses to +use the Spectre v2 mitigation features if they're detected, by clearing +the appropriate feature bits. + +The AMD CPUID bits are handled here too, because hypervisors *may* have +been exposing those bits even on Intel chips, for fine-grained control +of what's available. + +It is non-trivial to use x86_match_cpu() for this table because that +doesn't handle steppings. And the approach taken in commit bd9240a18 +almost made me lose my lunch. 
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/intel-family.h | 7 ++-- + arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 71 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h +index 34a46dc..75b748a 100644 +--- a/arch/x86/include/asm/intel-family.h ++++ b/arch/x86/include/asm/intel-family.h +@@ -12,6 +12,7 @@ + */ + + #define INTEL_FAM6_CORE_YONAH 0x0E ++ + #define INTEL_FAM6_CORE2_MEROM 0x0F + #define INTEL_FAM6_CORE2_MEROM_L 0x16 + #define INTEL_FAM6_CORE2_PENRYN 0x17 +@@ -21,6 +22,7 @@ + #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ + #define INTEL_FAM6_NEHALEM_EP 0x1A + #define INTEL_FAM6_NEHALEM_EX 0x2E ++ + #define INTEL_FAM6_WESTMERE 0x25 + #define INTEL_FAM6_WESTMERE_EP 0x2C + #define INTEL_FAM6_WESTMERE_EX 0x2F +@@ -36,9 +38,9 @@ + #define INTEL_FAM6_HASWELL_GT3E 0x46 + + #define INTEL_FAM6_BROADWELL_CORE 0x3D +-#define INTEL_FAM6_BROADWELL_XEON_D 0x56 + #define INTEL_FAM6_BROADWELL_GT3E 0x47 + #define INTEL_FAM6_BROADWELL_X 0x4F ++#define INTEL_FAM6_BROADWELL_XEON_D 0x56 + + #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E + #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +@@ -57,9 +59,10 @@ + #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ + #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ + #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ +-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ ++#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ + #define INTEL_FAM6_ATOM_GOLDMONT 0x5C + #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ ++#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A + + /* Xeon Phi */ + +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index fcd484d..4d23d78 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) + } + } + ++/* ++ * Early microcode releases for the Spectre v2 mitigation were broken. 
++ * Information taken from; ++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf ++ * - https://kb.vmware.com/s/article/52345 ++ * - Microcode revisions observed in the wild ++ * - Release note from 20180108 microcode release ++ */ ++struct sku_microcode { ++ u8 model; ++ u8 stepping; ++ u32 microcode; ++}; ++static const struct sku_microcode spectre_bad_microcodes[] = { ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, ++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, ++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, ++ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, ++ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, ++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, ++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, ++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, ++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, ++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, ++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, ++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, ++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, ++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, ++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, ++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, ++ /* Updated in the 20180108 release; blacklist until we know otherwise */ ++ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, ++ /* Observed in the wild */ ++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, ++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, ++}; ++ ++static bool bad_spectre_microcode(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { ++ if (c->x86_model == spectre_bad_microcodes[i].model && ++ c->x86_mask == spectre_bad_microcodes[i].stepping) ++ return (c->microcode <= spectre_bad_microcodes[i].microcode); ++ } ++ return false; ++} ++ + static void early_init_intel(struct cpuinfo_x86 *c) + { + u64 misc_enable; +@@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) + rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); + } + ++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || ++ cpu_has(c, X86_FEATURE_STIBP) || ++ cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || ++ cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || ++ cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { ++ pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); ++ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_STIBP); ++ clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); ++ clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); ++ } ++ + /* + * Atom erratum AAE44/AAF40/AAG38/AAH41: + * +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch new file mode 100644 index 00000000..29bf304f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch @@ -0,0 +1,52 @@ +From fd127e673bbc3d794f9586799bffce38cd095e4f Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 19:23:08 -0700 +Subject: [PATCH 014/102] kaiser: tidied up kaiser_add/remove_mapping slightly + +Yes, unmap_pud_range_nofree()'s declaration ought to be in a +header file really, but I'm not sure we want to use it anyway: 
+so for now just declare it inside kaiser_remove_mapping(). +And there doesn't seem to be such a thing as unmap_p4d_range(), +even in a 5-level paging tree. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index ba6fc2c..7a7e850 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -285,8 +285,7 @@ void __init kaiser_init(void) + __PAGE_KERNEL); + } + +-extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); +-// add a mapping to the shadow-mapping, and synchronize the mappings ++/* Add a mapping to the shadow mapping, and synchronize the mappings */ + int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) + { + return kaiser_add_user_map((const void *)addr, size, flags); +@@ -294,15 +293,13 @@ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long fla + + void kaiser_remove_mapping(unsigned long start, unsigned long size) + { ++ extern void unmap_pud_range_nofree(pgd_t *pgd, ++ unsigned long start, unsigned long end); + unsigned long end = start + size; + unsigned long addr; + + for (addr = start; addr < end; addr += PGDIR_SIZE) { + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); +- /* +- * unmap_p4d_range() handles > P4D_SIZE unmaps, +- * so no need to trim 'end'. +- */ + unmap_pud_range_nofree(pgd, addr, end); + } + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch new file mode 100644 index 00000000..ed57dfd2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch @@ -0,0 +1,102 @@ +From 8d91a1887b4fccf06f4077529dc167a52590b348 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 25 Jan 2018 16:14:15 +0000 +Subject: [PATCH 14/42] x86/speculation: Add basic IBPB (Indirect Branch + Prediction Barrier) support + +(cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d) + +Expose indirect_branch_prediction_barrier() for use in subsequent patches. 
+ +[ tglx: Add IBPB status to spectre_v2 sysfs file ] + +Co-developed-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: ak@linux.intel.com +Cc: ashok.raj@intel.com +Cc: dave.hansen@intel.com +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 ++ + arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ + arch/x86/kernel/cpu/bugs.c | 10 +++++++++- + 3 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index c4d03e7..3901545 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -202,6 +202,8 @@ + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ + #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + ++#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ ++ + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ + #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 4ad4108..34e384c 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) + #endif + } + ++static inline void indirect_branch_prediction_barrier(void) ++{ ++ asm volatile(ALTERNATIVE("", ++ "movl %[msr], %%ecx\n\t" ++ "movl %[val], %%eax\n\t" ++ "movl $0, %%edx\n\t" ++ "wrmsr", ++ X86_FEATURE_IBPB) ++ : : [msr] "i" (MSR_IA32_PRED_CMD), ++ [val] "i" (PRED_CMD_IBPB) ++ : "eax", "ecx", "edx", "memory"); ++} ++ + #endif /* __ASSEMBLY__ */ + #endif /* __NOSPEC_BRANCH_H__ */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 4cea7d4..1c4b39d 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -262,6 +262,13 @@ static void __init spectre_v2_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } ++ ++ /* Initialize Indirect Branch Prediction Barrier if supported */ ++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || ++ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { ++ setup_force_cpu_cap(X86_FEATURE_IBPB); ++ pr_info("Enabling Indirect Branch Prediction Barrier\n"); ++ } + } + + #undef pr_fmt +@@ -291,7 +298,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + +- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", + spectre_v2_bad_module ? 
" - vulnerable module loaded" : ""); + } + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch new file mode 100644 index 00000000..29fb0352 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch @@ -0,0 +1,76 @@ +From 9c1c34861d012ab32557236c23a303e70bef627e Mon Sep 17 00:00:00 2001 +From: Ingo Molnar <mingo@kernel.org> +Date: Wed, 21 Feb 2018 09:20:37 +0100 +Subject: [PATCH 14/14] x86/speculation: Move + firmware_restrict_branch_speculation_*() from C to CPP + +commit d72f4e29e6d84b7ec02ae93088aa459ac70e733b upstream. + +firmware_restrict_branch_speculation_*() recently started using +preempt_enable()/disable(), but those are relatively high level +primitives and cause build failures on some 32-bit builds. + +Since we want to keep <asm/nospec-branch.h> low level, convert +them to macros to avoid header hell... + +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: arjan.van.de.ven@intel.com +Cc: bp@alien8.de +Cc: dave.hansen@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: pbonzini@redhat.com +Cc: rkrcmar@redhat.com +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 29e8f30..d0dabea 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -257,20 +257,22 @@ static inline void indirect_branch_prediction_barrier(void) + /* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. ++ * ++ * (Implemented as CPP macros due to header hell.) 
+ */ +-static inline void firmware_restrict_branch_speculation_start(void) +-{ +- preempt_disable(); +- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, +- X86_FEATURE_USE_IBRS_FW); +-} ++#define firmware_restrict_branch_speculation_start() \ ++do { \ ++ preempt_disable(); \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ ++ X86_FEATURE_USE_IBRS_FW); \ ++} while (0) + +-static inline void firmware_restrict_branch_speculation_end(void) +-{ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, +- X86_FEATURE_USE_IBRS_FW); +- preempt_enable(); +-} ++#define firmware_restrict_branch_speculation_end() \ ++do { \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ ++ X86_FEATURE_USE_IBRS_FW); \ ++ preempt_enable(); \ ++} while (0) + + #endif /* __ASSEMBLY__ */ + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch new file mode 100644 index 00000000..3191e4d0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch @@ -0,0 +1,28 @@ +From 3275e2dab198c30b32b9f0396ce6670e7c699136 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 19:51:10 -0700 +Subject: [PATCH 015/102] kaiser: align addition to x86/mm/Makefile + +Use tab not space so they line up properly, kaslr.o also. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile +index 682c162..c505569 100644 +--- a/arch/x86/mm/Makefile ++++ b/arch/x86/mm/Makefile +@@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o + + obj-$(CONFIG_X86_INTEL_MPX) += mpx.o + obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +-obj-$(CONFIG_KAISER) += kaiser.o ++obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o ++obj-$(CONFIG_KAISER) += kaiser.o +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch new file mode 100644 index 00000000..e3c3192e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch @@ -0,0 +1,56 @@ +From d4cebbf42a124247c55852e555cea3e84b09e892 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Fri, 26 Jan 2018 13:11:37 +0100 +Subject: [PATCH 15/42] x86/nospec: Fix header guards names + +(cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9) + +... to adhere to the _ASM_X86_ naming scheme. + +No functional change. 
+ +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: riel@redhat.com +Cc: ak@linux.intel.com +Cc: peterz@infradead.org +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: jikos@kernel.org +Cc: luto@amacapital.net +Cc: dave.hansen@intel.com +Cc: torvalds@linux-foundation.org +Cc: keescook@google.com +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Cc: pjt@google.com +Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 34e384c..865192a 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -1,7 +1,7 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + +-#ifndef __NOSPEC_BRANCH_H__ +-#define __NOSPEC_BRANCH_H__ ++#ifndef _ASM_X86_NOSPEC_BRANCH_H_ ++#define _ASM_X86_NOSPEC_BRANCH_H_ + + #include <asm/alternative.h> + #include <asm/alternative-asm.h> +@@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) + } + + #endif /* __ASSEMBLY__ */ +-#endif /* __NOSPEC_BRANCH_H__ */ ++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch new file mode 100644 index 00000000..10263905 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch @@ -0,0 +1,141 @@ +From 0f85b3821ea7026375748e984dcd43fd27f6f80a Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Mon, 21 Aug 2017 20:11:43 -0700 +Subject: [PATCH 016/102] kaiser: cleanups while trying for gold link + +While trying to get our gold link to work, four cleanups: +matched the gdt_page declaration to its definition; +in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes; +lined up the backslashes according to convention in percpu-defs.h; +deleted the unused irq_stack_pointer addition to irq_stack_union. + +Sad to report that aligning backslashes does not appear to help gold +align to 8192: but while these did not help, they are worth keeping. 
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/desc.h | 2 +- + arch/x86/include/asm/processor.h | 5 ----- + include/asm-generic/vmlinux.lds.h | 18 ++++++++---------- + include/linux/percpu-defs.h | 22 +++++++++++----------- + 4 files changed, 20 insertions(+), 27 deletions(-) + +diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h +index 12080d8..2ed5a2b 100644 +--- a/arch/x86/include/asm/desc.h ++++ b/arch/x86/include/asm/desc.h +@@ -43,7 +43,7 @@ struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; + } __attribute__((aligned(PAGE_SIZE))); + +-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); ++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page); + + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + { +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 3d4784e2..8cb52ee 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -335,11 +335,6 @@ union irq_stack_union { + char gs_base[40]; + unsigned long stack_canary; + }; +- +- struct { +- char irq_stack_pointer[64]; +- char unused[IRQ_STACK_SIZE - 64]; +- }; + }; + + DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 0b16b5d..174f5c8 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -764,16 +764,14 @@ + */ + #define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ +- \ +- VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ +- *(.data..percpu..first) \ +- . = ALIGN(cacheline); \ +- *(.data..percpu..user_mapped) \ +- *(.data..percpu..user_mapped..shared_aligned) \ +- . = ALIGN(PAGE_SIZE); \ +- *(.data..percpu..user_mapped..page_aligned) \ +- VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ +- \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ ++ *(.data..percpu..first) \ ++ . = ALIGN(cacheline); \ ++ *(.data..percpu..user_mapped) \ ++ *(.data..percpu..user_mapped..shared_aligned) \ ++ . = ALIGN(PAGE_SIZE); \ ++ *(.data..percpu..user_mapped..page_aligned) \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . 
= ALIGN(cacheline); \ +diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h +index 8ea945f..cfe13cb 100644 +--- a/include/linux/percpu-defs.h ++++ b/include/linux/percpu-defs.h +@@ -121,10 +121,10 @@ + #define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") + +-#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ ++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + +-#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ ++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + + /* +@@ -156,11 +156,11 @@ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +-#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +-#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +@@ -185,18 +185,18 @@ + /* + * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. + */ +-#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ +- DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ +- __aligned(PAGE_SIZE) ++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) + +-#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ +- DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ +- __aligned(PAGE_SIZE) ++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) + + /* + * Declaration/definition used for per-CPU variables that must be read mostly. + */ +-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ ++#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") + + #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch new file mode 100644 index 00000000..c7571ac4 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch @@ -0,0 +1,55 @@ +From 50014cf904736f358e41d1fb1337d10f92b40aa7 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Fri, 26 Jan 2018 13:11:39 +0100 +Subject: [PATCH 16/42] x86/bugs: Drop one "mitigation" from dmesg + +(cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db) + +Make + +[ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline + +into + +[ 0.031118] Spectre V2: Mitigation: Full generic retpoline + +to reduce the mitigation mitigations strings. 
+ +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: riel@redhat.com +Cc: ak@linux.intel.com +Cc: peterz@infradead.org +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: jikos@kernel.org +Cc: luto@amacapital.net +Cc: dave.hansen@intel.com +Cc: torvalds@linux-foundation.org +Cc: keescook@google.com +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: tim.c.chen@linux.intel.com +Cc: pjt@google.com +Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 1c4b39d..674ad46 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -90,7 +90,7 @@ static const char *spectre_v2_strings[] = { + }; + + #undef pr_fmt +-#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt ++#define pr_fmt(fmt) "Spectre V2 : " fmt + + static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + static bool spectre_v2_bad_module; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch new file mode 100644 index 00000000..ef495fe1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch @@ -0,0 +1,70 @@ +From 3335fb9c9dc5f3099f659a09423e5e1d1c264f03 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sat, 9 Sep 2017 17:31:18 -0700 +Subject: [PATCH 017/102] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET + +There's a 0x1000 in various places, which looks better with a name. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 4 ++-- + arch/x86/include/asm/kaiser.h | 7 +++++-- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 57f7993..3c8fc97 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1316,7 +1316,7 @@ ENTRY(nmi) + movq %cr3, %rax + pushq %rax + #ifdef CONFIG_KAISER_REAL_SWITCH +- andq $(~0x1000), %rax ++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax + #endif + movq %rax, %cr3 + #endif +@@ -1559,7 +1559,7 @@ end_repeat_nmi: + movq %cr3, %rax + pushq %rax + #ifdef CONFIG_KAISER_REAL_SWITCH +- andq $(~0x1000), %rax ++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax + #endif + movq %rax, %cr3 + #endif +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 7394ba9..051acf6 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -13,13 +13,16 @@ + * A minimalistic kernel mapping holds the parts needed to be mapped in user + * mode, such as the entry/exit functions of the user space, or the stacks. 
+ */ ++ ++#define KAISER_SHADOW_PGD_OFFSET 0x1000 ++ + #ifdef __ASSEMBLY__ + #ifdef CONFIG_KAISER + + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg + #ifdef CONFIG_KAISER_REAL_SWITCH +-andq $(~0x1000), \reg ++andq $(~KAISER_SHADOW_PGD_OFFSET), \reg + #endif + movq \reg, %cr3 + .endm +@@ -27,7 +30,7 @@ movq \reg, %cr3 + .macro _SWITCH_TO_USER_CR3 reg + movq %cr3, \reg + #ifdef CONFIG_KAISER_REAL_SWITCH +-orq $(0x1000), \reg ++orq $(KAISER_SHADOW_PGD_OFFSET), \reg + #endif + movq \reg, %cr3 + .endm +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch new file mode 100644 index 00000000..f5232d18 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch @@ -0,0 +1,72 @@ +From 0af038c29f5df7028f229d2d4bf8ee7163db4cdd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 27 Jan 2018 15:45:14 +0100 +Subject: [PATCH 17/42] x86/cpu/bugs: Make retpoline module warning conditional +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +(cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02) + +If sysfs is disabled and RETPOLINE not defined: + +arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used +[-Wunused-variable] + static bool spectre_v2_bad_module; + +Hide it. + +Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") +Reported-by: Borislav Petkov <bp@alien8.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 674ad46..efe55c5 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -93,9 +93,10 @@ static const char *spectre_v2_strings[] = { + #define pr_fmt(fmt) "Spectre V2 : " fmt + + static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +-static bool spectre_v2_bad_module; + + #ifdef RETPOLINE ++static bool spectre_v2_bad_module; ++ + bool retpoline_module_ok(bool has_retpoline) + { + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) +@@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpoline) + spectre_v2_bad_module = true; + return false; + } ++ ++static inline const char *spectre_v2_module_string(void) ++{ ++ return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; ++} ++#else ++static inline const char *spectre_v2_module_string(void) { return ""; } + #endif + + static void __init spec2_print_if_insecure(const char *reason) +@@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +- boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", +- spectre_v2_bad_module ? " - vulnerable module loaded" : ""); ++ boot_cpu_has(X86_FEATURE_IBPB) ? 
", IBPB" : "", ++ spectre_v2_module_string()); + } + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch new file mode 100644 index 00000000..59f32c0b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch @@ -0,0 +1,85 @@ +From e1ccf8ed6f602560bbd73af01c9592347030c61c Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 3 Sep 2017 18:30:43 -0700 +Subject: [PATCH 018/102] kaiser: delete KAISER_REAL_SWITCH option + +We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be +left over from early development, and now just obscures tricky parts +of the code. Delete it before adding PCIDs, or nokaiser boot option. + +(Or if there is some good reason to keep the option, then it needs +a help text - and a "depends on KAISER", so that all those without +KAISER are not asked the question. But we'd much rather delete it.) + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 4 ---- + arch/x86/include/asm/kaiser.h | 4 ---- + security/Kconfig | 4 ---- + 3 files changed, 12 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 3c8fc97..df33f10 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1315,9 +1315,7 @@ ENTRY(nmi) + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +-#ifdef CONFIG_KAISER_REAL_SWITCH + andq $(~KAISER_SHADOW_PGD_OFFSET), %rax +-#endif + movq %rax, %cr3 + #endif + call do_nmi +@@ -1558,9 +1556,7 @@ end_repeat_nmi: + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +-#ifdef CONFIG_KAISER_REAL_SWITCH + andq $(~KAISER_SHADOW_PGD_OFFSET), %rax +-#endif + movq %rax, %cr3 + #endif + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 051acf6..e0fc45e 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -21,17 +21,13 @@ + + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg +-#ifdef CONFIG_KAISER_REAL_SWITCH + andq $(~KAISER_SHADOW_PGD_OFFSET), \reg +-#endif + movq \reg, %cr3 + .endm + + .macro _SWITCH_TO_USER_CR3 reg + movq %cr3, \reg +-#ifdef CONFIG_KAISER_REAL_SWITCH + orq $(KAISER_SHADOW_PGD_OFFSET), \reg +-#endif + movq \reg, %cr3 + .endm + +diff --git a/security/Kconfig b/security/Kconfig +index dc78671..d8ae933 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -41,10 +41,6 @@ config KAISER + + If you are unsure how to answer this question, answer Y. 
+ +-config KAISER_REAL_SWITCH +- bool "KAISER: actually switch page tables" +- default y +- + config SECURITYFS + bool "Enable the securityfs filesystem" + help +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch new file mode 100644 index 00000000..09e6e0ce --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch @@ -0,0 +1,181 @@ +From 9d680bb2dea42b419a94a55a4b65afb1b785b307 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sat, 27 Jan 2018 16:24:32 +0000 +Subject: [PATCH 18/42] x86/cpufeatures: Clean up Spectre v2 related CPUID + flags + +(cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2) + +We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", +"ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them +as the user-visible bits. + +When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB +capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP +bit is set, set the AMD STIBP that's used for the generic hardware +capability. + +Hide the rest from /proc/cpuinfo by putting "" in the comments. Including +RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are +patches to make the sysfs vulnerabilities information non-readable by +non-root, and the same should apply to all information about which +mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. + +The feature bit for whether IBPB is actually used, which is needed for +ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. 
+ +Originally-by: Borislav Petkov <bp@suse.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: ak@linux.intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 18 +++++++++--------- + arch/x86/include/asm/nospec-branch.h | 2 +- + arch/x86/kernel/cpu/bugs.c | 7 +++---- + arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++++---------- + 4 files changed, 34 insertions(+), 24 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 3901545..8eb23f5 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -194,15 +194,15 @@ + #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + +-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ ++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ + #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + +-#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ ++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +@@ -260,9 +260,9 @@ + /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ + #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +-#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +-#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +-#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ ++#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ ++#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ ++#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +@@ -301,8 +301,8 @@ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ + #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +-#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ ++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + + /* +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 865192a..19ecb54 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", +- X86_FEATURE_IBPB) ++ X86_FEATURE_USE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index efe55c5..3a06718 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -272,9 +272,8 @@ static void __init spectre_v2_select_mitigation(void) + } + + /* Initialize Indirect Branch Prediction Barrier if supported */ +- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || +- boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { +- setup_force_cpu_cap(X86_FEATURE_IBPB); ++ if (boot_cpu_has(X86_FEATURE_IBPB)) { ++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } + } +@@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +- boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", ++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? 
", IBPB" : "", + spectre_v2_module_string()); + } + #endif +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 4d23d78..2e257f8 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -140,17 +140,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) + rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); + } + +- if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || +- cpu_has(c, X86_FEATURE_STIBP) || +- cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || +- cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || +- cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { +- pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); +- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); ++ /* ++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, ++ * and they also have a different bit for STIBP support. Also, ++ * a hypervisor might have set the individual AMD bits even on ++ * Intel CPUs, for finer-grained selection of what's available. ++ */ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ } ++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++ ++ /* Now if any of them are set, check the blacklist and clear the lot */ ++ if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || ++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { ++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); ++ clear_cpu_cap(c, X86_FEATURE_IBRS); ++ clear_cpu_cap(c, X86_FEATURE_IBPB); + clear_cpu_cap(c, X86_FEATURE_STIBP); +- clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); +- clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); +- clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); ++ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch new file mode 100644 index 00000000..4abffa11 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch @@ -0,0 +1,122 @@ +From 5a28e367f6fd4c8e8c81ae99cf912d89930dd768 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sat, 9 Sep 2017 21:27:32 -0700 +Subject: [PATCH 019/102] kaiser: vmstat show NR_KAISERTABLE as nr_overhead + +The kaiser update made an interesting choice, never to free any shadow +page tables. Contention on global spinlock was worrying, particularly +with it held across page table scans when freeing. Something had to be +done: I was going to add refcounting; but simply never to free them is +an appealing choice, minimizing contention without complicating the code +(the more a page table is found already, the less the spinlock is used). + +But leaking pages in this way is also a worry: can we get away with it? +At the very least, we need a count to show how bad it actually gets: +in principle, one might end up wasting about 1/256 of memory that way +(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512 +for when they are user-mapped from the vmalloc area on another occasion +(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed). 
+ +Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the +shared pgd entries, and 1 for each intermediate page table added +thereafter for user-mapping - but leave out the 1 per mm, for its +shadow pgd, because that distracts from the monotonic increase. +Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled). + +In practice, it doesn't look so bad so far: more like 1/12000 after +nine hours of gtests below; and movable pageblock segregation should +tend to cluster the kaiser tables into a subset of the address space +(if not, they will be bad for compaction too). But production may +tell a different story: keep an eye on this number, and bring back +lighter freeing if it gets out of control (maybe a shrinker). + +["nr_overhead" should of course say "nr_kaisertable", if it needs +to stay; but for the moment we are being coy, preferring that when +Joe Blow notices a new line in his /proc/vmstat, he does not get +too curious about what this "kaiser" stuff might be.] + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 16 +++++++++++----- + include/linux/mmzone.h | 3 ++- + mm/vmstat.c | 1 + + 3 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 7a7e850..bd22ef5 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) + if (!new_pmd_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); +- if (pud_none(*pud)) ++ if (pud_none(*pud)) { + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); +- else ++ __inc_zone_page_state(virt_to_page((void *) ++ new_pmd_page), NR_KAISERTABLE); ++ } else + free_page(new_pmd_page); + spin_unlock(&shadow_table_allocation_lock); + } +@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) + if (!new_pte_page) + return NULL; + spin_lock(&shadow_table_allocation_lock); +- if (pmd_none(*pmd)) ++ if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); +- else ++ __inc_zone_page_state(virt_to_page((void *) ++ new_pte_page), NR_KAISERTABLE); ++ } else + free_page(new_pte_page); + spin_unlock(&shadow_table_allocation_lock); + } +@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void) + pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); + for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { + pgd_t new_pgd; +- pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); ++ pud_t *pud = pud_alloc_one(&init_mm, ++ PAGE_OFFSET + i * PGDIR_SIZE); + if (!pud) { + WARN_ON(1); + break; + } ++ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE); + new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); + /* + * Make sure not to stomp on some other pgd entry. 
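
Illustration only, not part of the patch: once a kernel carrying this
change is running, the counter can be read back under the "nr_overhead"
name the hunks below add to /proc/vmstat (it reads 0 when kaiser is not
enabled). A minimal reader:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "nr_overhead ", 12))
			fputs(line, stdout);
	fclose(f);
	return 0;
}
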
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 7e273e2..0547d4f 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -124,8 +124,9 @@ enum zone_stat_item { + NR_SLAB_UNRECLAIMABLE, + NR_PAGETABLE, /* used for pagetables */ + NR_KERNEL_STACK_KB, /* measured in KiB */ +- /* Second 128 byte cacheline */ ++ NR_KAISERTABLE, + NR_BOUNCE, ++ /* Second 128 byte cacheline */ + #if IS_ENABLED(CONFIG_ZSMALLOC) + NR_ZSPAGES, /* allocated in zsmalloc */ + #endif +diff --git a/mm/vmstat.c b/mm/vmstat.c +index 604f26a..6a088df 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -932,6 +932,7 @@ const char * const vmstat_text[] = { + "nr_slab_unreclaimable", + "nr_page_table_pages", + "nr_kernel_stack", ++ "nr_overhead", + "nr_bounce", + #if IS_ENABLED(CONFIG_ZSMALLOC) + "nr_zspages", +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch new file mode 100644 index 00000000..60269d5c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch @@ -0,0 +1,261 @@ +From 53b3bd3747acd3d6633feaa63a998f854d90551c Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@alien8.de> +Date: Sat, 27 Jan 2018 16:24:33 +0000 +Subject: [PATCH 19/42] x86/retpoline: Simplify vmexit_fill_RSB() + +(cherry picked from commit 1dde7415e99933bb7293d6b2843752cbdb43ec11) + +Simplify it to call an asm-function instead of pasting 41 insn bytes at +every call site. Also, add alignment to the macro as suggested here: + + https://support.google.com/faqs/answer/7625886 + +[dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: ak@linux.intel.com +Cc: dave.hansen@intel.com +Cc: karahmed@amazon.de +Cc: arjan@linux.intel.com +Cc: torvalds@linux-foundation.org +Cc: peterz@infradead.org +Cc: bp@alien8.de +Cc: pbonzini@redhat.com +Cc: tim.c.chen@linux.intel.com +Cc: gregkh@linux-foundation.org +Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_32.S | 3 +- + arch/x86/entry/entry_64.S | 3 +- + arch/x86/include/asm/asm-prototypes.h | 3 ++ + arch/x86/include/asm/nospec-branch.h | 70 ++++------------------------------- + arch/x86/lib/Makefile | 1 + + arch/x86/lib/retpoline.S | 56 ++++++++++++++++++++++++++++ + 6 files changed, 71 insertions(+), 65 deletions(-) + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index a76dc73..f5434b4 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. 
+ */ +- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW ++ /* Clobbers %ebx */ ++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + #endif + + /* restore callee-saved registers */ +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 16146eb..e422e15 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -435,7 +435,8 @@ ENTRY(__switch_to_asm) + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ +- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW ++ /* Clobbers %rbx */ ++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + #endif + + /* restore callee-saved registers */ +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index 5a25ada..1666542 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -37,4 +37,7 @@ INDIRECT_THUNK(dx) + INDIRECT_THUNK(si) + INDIRECT_THUNK(di) + INDIRECT_THUNK(bp) ++asmlinkage void __fill_rsb(void); ++asmlinkage void __clear_rsb(void); ++ + #endif /* CONFIG_RETPOLINE */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 19ecb54..df4ecec 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -7,50 +7,6 @@ + #include <asm/alternative-asm.h> + #include <asm/cpufeatures.h> + +-/* +- * Fill the CPU return stack buffer. +- * +- * Each entry in the RSB, if used for a speculative 'ret', contains an +- * infinite 'pause; lfence; jmp' loop to capture speculative execution. +- * +- * This is required in various cases for retpoline and IBRS-based +- * mitigations for the Spectre variant 2 vulnerability. Sometimes to +- * eliminate potentially bogus entries from the RSB, and sometimes +- * purely to ensure that it doesn't get empty, which on some CPUs would +- * allow predictions from other (unwanted!) sources to be used. +- * +- * We define a CPP macro such that it can be used from both .S files and +- * inline assembly. It's possible to do a .macro and then include that +- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. +- */ +- +-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +-#define RSB_FILL_LOOPS 16 /* To avoid underflow */ +- +-/* +- * Google experimented with loop-unrolling and this turned out to be +- * the optimal version — two calls, each with their own speculation +- * trap should their return address end up getting used, in a loop. +- */ +-#define __FILL_RETURN_BUFFER(reg, nr, sp) \ +- mov $(nr/2), reg; \ +-771: \ +- call 772f; \ +-773: /* speculation trap */ \ +- pause; \ +- lfence; \ +- jmp 773b; \ +-772: \ +- call 774f; \ +-775: /* speculation trap */ \ +- pause; \ +- lfence; \ +- jmp 775b; \ +-774: \ +- dec reg; \ +- jnz 771b; \ +- add $(BITS_PER_LONG/8) * nr, sp; +- + #ifdef __ASSEMBLY__ + + /* +@@ -121,17 +77,10 @@ + #endif + .endm + +- /* +- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP +- * monstrosity above, manually. 
+- */ +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ++/* This clobbers the BX register */ ++.macro FILL_RETURN_BUFFER nr:req ftr:req + #ifdef CONFIG_RETPOLINE +- ANNOTATE_NOSPEC_ALTERNATIVE +- ALTERNATIVE "jmp .Lskip_rsb_\@", \ +- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ +- \ftr +-.Lskip_rsb_\@: ++ ALTERNATIVE "", "call __clear_rsb", \ftr + #endif + .endm + +@@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; + static inline void vmexit_fill_RSB(void) + { + #ifdef CONFIG_RETPOLINE +- unsigned long loops; +- +- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE +- ALTERNATIVE("jmp 910f", +- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), +- X86_FEATURE_RETPOLINE) +- "910:" +- : "=r" (loops), ASM_CALL_CONSTRAINT +- : : "memory" ); ++ alternative_input("", ++ "call __fill_rsb", ++ X86_FEATURE_RETPOLINE, ++ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); + #endif + } + +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index 6bf1898..4ad7c4d 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o + lib-$(CONFIG_RETPOLINE) += retpoline.o ++OBJECT_FILES_NON_STANDARD_retpoline.o :=y + + obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index c909961..480edc3 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -7,6 +7,7 @@ + #include <asm/alternative-asm.h> + #include <asm/export.h> + #include <asm/nospec-branch.h> ++#include <asm/bitsperlong.h> + + .macro THUNK reg + .section .text.__x86.indirect_thunk +@@ -46,3 +47,58 @@ GENERATE_THUNK(r13) + GENERATE_THUNK(r14) + GENERATE_THUNK(r15) + #endif ++ ++/* ++ * Fill the CPU return stack buffer. ++ * ++ * Each entry in the RSB, if used for a speculative 'ret', contains an ++ * infinite 'pause; lfence; jmp' loop to capture speculative execution. ++ * ++ * This is required in various cases for retpoline and IBRS-based ++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to ++ * eliminate potentially bogus entries from the RSB, and sometimes ++ * purely to ensure that it doesn't get empty, which on some CPUs would ++ * allow predictions from other (unwanted!) sources to be used. ++ * ++ * Google experimented with loop-unrolling and this turned out to be ++ * the optimal version - two calls, each with their own speculation ++ * trap should their return address end up getting used, in a loop. 
++ */ ++.macro STUFF_RSB nr:req sp:req ++ mov $(\nr / 2), %_ASM_BX ++ .align 16 ++771: ++ call 772f ++773: /* speculation trap */ ++ pause ++ lfence ++ jmp 773b ++ .align 16 ++772: ++ call 774f ++775: /* speculation trap */ ++ pause ++ lfence ++ jmp 775b ++ .align 16 ++774: ++ dec %_ASM_BX ++ jnz 771b ++ add $((BITS_PER_LONG/8) * \nr), \sp ++.endm ++ ++#define RSB_FILL_LOOPS 16 /* To avoid underflow */ ++ ++ENTRY(__fill_rsb) ++ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP ++ ret ++END(__fill_rsb) ++EXPORT_SYMBOL_GPL(__fill_rsb) ++ ++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ ++ ++ENTRY(__clear_rsb) ++ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP ++ ret ++END(__clear_rsb) ++EXPORT_SYMBOL_GPL(__clear_rsb) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch new file mode 100644 index 00000000..85bdc307 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch @@ -0,0 +1,424 @@ +From 9bc1089baa5051f750a246af746e81bf1bb1fe09 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Wed, 30 Aug 2017 16:23:00 -0700 +Subject: [PATCH 020/102] kaiser: enhanced by kernel and user PCIDs + +Merged performance improvements to Kaiser, using distinct kernel +and user Process Context Identifiers to minimize the TLB flushing. + +[This work actually all from Dave Hansen 2017-08-30: +still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.] + +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 10 ++++-- + arch/x86/entry/entry_64_compat.S | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/kaiser.h | 15 +++++++-- + arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++++ + arch/x86/include/asm/tlbflush.h | 52 ++++++++++++++++++++++++----- + arch/x86/include/uapi/asm/processor-flags.h | 3 +- + arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++++ + arch/x86/kvm/x86.c | 3 +- + arch/x86/mm/kaiser.c | 7 ++++ + arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++-- + 11 files changed, 181 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index df33f10..4a0ebf4 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1315,7 +1315,10 @@ ENTRY(nmi) + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax ++ /* mask off "user" bit of pgd address and 12 PCID bits: */ ++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax ++ /* Add back kernel PCID and "no flush" bit */ ++ orq X86_CR3_PCID_KERN_VAR, %rax + movq %rax, %cr3 + #endif + call do_nmi +@@ -1556,7 +1559,10 @@ end_repeat_nmi: + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax + pushq %rax +- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax ++ /* mask off "user" bit of pgd address and 12 PCID bits: */ ++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax ++ /* Add back kernel PCID and "no flush" bit */ ++ orq X86_CR3_PCID_KERN_VAR, %rax + movq %rax, %cr3 + #endif + +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index f0e384e..0eb5801 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -13,6 
+13,7 @@ + #include <asm/irqflags.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/pgtable_types.h> + #include <asm/kaiser.h> + #include <linux/linkage.h> + #include <linux/err.h> +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index ed10b5b..dc50883 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -189,6 +189,7 @@ + + #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ + #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ ++#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ + + #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index e0fc45e..360ff3b 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -1,5 +1,8 @@ + #ifndef _ASM_X86_KAISER_H + #define _ASM_X86_KAISER_H ++ ++#include <uapi/asm/processor-flags.h> /* For PCID constants */ ++ + /* + * This file includes the definitions for the KAISER feature. + * KAISER is a counter measure against x86_64 side channel attacks on +@@ -21,13 +24,21 @@ + + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg +-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg ++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg ++orq X86_CR3_PCID_KERN_VAR, \reg + movq \reg, %cr3 + .endm + + .macro _SWITCH_TO_USER_CR3 reg + movq %cr3, \reg +-orq $(KAISER_SHADOW_PGD_OFFSET), \reg ++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg ++/* ++ * This can obviously be one instruction by putting the ++ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. ++ * But, just leave it now for simplicity. ++ */ ++orq X86_CR3_PCID_USER_VAR, \reg ++orq $(KAISER_SHADOW_PGD_OFFSET), \reg + movq \reg, %cr3 + .endm + +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index 8bc8d02..ada77fd 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -141,6 +141,32 @@ + _PAGE_SOFT_DIRTY) + #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) + ++/* The ASID is the lower 12 bits of CR3 */ ++#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL)) ++ ++/* Mask for all the PCID-related bits in CR3: */ ++#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) ++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) ++#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) ++#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) ++ ++#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) ++#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) ++#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) ++#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) ++#else ++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) ++#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) ++/* ++ * PCIDs are unsupported on 32-bit and none of these bits can be ++ * set in CR3: ++ */ ++#define X86_CR3_PCID_KERN_FLUSH (0) ++#define X86_CR3_PCID_USER_FLUSH (0) ++#define X86_CR3_PCID_KERN_NOFLUSH (0) ++#define X86_CR3_PCID_USER_NOFLUSH (0) ++#endif ++ + /* + * The cache modes defined here are used to translate between pure SW usage + * and the HW defined cache mode bits and/or PAT entries. 
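
Illustration only, not part of the patch: how the constants defined above
combine into full CR3 values. KAISER_SHADOW_PGD_OFFSET is assumed to be
PAGE_SIZE (0x1000, the shadow pgd sitting one page after the kernel pgd),
and a 64-bit build is assumed so bit 63 (X86_CR3_PCID_NOFLUSH) exists:

#include <stdio.h>

#define NOFLUSH		(1ULL << 63)	/* X86_CR3_PCID_NOFLUSH */
#define ASID_KERN	0x4ULL		/* X86_CR3_PCID_ASID_KERN */
#define ASID_USER	0x6ULL		/* X86_CR3_PCID_ASID_USER */
#define SHADOW_OFF	0x1000ULL	/* assumed KAISER_SHADOW_PGD_OFFSET */

int main(void)
{
	unsigned long long pgd = 0x1234000ULL;	/* example pgd physical address */

	/* kernel context: kernel pgd, kernel ASID, no TLB flush on load */
	printf("kernel cr3: %#llx\n", pgd | NOFLUSH | ASID_KERN);
	/* user context: shadow pgd one page later, user ASID */
	printf("user   cr3: %#llx\n", (pgd + SHADOW_OFF) | NOFLUSH | ASID_USER);
	return 0;
}
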
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index c13041e..28b4182 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) + { + struct { u64 d[2]; } desc = { { pcid, addr } }; +- + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global +@@ -135,14 +134,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) + + static inline void __native_flush_tlb(void) + { ++ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { ++ /* ++ * If current->mm == NULL then we borrow a mm which may change during a ++ * task switch and therefore we must not be preempted while we write CR3 ++ * back: ++ */ ++ preempt_disable(); ++ native_write_cr3(native_read_cr3()); ++ preempt_enable(); ++ return; ++ } + /* +- * If current->mm == NULL then we borrow a mm which may change during a +- * task switch and therefore we must not be preempted while we write CR3 +- * back: ++ * We are no longer using globals with KAISER, so a ++ * "nonglobals" flush would work too. But, this is more ++ * conservative. ++ * ++ * Note, this works with CR4.PCIDE=0 or 1. + */ +- preempt_disable(); +- native_write_cr3(native_read_cr3()); +- preempt_enable(); ++ invpcid_flush_all(); + } + + static inline void __native_flush_tlb_global_irq_disabled(void) +@@ -164,6 +174,8 @@ static inline void __native_flush_tlb_global(void) + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. ++ * ++ * Note, this works with CR4.PCIDE=0 or 1. + */ + invpcid_flush_all(); + return; +@@ -183,7 +195,31 @@ static inline void __native_flush_tlb_global(void) + + static inline void __native_flush_tlb_single(unsigned long addr) + { +- asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); ++ /* ++ * SIMICS #GP's if you run INVPCID with type 2/3 ++ * and X86_CR4_PCIDE clear. Shame! ++ * ++ * The ASIDs used below are hard-coded. But, we must not ++ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call ++ * invpcid in the case we are called early. ++ */ ++ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { ++ asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); ++ return; ++ } ++ /* Flush the address out of both PCIDs. */ ++ /* ++ * An optimization here might be to determine addresses ++ * that are only kernel-mapped and only flush the kernel ++ * ASID. But, userspace flushes are probably much more ++ * important performance-wise. ++ * ++ * Make sure to do only a single invpcid when KAISER is ++ * disabled and we have only a single ASID. 
++ */ ++ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) ++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); ++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + } + + static inline void __flush_tlb_all(void) +diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h +index 567de50..6768d13 100644 +--- a/arch/x86/include/uapi/asm/processor-flags.h ++++ b/arch/x86/include/uapi/asm/processor-flags.h +@@ -77,7 +77,8 @@ + #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) + #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ + #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) +-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ ++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ ++#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) + + /* + * Intel CPU features in CR4 +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 3efde13..b4c0ae5 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) + } + } + ++/* ++ * These can have bit 63 set, so we can not just use a plain "or" ++ * instruction to get their value or'd into CR3. It would take ++ * another register. So, we use a memory reference to these ++ * instead. ++ * ++ * This is also handy because systems that do not support ++ * PCIDs just end up or'ing a 0 into their CR3, which does ++ * no harm. ++ */ ++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; ++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; ++ + static void setup_pcid(struct cpuinfo_x86 *c) + { + if (cpu_has(c, X86_FEATURE_PCID)) { + if (cpu_has(c, X86_FEATURE_PGE)) { + cr4_set_bits(X86_CR4_PCIDE); ++ /* ++ * These variables are used by the entry/exit ++ * code to change PCIDs. ++ */ ++#ifdef CONFIG_KAISER ++ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; ++ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; ++#endif ++ /* ++ * INVPCID has two "groups" of types: ++ * 1/2: Invalidate an individual address ++ * 3/4: Invalidate all contexts ++ * ++ * 1/2 take a PCID, but 3/4 do not. So, 3/4 ++ * ignore the PCID argument in the descriptor. ++ * But, we have to be careful not to call 1/2 ++ * with an actual non-zero PCID in them before ++ * we do the above cr4_set_bits(). ++ */ ++ if (cpu_has(c, X86_FEATURE_INVPCID)) ++ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index e5bc139..51a700a 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + return 1; + + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ +- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) ++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) || ++ !is_long_mode(vcpu)) + return 1; + } + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index bd22ef5..f5c75f7 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void) + } while (0) + + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; ++extern unsigned long X86_CR3_PCID_KERN_VAR; ++extern unsigned long X86_CR3_PCID_USER_VAR; + /* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. 
But, we +@@ -289,6 +291,11 @@ void __init kaiser_init(void) + kaiser_add_user_map_early(&debug_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); ++ ++ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, ++ __PAGE_KERNEL); ++ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, ++ __PAGE_KERNEL); + } + + /* Add a mapping to the shadow mapping, and synchronize the mappings */ +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index a7655f6..a376246 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -36,6 +36,46 @@ struct flush_tlb_info { + unsigned long flush_end; + }; + ++static void load_new_mm_cr3(pgd_t *pgdir) ++{ ++ unsigned long new_mm_cr3 = __pa(pgdir); ++ ++ /* ++ * KAISER, plus PCIDs needs some extra work here. But, ++ * if either of features is not present, we need no ++ * PCIDs here and just do a normal, full TLB flush with ++ * the write_cr3() ++ */ ++ if (!IS_ENABLED(CONFIG_KAISER) || ++ !cpu_feature_enabled(X86_FEATURE_PCID)) ++ goto out_set_cr3; ++ /* ++ * We reuse the same PCID for different tasks, so we must ++ * flush all the entires for the PCID out when we change ++ * tasks. ++ */ ++ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); ++ ++ /* ++ * The flush from load_cr3() may leave old TLB entries ++ * for userspace in place. We must flush that context ++ * separately. We can theoretically delay doing this ++ * until we actually load up the userspace CR3, but ++ * that's a bit tricky. We have to have the "need to ++ * flush userspace PCID" bit per-cpu and check it in the ++ * exit-to-userspace paths. ++ */ ++ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); ++ ++out_set_cr3: ++ /* ++ * Caution: many callers of this function expect ++ * that load_cr3() is serializing and orders TLB ++ * fills with respect to the mm_cpumask writes. ++ */ ++ write_cr3(new_mm_cr3); ++} ++ + /* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. +@@ -47,7 +87,7 @@ void leave_mm(int cpu) + BUG(); + if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { + cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); +- load_cr3(swapper_pg_dir); ++ load_new_mm_cr3(swapper_pg_dir); + /* + * This gets called in the idle path where RCU + * functions differently. Tracing normally +@@ -126,7 +166,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + * ordering guarantee we need. + * + */ +- load_cr3(next->pgd); ++ load_new_mm_cr3(next->pgd); + + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + +@@ -175,7 +215,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. 
+ */ +- load_cr3(next->pgd); ++ load_new_mm_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + load_mm_cr4(next); + load_mm_ldt(next); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch new file mode 100644 index 00000000..9a62cf34 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch @@ -0,0 +1,53 @@ +From 3ae5467002f15c1915b67a45af81dded8b451533 Mon Sep 17 00:00:00 2001 +From: Dou Liyang <douly.fnst@cn.fujitsu.com> +Date: Tue, 30 Jan 2018 14:13:50 +0800 +Subject: [PATCH 20/42] x86/spectre: Check CONFIG_RETPOLINE in command line + parser + +(cherry picked from commit 9471eee9186a46893726e22ebb54cade3f9bc043) + +The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is +enabled. As a consequence it fails to emit the appropriate warning and sets +feature flags which have no effect at all. + +Add the missing IS_ENABLED() check. + +Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation") +Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: ak@linux.intel.com +Cc: peterz@infradead.org +Cc: Tomohiro <misono.tomohiro@jp.fujitsu.com> +Cc: dave.hansen@intel.com +Cc: bp@alien8.de +Cc: arjan@linux.intel.com +Cc: dwmw@amazon.co.uk +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 3a06718..51624c6 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -212,10 +212,10 @@ static void __init spectre_v2_select_mitigation(void) + return; + + case SPECTRE_V2_CMD_FORCE: +- /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: +- goto retpoline_auto; +- ++ if (IS_ENABLED(CONFIG_RETPOLINE)) ++ goto retpoline_auto; ++ break; + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch new file mode 100644 index 00000000..e949fb58 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch @@ -0,0 +1,403 @@ +From efc1ec625e63752ab337e0b151068400535aa861 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Thu, 17 Aug 2017 15:00:37 -0700 +Subject: [PATCH 021/102] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush + user + +We have many machines (Westmere, Sandybridge, Ivybridge) supporting +PCID but not INVPCID: on these load_new_mm_cr3() simply crashed. + +Flushing user context inside load_new_mm_cr3() without the use of +invpcid is difficult: momentarily switch from kernel to user context +and back to do so? I'm not sure whether that can be safely done at +all, and would risk polluting user context with kernel internals, +and kernel context with stale user externals. 
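
Illustration only, not part of the patch: a single-cpu C model of the
note-and-consume scheme the next paragraph describes — load_new_mm_cr3()
records that the user context needs flushing, and the return-to-user path
consumes the note and re-arms NOFLUSH. The ASID bits are omitted and
KAISER_SHADOW_PGD_OFFSET is assumed to be 0x1000:

#include <stdio.h>

#define NOFLUSH		(1ULL << 63)	/* X86_CR3_PCID_NOFLUSH */
#define SHADOW_OFF	0x1000ULL	/* assumed KAISER_SHADOW_PGD_OFFSET */

/* models the per-cpu X86_CR3_PCID_USER_VAR; NOFLUSH set by default */
static unsigned long long user_cr3_bits = NOFLUSH | SHADOW_OFF;

/* models kaiser_flush_tlb_on_return_to_user() */
static void note_user_flush(void)
{
	user_cr3_bits = SHADOW_OFF;	/* NOFLUSH clear: flush on next exit */
}

/* models SWITCH_USER_CR3 on the exit path */
static unsigned long long exit_to_user_cr3(unsigned long long pgd)
{
	unsigned long long cr3 = pgd | user_cr3_bits;

	user_cr3_bits |= NOFLUSH;	/* reset the note for next time */
	return cr3;
}

int main(void)
{
	note_user_flush();
	printf("flushing exit: %#llx\n", exit_to_user_cr3(0x1234000ULL));
	printf("normal exit:   %#llx\n", exit_to_user_cr3(0x1234000ULL));
	return 0;
}
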
+ +Instead, follow the hint in the comment that was there: change +X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3() +can leave a note in it, for SWITCH_USER_CR3 on return to userspace to +flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH. + +Which works well enough that there's no need to do it this way only +when invpcid is unsupported: it's a good alternative to invpcid here. +But there's a couple of inlines in asm/tlbflush.h that need to do the +same trick, so it's best to localize all this per-cpu business in +mm/kaiser.c: moving that part of the initialization from setup_pcid() +to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the +function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a +KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit. + +I did try to make the feature tests in asm/tlbflush.h more consistent +with each other: there seem to be far too many ways of performing such +tests, and I don't have a good grasp of their differences. At first +I converted them all to be static_cpu_has(): but that proved to be a +mistake, as the comment in __native_flush_tlb_single() hints; so then +I reversed and made them all this_cpu_has(). Probably all gratuitous +change, but that's the way it's working at present. + +I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR +gets re-initialized by each cpu (before and after these changes): +no problem when (as usual) all cpus on a machine have the same +features, but in principle incorrect. However, my experiment +to per-cpu-ify that one did not end well... + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kaiser.h | 18 +++++++------ + arch/x86/include/asm/tlbflush.h | 56 ++++++++++++++++++++++++++++------------- + arch/x86/kernel/cpu/common.c | 22 +--------------- + arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++----- + arch/x86/mm/tlb.c | 46 +++++++++++++-------------------- + 5 files changed, 113 insertions(+), 79 deletions(-) + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 360ff3b..009bca5 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -32,13 +32,12 @@ movq \reg, %cr3 + .macro _SWITCH_TO_USER_CR3 reg + movq %cr3, \reg + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +-/* +- * This can obviously be one instruction by putting the +- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. +- * But, just leave it now for simplicity. +- */ +-orq X86_CR3_PCID_USER_VAR, \reg +-orq $(KAISER_SHADOW_PGD_OFFSET), \reg ++orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg ++js 9f ++// FLUSH this time, reset to NOFLUSH for next time ++// But if nopcid? Consider using 0x80 for user pcid? 
++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) ++9: + movq \reg, %cr3 + .endm + +@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + */ + DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + ++extern unsigned long X86_CR3_PCID_KERN_VAR; ++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); ++ ++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; ++ + /** + * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping + * @addr: the start address of the range +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 28b4182..4fff696 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -12,6 +12,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) + { + struct { u64 d[2]; } desc = { { pcid, addr } }; ++ + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global +@@ -132,27 +133,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) + cr4_set_bits(mask); + } + ++/* ++ * Declare a couple of kaiser interfaces here for convenience, ++ * to avoid the need for asm/kaiser.h in unexpected places. ++ */ ++#ifdef CONFIG_KAISER ++extern void kaiser_setup_pcid(void); ++extern void kaiser_flush_tlb_on_return_to_user(void); ++#else ++static inline void kaiser_setup_pcid(void) ++{ ++} ++static inline void kaiser_flush_tlb_on_return_to_user(void) ++{ ++} ++#endif ++ + static inline void __native_flush_tlb(void) + { +- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { ++ if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* +- * If current->mm == NULL then we borrow a mm which may change during a +- * task switch and therefore we must not be preempted while we write CR3 +- * back: ++ * Note, this works with CR4.PCIDE=0 or 1. + */ +- preempt_disable(); +- native_write_cr3(native_read_cr3()); +- preempt_enable(); ++ invpcid_flush_all_nonglobals(); + return; + } ++ + /* +- * We are no longer using globals with KAISER, so a +- * "nonglobals" flush would work too. But, this is more +- * conservative. +- * +- * Note, this works with CR4.PCIDE=0 or 1. ++ * If current->mm == NULL then we borrow a mm which may change during a ++ * task switch and therefore we must not be preempted while we write CR3 ++ * back: + */ +- invpcid_flush_all(); ++ preempt_disable(); ++ if (this_cpu_has(X86_FEATURE_PCID)) ++ kaiser_flush_tlb_on_return_to_user(); ++ native_write_cr3(native_read_cr3()); ++ preempt_enable(); + } + + static inline void __native_flush_tlb_global_irq_disabled(void) +@@ -168,9 +184,13 @@ static inline void __native_flush_tlb_global_irq_disabled(void) + + static inline void __native_flush_tlb_global(void) + { ++#ifdef CONFIG_KAISER ++ /* Globals are not used at all */ ++ __native_flush_tlb(); ++#else + unsigned long flags; + +- if (static_cpu_has(X86_FEATURE_INVPCID)) { ++ if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. +@@ -187,10 +207,9 @@ static inline void __native_flush_tlb_global(void) + * be called from deep inside debugging code.) 
+ */ + raw_local_irq_save(flags); +- + __native_flush_tlb_global_irq_disabled(); +- + raw_local_irq_restore(flags); ++#endif + } + + static inline void __native_flush_tlb_single(unsigned long addr) +@@ -201,9 +220,12 @@ static inline void __native_flush_tlb_single(unsigned long addr) + * + * The ASIDs used below are hard-coded. But, we must not + * call invpcid(type=1/2) before CR4.PCIDE=1. Just call +- * invpcid in the case we are called early. ++ * invlpg in the case we are called early. + */ ++ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { ++ if (this_cpu_has(X86_FEATURE_PCID)) ++ kaiser_flush_tlb_on_return_to_user(); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; + } +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index b4c0ae5..e6be5f3 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -324,33 +324,12 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) + } + } + +-/* +- * These can have bit 63 set, so we can not just use a plain "or" +- * instruction to get their value or'd into CR3. It would take +- * another register. So, we use a memory reference to these +- * instead. +- * +- * This is also handy because systems that do not support +- * PCIDs just end up or'ing a 0 into their CR3, which does +- * no harm. +- */ +-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; +-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; +- + static void setup_pcid(struct cpuinfo_x86 *c) + { + if (cpu_has(c, X86_FEATURE_PCID)) { + if (cpu_has(c, X86_FEATURE_PGE)) { + cr4_set_bits(X86_CR4_PCIDE); + /* +- * These variables are used by the entry/exit +- * code to change PCIDs. +- */ +-#ifdef CONFIG_KAISER +- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; +- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; +-#endif +- /* + * INVPCID has two "groups" of types: + * 1/2: Invalidate an individual address + * 3/4: Invalidate all contexts +@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x86 *c) + clear_cpu_cap(c, X86_FEATURE_PCID); + } + } ++ kaiser_setup_pcid(); + } + + /* +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index f5c75f7..7056840 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -11,12 +11,26 @@ + #include <linux/uaccess.h> + + #include <asm/kaiser.h> ++#include <asm/tlbflush.h> /* to verify its kaiser declarations */ + #include <asm/pgtable.h> + #include <asm/pgalloc.h> + #include <asm/desc.h> ++ + #ifdef CONFIG_KAISER ++__visible ++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++/* ++ * These can have bit 63 set, so we can not just use a plain "or" ++ * instruction to get their value or'd into CR3. It would take ++ * another register. So, we use a memory reference to these instead. ++ * ++ * This is also handy because systems that do not support PCIDs ++ * just end up or'ing a 0 into their CR3, which does no harm. ++ */ ++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; ++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); + +-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + /* + * At runtime, the only things we map are some things for CPU + * hotplug, and stacks for new processes. 
No two CPUs will ever +@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(void) + WARN_ON(__ret); \ + } while (0) + +-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +-extern unsigned long X86_CR3_PCID_KERN_VAR; +-extern unsigned long X86_CR3_PCID_USER_VAR; + /* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. But, we +@@ -294,8 +305,6 @@ void __init kaiser_init(void) + + kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + __PAGE_KERNEL); +- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, +- __PAGE_KERNEL); + } + + /* Add a mapping to the shadow mapping, and synchronize the mappings */ +@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + } + return pgd; + } ++ ++void kaiser_setup_pcid(void) ++{ ++ unsigned long kern_cr3 = 0; ++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; ++ ++ if (this_cpu_has(X86_FEATURE_PCID)) { ++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; ++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; ++ } ++ /* ++ * These variables are used by the entry/exit ++ * code to change PCID and pgd and TLB flushing. ++ */ ++ X86_CR3_PCID_KERN_VAR = kern_cr3; ++ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); ++} ++ ++/* ++ * Make a note that this cpu will need to flush USER tlb on return to user. ++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: ++ * if cpu does not, then the NOFLUSH bit will never have been set. ++ */ ++void kaiser_flush_tlb_on_return_to_user(void) ++{ ++ this_cpu_write(X86_CR3_PCID_USER_VAR, ++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); ++} ++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); + #endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index a376246..a2532d4 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -6,13 +6,14 @@ + #include <linux/interrupt.h> + #include <linux/export.h> + #include <linux/cpu.h> ++#include <linux/debugfs.h> + + #include <asm/tlbflush.h> + #include <asm/mmu_context.h> + #include <asm/cache.h> + #include <asm/apic.h> + #include <asm/uv/uv.h> +-#include <linux/debugfs.h> ++#include <asm/kaiser.h> + + /* + * Smarter SMP flushing macros. +@@ -40,34 +41,23 @@ static void load_new_mm_cr3(pgd_t *pgdir) + { + unsigned long new_mm_cr3 = __pa(pgdir); + +- /* +- * KAISER, plus PCIDs needs some extra work here. But, +- * if either of features is not present, we need no +- * PCIDs here and just do a normal, full TLB flush with +- * the write_cr3() +- */ +- if (!IS_ENABLED(CONFIG_KAISER) || +- !cpu_feature_enabled(X86_FEATURE_PCID)) +- goto out_set_cr3; +- /* +- * We reuse the same PCID for different tasks, so we must +- * flush all the entires for the PCID out when we change +- * tasks. +- */ +- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); +- +- /* +- * The flush from load_cr3() may leave old TLB entries +- * for userspace in place. We must flush that context +- * separately. We can theoretically delay doing this +- * until we actually load up the userspace CR3, but +- * that's a bit tricky. We have to have the "need to +- * flush userspace PCID" bit per-cpu and check it in the +- * exit-to-userspace paths. +- */ +- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); ++#ifdef CONFIG_KAISER ++ if (this_cpu_has(X86_FEATURE_PCID)) { ++ /* ++ * We reuse the same PCID for different tasks, so we must ++ * flush all the entries for the PCID out when we change tasks. 
++ * Flush KERN below, flush USER when returning to userspace in ++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro. ++ * ++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could ++ * do it here, but can only be used if X86_FEATURE_INVPCID is ++ * available - and many machines support pcid without invpcid. ++ */ ++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; ++ kaiser_flush_tlb_on_return_to_user(); ++ } ++#endif /* CONFIG_KAISER */ + +-out_set_cr3: + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch new file mode 100644 index 00000000..c476da81 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch @@ -0,0 +1,207 @@ +From 18dacfea13d15dbf2fa1037cf76ee463c52af031 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sun, 28 Jan 2018 10:38:49 -0800 +Subject: [PATCH 21/42] x86/entry/64: Remove the SYSCALL64 fast path + +(cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9) + +The SYCALLL64 fast path was a nice, if small, optimization back in the good +old days when syscalls were actually reasonably fast. Now there is PTI to +slow everything down, and indirect branches are verboten, making everything +messier. The retpoline code in the fast path is particularly nasty. + +Just get rid of the fast path. The slow path is barely slower. + +[ tglx: Split out the 'push all extra regs' part ] + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> +Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 123 +------------------------------------------- + arch/x86/entry/syscall_64.c | 7 +-- + 2 files changed, 3 insertions(+), 127 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index e422e15..4360253 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) + pushq %r11 /* pt_regs->r11 */ + sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + +- /* +- * If we need to do entry work or if we guess we'll need to do +- * exit work, go straight to the slow path. +- */ +- movq PER_CPU_VAR(current_task), %r11 +- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) +- jnz entry_SYSCALL64_slow_path +- +-entry_SYSCALL_64_fastpath: +- /* +- * Easy case: enable interrupts and issue the syscall. If the syscall +- * needs pt_regs, we'll call a stub that disables interrupts again +- * and jumps to the slow path. +- */ +- TRACE_IRQS_ON +- ENABLE_INTERRUPTS(CLBR_NONE) +-#if __SYSCALL_MASK == ~0 +- cmpq $__NR_syscall_max, %rax +-#else +- andl $__SYSCALL_MASK, %eax +- cmpl $__NR_syscall_max, %eax +-#endif +- ja 1f /* return -ENOSYS (already in pt_regs->ax) */ +- movq %r10, %rcx +- +- /* +- * This call instruction is handled specially in stub_ptregs_64. 
+- * It might end up jumping to the slow path. If it jumps, RAX +- * and all argument registers are clobbered. +- */ +-#ifdef CONFIG_RETPOLINE +- movq sys_call_table(, %rax, 8), %rax +- call __x86_indirect_thunk_rax +-#else +- call *sys_call_table(, %rax, 8) +-#endif +-.Lentry_SYSCALL_64_after_fastpath_call: +- +- movq %rax, RAX(%rsp) +-1: +- +- /* +- * If we get here, then we know that pt_regs is clean for SYSRET64. +- * If we see that no exit work is required (which we are required +- * to check with IRQs off), then we can go straight to SYSRET64. +- */ +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- movq PER_CPU_VAR(current_task), %r11 +- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) +- jnz 1f +- +- LOCKDEP_SYS_EXIT +- TRACE_IRQS_ON /* user mode is traced as IRQs on */ +- movq RIP(%rsp), %rcx +- movq EFLAGS(%rsp), %r11 +- RESTORE_C_REGS_EXCEPT_RCX_R11 +- /* +- * This opens a window where we have a user CR3, but are +- * running in the kernel. This makes using the CS +- * register useless for telling whether or not we need to +- * switch CR3 in NMIs. Normal interrupts are OK because +- * they are off here. +- */ +- SWITCH_USER_CR3 +- movq RSP(%rsp), %rsp +- USERGS_SYSRET64 +- +-1: +- /* +- * The fast path looked good when we started, but something changed +- * along the way and we need to switch to the slow path. Calling +- * raise(3) will trigger this, for example. IRQs are off. +- */ +- TRACE_IRQS_ON +- ENABLE_INTERRUPTS(CLBR_NONE) +- SAVE_EXTRA_REGS +- movq %rsp, %rdi +- call syscall_return_slowpath /* returns with IRQs disabled */ +- jmp return_from_SYSCALL_64 +- +-entry_SYSCALL64_slow_path: + /* IRQs are off. */ + SAVE_EXTRA_REGS + movq %rsp, %rdi + call do_syscall_64 /* returns with IRQs disabled */ + +-return_from_SYSCALL_64: + RESTORE_EXTRA_REGS + TRACE_IRQS_IRETQ /* we're about to change IF */ + +@@ -339,6 +256,7 @@ return_from_SYSCALL_64: + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ + /* + * This opens a window where we have a user CR3, but are + * running in the kernel. This makes using the CS +@@ -363,45 +281,6 @@ opportunistic_sysret_failed: + jmp restore_c_regs_and_iret + END(entry_SYSCALL_64) + +-ENTRY(stub_ptregs_64) +- /* +- * Syscalls marked as needing ptregs land here. +- * If we are on the fast path, we need to save the extra regs, +- * which we achieve by trying again on the slow path. If we are on +- * the slow path, the extra regs are already saved. +- * +- * RAX stores a pointer to the C function implementing the syscall. +- * IRQs are on. 
+- */ +- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) +- jne 1f +- +- /* +- * Called from fast path -- disable IRQs again, pop return address +- * and jump to slow path +- */ +- DISABLE_INTERRUPTS(CLBR_NONE) +- TRACE_IRQS_OFF +- popq %rax +- jmp entry_SYSCALL64_slow_path +- +-1: +- JMP_NOSPEC %rax /* Called from C */ +-END(stub_ptregs_64) +- +-.macro ptregs_stub func +-ENTRY(ptregs_\func) +- leaq \func(%rip), %rax +- jmp stub_ptregs_64 +-END(ptregs_\func) +-.endm +- +-/* Instantiate ptregs_stub for each ptregs-using syscall */ +-#define __SYSCALL_64_QUAL_(sym) +-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym +-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) +-#include <asm/syscalls_64.h> +- + /* + * %rdi: prev task + * %rsi: next task +diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c +index 9dbc5ab..6705edd 100644 +--- a/arch/x86/entry/syscall_64.c ++++ b/arch/x86/entry/syscall_64.c +@@ -6,14 +6,11 @@ + #include <asm/asm-offsets.h> + #include <asm/syscall.h> + +-#define __SYSCALL_64_QUAL_(sym) sym +-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym +- +-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); ++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); + #include <asm/syscalls_64.h> + #undef __SYSCALL_64 + +-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), ++#define __SYSCALL_64(nr, sym, qual) [nr] = sym, + + extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch new file mode 100644 index 00000000..450af998 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch @@ -0,0 +1,135 @@ +From 2b1824f53bd40e8d35a1c1a8504917e69c958847 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Fri, 8 Sep 2017 19:26:30 -0700 +Subject: [PATCH 022/102] kaiser: PCID 0 for kernel and 128 for user + +Why was 4 chosen for kernel PCID and 6 for user PCID? +No good reason in a backport where PCIDs are only used for Kaiser. + +If we continue with those, then we shall need to add Andy Lutomirski's +4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa() +and __read_cr3()"), which deals with the problem of read_cr3() callers +finding stray bits in the cr3 that they expected to be page-aligned; +and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64: +Mask off CR3's PCID bits in the saved CR3"). + +But if 0 is used for kernel PCID, then there's no need to add in those +commits - whenever the kernel looks, it sees 0 in the lower bits; and +0 for kernel seems an obvious choice. + +And I naughtily propose 128 for user PCID. Because there's a place +in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH, +but needs to reset that to NOFLUSH for the next occasion. Currently +it does so with a "movb $(0x80)" into the high byte of the per-cpu +quadword, but that will cause a machine without PCID support to crash. 
+Now, if %al just happened to have 0x80 in it at that point, on a +machine with PCID support, but 0 on a machine without PCID support... + +(That will go badly wrong once the pgd can be at a physical address +above 2^56, but even with 5-level paging, physical goes up to 2^52.) + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kaiser.h | 19 ++++++++++++------- + arch/x86/include/asm/pgtable_types.h | 7 ++++--- + arch/x86/mm/tlb.c | 3 +++ + 3 files changed, 19 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 009bca5..110a73e 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg + movq \reg, %cr3 + .endm + +-.macro _SWITCH_TO_USER_CR3 reg ++.macro _SWITCH_TO_USER_CR3 reg regb ++/* ++ * regb must be the low byte portion of reg: because we have arranged ++ * for the low byte of the user PCID to serve as the high byte of NOFLUSH ++ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are ++ * not enabled): so that the one register can update both memory and cr3. ++ */ + movq %cr3, \reg + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg + orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg + js 9f +-// FLUSH this time, reset to NOFLUSH for next time +-// But if nopcid? Consider using 0x80 for user pcid? +-movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) ++/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ ++movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) + 9: + movq \reg, %cr3 + .endm +@@ -49,7 +54,7 @@ popq %rax + + .macro SWITCH_USER_CR3 + pushq %rax +-_SWITCH_TO_USER_CR3 %rax ++_SWITCH_TO_USER_CR3 %rax %al + popq %rax + .endm + +@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + + .macro SWITCH_USER_CR3_NO_STACK + movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +-_SWITCH_TO_USER_CR3 %rax ++_SWITCH_TO_USER_CR3 %rax %al + movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + .endm + +@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + + .macro SWITCH_KERNEL_CR3 reg + .endm +-.macro SWITCH_USER_CR3 reg ++.macro SWITCH_USER_CR3 reg regb + .endm + .macro SWITCH_USER_CR3_NO_STACK + .endm +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index ada77fd..7cf2883 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -146,16 +146,17 @@ + + /* Mask for all the PCID-related bits in CR3: */ + #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) ++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) ++ + #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) +-#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) +-#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) ++/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ ++#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) + + #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) + #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) + #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) + #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) + #else +-#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) + #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) + /* + * PCIDs are unsupported on 32-bit and none of these bits can be +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c 
+index a2532d4..852c665 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -52,6 +52,9 @@ static void load_new_mm_cr3(pgd_t *pgdir) + * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could + * do it here, but can only be used if X86_FEATURE_INVPCID is + * available - and many machines support pcid without invpcid. ++ * ++ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; ++ * but keep that line in there in case something changes. + */ + new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; + kaiser_flush_tlb_on_return_to_user(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch new file mode 100644 index 00000000..904e0528 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch @@ -0,0 +1,49 @@ +From c65286e3b8a7060e768c7b7e4c565922c205cb7f Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sun, 28 Jan 2018 10:38:49 -0800 +Subject: [PATCH 22/42] x86/entry/64: Push extra regs right away + +(cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46) + +With the fast path removed there is no point in splitting the push of the +normal and the extra register set. Just push the extra regs right away. + +[ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ] + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> +Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 4360253..c915eeb 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ +- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ ++ pushq %rbx /* pt_regs->rbx */ ++ pushq %rbp /* pt_regs->rbp */ ++ pushq %r12 /* pt_regs->r12 */ ++ pushq %r13 /* pt_regs->r13 */ ++ pushq %r14 /* pt_regs->r14 */ ++ pushq %r15 /* pt_regs->r15 */ + + /* IRQs are off. 
*/ +- SAVE_EXTRA_REGS + movq %rsp, %rdi + call do_syscall_64 /* returns with IRQs disabled */ + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch new file mode 100644 index 00000000..35c07966 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch @@ -0,0 +1,147 @@ +From 65e2f7ce211f85b00bd7e08f3b19c90ef0dbc938 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 27 Aug 2017 16:24:27 -0700 +Subject: [PATCH 023/102] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user + +Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and +X86_CR3_PCID_USER_VAR: we usually name variables in lower-case. + +But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)? +Ah, it's a leftover from when kaiser_add_user_map() once complained +about mapping the same page twice. Make it __read_mostly instead. +(I'm a little uneasy about all the unrelated data which shares its +page getting user-mapped too, but that was so before, and not a big +deal: though we call it user-mapped, it's not mapped with _PAGE_USER.) + +And there is a little change around the two calls to do_nmi(). +Previously they set the NOFLUSH bit (if PCID supported) when +forcing to kernel context before do_nmi(); now they also have the +NOFLUSH bit set (if PCID supported) when restoring context after: +nothing done in do_nmi() should require a TLB to be flushed here. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 8 ++++---- + arch/x86/include/asm/kaiser.h | 11 +++++------ + arch/x86/mm/kaiser.c | 13 +++++++------ + 3 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 4a0ebf4..e158fd5 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1314,11 +1314,11 @@ ENTRY(nmi) + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax ++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ ++ orq x86_cr3_pcid_noflush, %rax + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax +- /* Add back kernel PCID and "no flush" bit */ +- orq X86_CR3_PCID_KERN_VAR, %rax + movq %rax, %cr3 + #endif + call do_nmi +@@ -1558,11 +1558,11 @@ end_repeat_nmi: + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + movq %cr3, %rax ++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ ++ orq x86_cr3_pcid_noflush, %rax + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax +- /* Add back kernel PCID and "no flush" bit */ +- orq X86_CR3_PCID_KERN_VAR, %rax + movq %rax, %cr3 + #endif + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 110a73e..48d8d70 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -25,7 +25,7 @@ + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +-orq X86_CR3_PCID_KERN_VAR, \reg ++orq x86_cr3_pcid_noflush, \reg + movq \reg, %cr3 + .endm + 
+@@ -37,11 +37,10 @@ movq \reg, %cr3 + * not enabled): so that the one register can update both memory and cr3. + */ + movq %cr3, \reg +-andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +-orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg ++orq PER_CPU_VAR(x86_cr3_pcid_user), \reg + js 9f + /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ +-movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) ++movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) + 9: + movq \reg, %cr3 + .endm +@@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + */ + DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +-extern unsigned long X86_CR3_PCID_KERN_VAR; +-DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); ++extern unsigned long x86_cr3_pcid_noflush; ++DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); + + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 7056840..fa1cb09 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -28,8 +28,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + * This is also handy because systems that do not support PCIDs + * just end up or'ing a 0 into their CR3, which does no harm. + */ +-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; +-DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); ++unsigned long x86_cr3_pcid_noflush __read_mostly; ++DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); + + /* + * At runtime, the only things we map are some things for CPU +@@ -303,7 +303,8 @@ void __init kaiser_init(void) + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); + +- kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, ++ kaiser_add_user_map_early(&x86_cr3_pcid_noflush, ++ sizeof(x86_cr3_pcid_noflush), + __PAGE_KERNEL); + } + +@@ -381,8 +382,8 @@ void kaiser_setup_pcid(void) + * These variables are used by the entry/exit + * code to change PCID and pgd and TLB flushing. + */ +- X86_CR3_PCID_KERN_VAR = kern_cr3; +- this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); ++ x86_cr3_pcid_noflush = kern_cr3; ++ this_cpu_write(x86_cr3_pcid_user, user_cr3); + } + + /* +@@ -392,7 +393,7 @@ void kaiser_setup_pcid(void) + */ + void kaiser_flush_tlb_on_return_to_user(void) + { +- this_cpu_write(X86_CR3_PCID_USER_VAR, ++ this_cpu_write(x86_cr3_pcid_user, + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); + } + EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-asm-Move-status-from-thread_struct-to-thread_inf.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-asm-Move-status-from-thread_struct-to-thread_inf.patch new file mode 100644 index 00000000..28daae3a --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-asm-Move-status-from-thread_struct-to-thread_inf.patch @@ -0,0 +1,187 @@ +From be756593e543b901edc0e7489949f08c8d2737bf Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sun, 28 Jan 2018 10:38:50 -0800 +Subject: [PATCH 23/42] x86/asm: Move 'status' from thread_struct to + thread_info + +(cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3) + +The TS_COMPAT bit is very hot and is accessed from code paths that mostly +also touch thread_info::flags. Move it into struct thread_info to improve +cache locality. 
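+
+As a sketch of the access pattern this optimizes (condensed from the
+do_syscall_32_irqs_on() hunk below; the syscall_trace_enter() call is
+surrounding 4.9 context, not part of this diff):
+
+	struct thread_info *ti = current_thread_info();
+
+	ti->status |= TS_COMPAT;	/* previously current->thread.status */
+	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+		nr = syscall_trace_enter(regs);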
+ +The only reason it was in thread_struct is that there was a brief period +during which arch-specific fields were not allowed in struct thread_info. + +Linus suggested further changing: + + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + +to: + + if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + +on the theory that frequently dirtying the cacheline even in pure 64-bit +code that never needs to modify status hurts performance. That could be a +reasonable followup patch, but I suspect it matters less on top of this +patch. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Acked-by: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> +Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/common.c | 4 ++-- + arch/x86/include/asm/processor.h | 2 -- + arch/x86/include/asm/syscall.h | 6 +++--- + arch/x86/include/asm/thread_info.h | 3 ++- + arch/x86/kernel/process_64.c | 4 ++-- + arch/x86/kernel/ptrace.c | 2 +- + arch/x86/kernel/signal.c | 2 +- + 7 files changed, 11 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index bdd9cc5..bd1d102 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) + * special case only applies after poking regs and before the + * very next return to user mode. + */ +- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); ++ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + #endif + + user_enter_irqoff(); +@@ -299,7 +299,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + unsigned int nr = (unsigned int)regs->orig_ax; + + #ifdef CONFIG_IA32_EMULATION +- current->thread.status |= TS_COMPAT; ++ ti->status |= TS_COMPAT; + #endif + + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index e40b19c..a781668 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -391,8 +391,6 @@ struct thread_struct { + unsigned short gsindex; + #endif + +- u32 status; /* thread synchronous flags */ +- + #ifdef CONFIG_X86_64 + unsigned long fsbase; + unsigned long gsbase; +diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h +index e3c95e8..03eedc2 100644 +--- a/arch/x86/include/asm/syscall.h ++++ b/arch/x86/include/asm/syscall.h +@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ +- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) ++ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. 
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, + unsigned long *args) + { + # ifdef CONFIG_IA32_EMULATION +- if (task->thread.status & TS_COMPAT) ++ if (task->thread_info.status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; +@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, + const unsigned long *args) + { + # ifdef CONFIG_IA32_EMULATION +- if (task->thread.status & TS_COMPAT) ++ if (task->thread_info.status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index bdf9c4c..89978b9 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -54,6 +54,7 @@ struct task_struct; + + struct thread_info { + unsigned long flags; /* low level flags */ ++ u32 status; /* thread synchronous flags */ + }; + + #define INIT_THREAD_INFO(tsk) \ +@@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, + #define in_ia32_syscall() true + #else + #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ +- current->thread.status & TS_COMPAT) ++ current_thread_info()->status & TS_COMPAT) + #endif + + /* +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index b3760b3..dca15e1 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -512,7 +512,7 @@ void set_personality_ia32(bool x32) + current->personality &= ~READ_IMPLIES_EXEC; + /* in_compat_syscall() uses the presence of the x32 + syscall bit flag to determine compat status */ +- current->thread.status &= ~TS_COMPAT; ++ current_thread_info()->status &= ~TS_COMPAT; + } else { + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_X32); +@@ -520,7 +520,7 @@ void set_personality_ia32(bool x32) + current->mm->context.ia32_compat = TIF_IA32; + current->personality |= force_personality32; + /* Prepare the first "return" to user space */ +- current->thread.status |= TS_COMPAT; ++ current_thread_info()->status |= TS_COMPAT; + } + } + EXPORT_SYMBOL_GPL(set_personality_ia32); +diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c +index 0e63c02..e497d37 100644 +--- a/arch/x86/kernel/ptrace.c ++++ b/arch/x86/kernel/ptrace.c +@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) + */ + regs->orig_ax = value; + if (syscall_get_nr(child, regs) >= 0) +- child->thread.status |= TS_I386_REGS_POKED; ++ child->thread_info.status |= TS_I386_REGS_POKED; + break; + + case offsetof(struct user32, regs.eflags): +diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c +index 763af1d..b1a5d25 100644 +--- a/arch/x86/kernel/signal.c ++++ b/arch/x86/kernel/signal.c +@@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) + * than the tracee. 
+ */ + #ifdef CONFIG_IA32_EMULATION +- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) ++ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + return __NR_ia32_restart_syscall; + #endif + #ifdef CONFIG_X86_X32_ABI +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch new file mode 100644 index 00000000..e63a87b0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch @@ -0,0 +1,128 @@ +From be059366798cbe4d7f4e9d86232e17b2368154ce Mon Sep 17 00:00:00 2001 +From: Mark Rutland <mark.rutland@arm.com> +Date: Mon, 29 Jan 2018 17:02:16 -0800 +Subject: [PATCH 24/42] Documentation: Document array_index_nospec + +(cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da) + +Document the rationale and usage of the new array_index_nospec() helper. + +Signed-off-by: Mark Rutland <mark.rutland@arm.com> +Signed-off-by: Will Deacon <will.deacon@arm.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Kees Cook <keescook@chromium.org> +Cc: linux-arch@vger.kernel.org +Cc: Jonathan Corbet <corbet@lwn.net> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: gregkh@linuxfoundation.org +Cc: kernel-hardening@lists.openwall.com +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/speculation.txt | 90 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 90 insertions(+) + create mode 100644 Documentation/speculation.txt + +diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt +new file mode 100644 +index 0000000..e9e6cba +--- /dev/null ++++ b/Documentation/speculation.txt +@@ -0,0 +1,90 @@ ++This document explains potential effects of speculation, and how undesirable ++effects can be mitigated portably using common APIs. ++ ++=========== ++Speculation ++=========== ++ ++To improve performance and minimize average latencies, many contemporary CPUs ++employ speculative execution techniques such as branch prediction, performing ++work which may be discarded at a later stage. ++ ++Typically speculative execution cannot be observed from architectural state, ++such as the contents of registers. However, in some cases it is possible to ++observe its impact on microarchitectural state, such as the presence or ++absence of data in caches. Such state may form side-channels which can be ++observed to extract secret information. ++ ++For example, in the presence of branch prediction, it is possible for bounds ++checks to be ignored by code which is speculatively executed. Consider the ++following code: ++ ++ int load_array(int *array, unsigned int index) ++ { ++ if (index >= MAX_ARRAY_ELEMS) ++ return 0; ++ else ++ return array[index]; ++ } ++ ++Which, on arm64, may be compiled to an assembly sequence such as: ++ ++ CMP <index>, #MAX_ARRAY_ELEMS ++ B.LT less ++ MOV <returnval>, #0 ++ RET ++ less: ++ LDR <returnval>, [<array>, <index>] ++ RET ++ ++It is possible that a CPU mis-predicts the conditional branch, and ++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. 
This
++value will subsequently be discarded, but the speculated load may affect
++microarchitectural state which can be subsequently measured.
++
++More complex sequences involving multiple dependent memory accesses may
++result in sensitive information being leaked. Consider the following
++code, building on the prior example:
++
++ int load_dependent_arrays(int *arr1, int *arr2, int index)
++ {
++ int val1, val2;
++
++ val1 = load_array(arr1, index);
++ val2 = load_array(arr2, val1);
++
++ return val2;
++ }
++
++Under speculation, the first call to load_array() may return the value
++of an out-of-bounds address, while the second call will influence
++microarchitectural state dependent on this value. This may provide an
++arbitrary read primitive.
++
++====================================
++Mitigating speculation side-channels
++====================================
++
++The kernel provides a generic API to ensure that bounds checks are
++respected even under speculation. Architectures which are affected by
++speculation-based side-channels are expected to implement these
++primitives.
++
++The array_index_nospec() helper in <linux/nospec.h> can be used to
++prevent information from being leaked via side-channels.
++
++A call to array_index_nospec(index, size) returns a sanitized index
++value that is bounded to [0, size) even under CPU speculation
++conditions.
++
++This can be used to protect the earlier load_array() example:
++
++ int load_array(int *array, unsigned int index)
++ {
++ if (index >= MAX_ARRAY_ELEMS)
++ return 0;
++ else {
++ index = array_index_nospec(index, MAX_ARRAY_ELEMS);
++ return array[index];
++ }
++ }
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
new file mode 100644
index 00000000..bc2cbebd
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
@@ -0,0 +1,172 @@
+From 901d7211374f31ffc00719e75113b958a4ae64d4 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 26 Sep 2017 18:43:07 -0700
+Subject: [PATCH 024/102] kaiser: paranoid_entry pass cr3 need to paranoid_exit
+
+Neel Natu points out that paranoid_entry() was wrong to assume that
+an entry that did not need swapgs would not need SWITCH_KERNEL_CR3:
+paranoid_entry (used for debug breakpoint, int3, double fault or MCE;
+though I think it's only the MCE case that is cause for concern here)
+can break in at an awkward time, between cr3 switch and swapgs, but
+its handling always needs kernel gs and kernel cr3.
+
+Easy to fix in itself, but paranoid_entry() also needs to convey to
+paranoid_exit() (and my reading of macro idtentry says paranoid_entry
+and paranoid_exit are always paired) how to restore the prior state.
+The swapgs state is already conveyed by %ebx (0 or 1), so extend that
+also to convey when SWITCH_USER_CR3 will be needed (2 or 3).
+
+(Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other
+way round: and a convention shared with error_entry() and error_exit(),
+which I don't want to touch. Perhaps I should have inverted the bit
+for switch cr3 too, but did not.)
+
+paranoid_exit() would be straightforward, except for TRACE_IRQS: it
+did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG
+when not: which is it supposed to use when SWITCH_USER_CR3 is split
+apart from that? 
As best as I can determine, commit 5963e317b1e9 +("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") +missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG +there too (the discrepancy has nothing to do with the liberal use +of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has +just been used in all cases); discrepancy lovingly preserved across +several paranoid_exit() cleanups, but I'm now removing it. + +Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in +paranoid_exit() is now not only unnecessary but unsafe: might corrupt +syscall entry's unsafe_stack_register_backup of %rax. Just use +SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether, +before we make the mistake of using it again. + +hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs +part of the series, and ought to be moved earlier, if you decided +to make a release of Kaiser-without-PCIDs. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 46 +++++++++++++++++++++++++++++++--------- + arch/x86/entry/entry_64_compat.S | 2 +- + arch/x86/include/asm/kaiser.h | 8 ------- + 3 files changed, 37 insertions(+), 19 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index e158fd5..41bf650 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1053,7 +1053,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec + /* + * Save all registers in pt_regs, and switch gs if needed. + * Use slow, but surefire "are we in kernel?" check. +- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise ++ * ++ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit ++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit ++ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit ++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit + */ + ENTRY(paranoid_entry) + cld +@@ -1065,9 +1069,26 @@ ENTRY(paranoid_entry) + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS +- SWITCH_KERNEL_CR3 + xorl %ebx, %ebx +-1: ret ++1: ++#ifdef CONFIG_KAISER ++ /* ++ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 ++ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. ++ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done ++ * unconditionally, but we need to find out whether the reverse ++ * should be done on return (conveyed to paranoid_exit in %ebx). ++ */ ++ movq %cr3, %rax ++ testl $KAISER_SHADOW_PGD_OFFSET, %eax ++ jz 2f ++ orl $2, %ebx ++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax ++ orq x86_cr3_pcid_noflush, %rax ++ movq %rax, %cr3 ++2: ++#endif ++ ret + END(paranoid_entry) + + /* +@@ -1080,20 +1101,25 @@ END(paranoid_entry) + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. + * +- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) ++ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3 ++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 ++ * ebx=2: needs both swapgs and SWITCH_USER_CR3 ++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs + */ + ENTRY(paranoid_exit) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF_DEBUG +- testl %ebx, %ebx /* swapgs needed? */ ++ TRACE_IRQS_IRETQ_DEBUG ++#ifdef CONFIG_KAISER ++ testl $2, %ebx /* SWITCH_USER_CR3 needed? 
*/ ++ jz paranoid_exit_no_switch ++ SWITCH_USER_CR3 ++paranoid_exit_no_switch: ++#endif ++ testl $1, %ebx /* swapgs needed? */ + jnz paranoid_exit_no_swapgs +- TRACE_IRQS_IRETQ +- SWITCH_USER_CR3_NO_STACK + SWAPGS_UNSAFE_STACK +- jmp paranoid_exit_restore + paranoid_exit_no_swapgs: +- TRACE_IRQS_IRETQ_DEBUG +-paranoid_exit_restore: + RESTORE_EXTRA_REGS + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 0eb5801..d76a976 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -343,7 +343,7 @@ ENTRY(entry_INT80_compat) + + /* Go back to user mode. */ + TRACE_IRQS_ON +- SWITCH_USER_CR3_NO_STACK ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_regs_and_iret + END(entry_INT80_compat) +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 48d8d70..3dc5f4c 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax + movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + .endm + +-.macro SWITCH_USER_CR3_NO_STACK +-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +-_SWITCH_TO_USER_CR3 %rax %al +-movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +-.endm +- + #else /* CONFIG_KAISER */ + + .macro SWITCH_KERNEL_CR3 reg + .endm + .macro SWITCH_USER_CR3 reg regb + .endm +-.macro SWITCH_USER_CR3_NO_STACK +-.endm + .macro SWITCH_KERNEL_CR3_NO_STACK + .endm + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch new file mode 100644 index 00000000..994c7017 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch @@ -0,0 +1,121 @@ +From d1d620936019d80fd9be22b6fb09d3a15d4dbf7f Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:22 -0800 +Subject: [PATCH 25/42] array_index_nospec: Sanitize speculative array + de-references + +(cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40) + +array_index_nospec() is proposed as a generic mechanism to mitigate +against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary +checks via speculative execution. The array_index_nospec() +implementation is expected to be safe for current generation CPUs across +multiple architectures (ARM, x86). + +Based on an original implementation by Linus Torvalds, tweaked to remove +speculative flows by Alexei Starovoitov, and tweaked again by Linus to +introduce an x86 assembly implementation for the mask generation. 
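+
+As a worked example of the generic mask expression in the hunk below
+(assuming BITS_PER_LONG == 64):
+
+	/* index 3, size 8 (in bounds):
+	 *   3 | (8 - 1UL - 3) == 7, sign bit clear, so
+	 *   ~7 >> 63 (arithmetic shift) == ~0UL and the index survives.
+	 * index 9, size 8 (out of bounds):
+	 *   8 - 1UL - 9 wraps negative, the OR sets the sign bit, so
+	 *   ~(9 | ...) >> 63 == 0 and the index is clamped to 0.
+	 */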
+ +Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org> +Co-developed-by: Alexei Starovoitov <ast@kernel.org> +Suggested-by: Cyril Novikov <cnovikov@lynx.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: kernel-hardening@lists.openwall.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Russell King <linux@armlinux.org.uk> +Cc: gregkh@linuxfoundation.org +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 72 insertions(+) + create mode 100644 include/linux/nospec.h + +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +new file mode 100644 +index 0000000..b99bced +--- /dev/null ++++ b/include/linux/nospec.h +@@ -0,0 +1,72 @@ ++// SPDX-License-Identifier: GPL-2.0 ++// Copyright(c) 2018 Linus Torvalds. All rights reserved. ++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. ++// Copyright(c) 2018 Intel Corporation. All rights reserved. ++ ++#ifndef _LINUX_NOSPEC_H ++#define _LINUX_NOSPEC_H ++ ++/** ++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise ++ * @index: array element index ++ * @size: number of elements in array ++ * ++ * When @index is out of bounds (@index >= @size), the sign bit will be ++ * set. Extend the sign bit to all bits and invert, giving a result of ++ * zero for an out of bounds index, or ~0 if within bounds [0, @size). ++ */ ++#ifndef array_index_mask_nospec ++static inline unsigned long array_index_mask_nospec(unsigned long index, ++ unsigned long size) ++{ ++ /* ++ * Warn developers about inappropriate array_index_nospec() usage. ++ * ++ * Even if the CPU speculates past the WARN_ONCE branch, the ++ * sign bit of @index is taken into account when generating the ++ * mask. ++ * ++ * This warning is compiled out when the compiler can infer that ++ * @index and @size are less than LONG_MAX. ++ */ ++ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, ++ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) ++ return 0; ++ ++ /* ++ * Always calculate and emit the mask even if the compiler ++ * thinks the mask is not needed. The compiler does not take ++ * into account the value of @index under speculation. ++ */ ++ OPTIMIZER_HIDE_VAR(index); ++ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); ++} ++#endif ++ ++/* ++ * array_index_nospec - sanitize an array index after a bounds check ++ * ++ * For a code sequence like: ++ * ++ * if (index < size) { ++ * index = array_index_nospec(index, size); ++ * val = array[index]; ++ * } ++ * ++ * ...if the CPU speculates past the bounds check then ++ * array_index_nospec() will clamp the index within the range of [0, ++ * size). 
++ */ ++#define array_index_nospec(index, size) \ ++({ \ ++ typeof(index) _i = (index); \ ++ typeof(size) _s = (size); \ ++ unsigned long _mask = array_index_mask_nospec(_i, _s); \ ++ \ ++ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ ++ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ ++ \ ++ _i &= _mask; \ ++ _i; \ ++}) ++#endif /* _LINUX_NOSPEC_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch new file mode 100644 index 00000000..efd8753a --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch @@ -0,0 +1,52 @@ +From 53c0f95d99b8f7282166ed59871ef86396ae2a8f Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Mon, 2 Oct 2017 10:57:24 -0700 +Subject: [PATCH 025/102] kaiser: kaiser_remove_mapping() move along the pgd + +When removing the bogus comment from kaiser_remove_mapping(), +I really ought to have checked the extent of its bogosity: as +Neel points out, there is nothing to stop unmap_pud_range_nofree() +from continuing beyond the end of a pud (and starting in the wrong +position on the next). + +Fix kaiser_remove_mapping() to constrain the extent and advance pgd +pointer correctly: use pgd_addr_end() macro as used throughout base +mm (but don't assume page-rounded start and size in this case). + +But this bug was very unlikely to trigger in this backport: since +any buddy allocation is contained within a single pud extent, and +we are not using vmapped stacks (and are only mapping one page of +stack anyway): the only way to hit this bug here would be when +freeing a large modified ldt. 
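+
+For reference, pgd_addr_end() as defined in include/asm-generic/pgtable.h
+steps to the next pgd boundary or to the overall end, whichever comes
+first (the -1s keep the comparison correct if the boundary wraps to 0):
+
+	#define pgd_addr_end(addr, end)						\
+	({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
+		(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+	})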
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index fa1cb09..cc0950f 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ extern void unmap_pud_range_nofree(pgd_t *pgd,
+ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+- unsigned long addr;
++ unsigned long addr, next;
++ pgd_t *pgd;
+ 
+- for (addr = start; addr < end; addr += PGDIR_SIZE) {
+- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+- unmap_pud_range_nofree(pgd, addr, end);
++ pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ for (addr = start; addr < end; pgd++, addr = next) {
++ next = pgd_addr_end(addr, end);
++ unmap_pud_range_nofree(pgd, addr, next);
+ }
+ }
+ 
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
new file mode 100644
index 00000000..31eb38e9
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
@@ -0,0 +1,35 @@
+From d77e639ab3d037d8a309b107452491b7ec4b887c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 4 Dec 2017 20:13:35 -0800
+Subject: [PATCH 026/102] kaiser: fix unlikely error in alloc_ldt_struct()
+
+An error from kaiser_add_mapping() here is not at all likely, but
+Eric Biggers rightly points out that __free_ldt_struct() relies on
+new_ldt->size being initialized: move that up.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ldt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 8331bad..536e6ab 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -78,11 +78,11 @@ static struct ldt_struct *alloc_ldt_struct(int size)
+ 
+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ __PAGE_KERNEL);
++ new_ldt->size = size;
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+ }
+- new_ldt->size = size;
+ return new_ldt;
+ }
+ 
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch
new file mode 100644
index 00000000..3731f5b0
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch
@@ -0,0 +1,68 @@
+From bc71a58ec0aadad07a49878204eb38273f0c1b9e Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:28 -0800
+Subject: [PATCH 26/42] x86: Implement array_index_mask_nospec
+
+(cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859)
+
+array_index_nospec() uses a mask to sanitize user controllable array
+indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask
+otherwise. The default array_index_mask_nospec() handles the
+carry-bit from the (index - size) result in software. 
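+
+In outline, the x86 version below handles that carry in the CF flag
+directly (sketch in AT&T syntax; register names are placeholders):
+
+	cmp %size, %index	/* index - size: CF set iff index < size */
+	sbb %mask, %mask	/* mask = mask - mask - CF = CF ? ~0UL : 0 */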
+ +The x86 array_index_mask_nospec() does the same, but the carry-bit is +handled in the processor CF flag without conditional instructions in the +control flow. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h +index bfb28ca..ca22173 100644 +--- a/arch/x86/include/asm/barrier.h ++++ b/arch/x86/include/asm/barrier.h +@@ -23,6 +23,30 @@ + #define wmb() asm volatile("sfence" ::: "memory") + #endif + ++/** ++ * array_index_mask_nospec() - generate a mask that is ~0UL when the ++ * bounds check succeeds and 0 otherwise ++ * @index: array element index ++ * @size: number of elements in array ++ * ++ * Returns: ++ * 0 - (index < size) ++ */ ++static inline unsigned long array_index_mask_nospec(unsigned long index, ++ unsigned long size) ++{ ++ unsigned long mask; ++ ++ asm ("cmp %1,%2; sbb %0,%0;" ++ :"=r" (mask) ++ :"r"(size),"r" (index) ++ :"cc"); ++ return mask; ++} ++ ++/* Override the default implementation from linux/nospec.h. */ ++#define array_index_mask_nospec array_index_mask_nospec ++ + #ifdef CONFIG_X86_PPRO_FENCE + #define dma_rmb() rmb() + #else +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch new file mode 100644 index 00000000..8243cf1f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch @@ -0,0 +1,686 @@ +From 98cbbfe8b0e5e38dac94986ffa4b09da9860a9af Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 24 Sep 2017 16:59:49 -0700 +Subject: [PATCH 027/102] kaiser: add "nokaiser" boot option, using ALTERNATIVE + +Added "nokaiser" boot option: an early param like "noinvpcid". +Most places now check int kaiser_enabled (#defined 0 when not +CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S +and entry_64_compat.S are using the ALTERNATIVE technique, which +patches in the preferred instructions at runtime. That technique +is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated. + +Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that, +but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when +nokaiser like when !CONFIG_KAISER, but not setting either when kaiser - +neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL +won't get set in some obscure corner, or something add PGE into CR4. +By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled, +all page table setup which uses pte_pfn() masks it out of the ptes. + +It's slightly shameful that the same declaration versus definition of +kaiser_enabled appears in not one, not two, but in three header files +(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). 
I felt safer that way, +than with #including any of those in any of the others; and did not +feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes +them all, so we shall hear about it if they get out of synch. + +Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER +from kaiser.c; removed the unused native_get_normal_pgd(); removed +the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some +comments. But more interestingly, set CR4.PSE in secondary_startup_64: +the manual is clear that it does not matter whether it's 0 or 1 when +4-level-pts are enabled, but I was distracted to find cr4 different on +BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask(). + +Signed-off-by: Hugh Dickins <hughd@google.com> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 2 ++ + arch/x86/entry/entry_64.S | 15 ++++++------ + arch/x86/include/asm/cpufeatures.h | 3 +++ + arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++------ + arch/x86/include/asm/pgtable.h | 20 +++++++++++----- + arch/x86/include/asm/pgtable_64.h | 13 ++++------- + arch/x86/include/asm/pgtable_types.h | 4 ---- + arch/x86/include/asm/tlbflush.h | 39 ++++++++++++++++++++------------ + arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++- + arch/x86/kernel/espfix_64.c | 3 ++- + arch/x86/kernel/head_64.S | 4 ++-- + arch/x86/mm/init.c | 2 +- + arch/x86/mm/init_64.c | 10 ++++++++ + arch/x86/mm/kaiser.c | 26 +++++++++++++++++---- + arch/x86/mm/pgtable.c | 8 ++----- + arch/x86/mm/tlb.c | 4 +--- + tools/arch/x86/include/asm/cpufeatures.h | 3 +++ + 17 files changed, 146 insertions(+), 65 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index a303387..e2642ec 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2753,6 +2753,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + + nojitter [IA-64] Disables jitter checking for ITC timers. + ++ nokaiser [X86-64] Disable KAISER isolation of kernel from user. ++ + no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver + + no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 41bf650..bbb38ac 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry) + * unconditionally, but we need to find out whether the reverse + * should be done on return (conveyed to paranoid_exit in %ebx). + */ +- movq %cr3, %rax ++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + testl $KAISER_SHADOW_PGD_OFFSET, %eax + jz 2f + orl $2, %ebx +@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit) + TRACE_IRQS_OFF_DEBUG + TRACE_IRQS_IRETQ_DEBUG + #ifdef CONFIG_KAISER ++ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ + testl $2, %ebx /* SWITCH_USER_CR3 needed? 
*/ + jz paranoid_exit_no_switch + SWITCH_USER_CR3 +@@ -1339,13 +1340,14 @@ ENTRY(nmi) + #ifdef CONFIG_KAISER + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ +- movq %cr3, %rax ++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + orq x86_cr3_pcid_noflush, %rax + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + movq %rax, %cr3 ++2: + #endif + call do_nmi + +@@ -1355,8 +1357,7 @@ ENTRY(nmi) + * kernel code that needs user CR3, but do we ever return + * to "user mode" where we need the kernel CR3? + */ +- popq %rax +- mov %rax, %cr3 ++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER + #endif + + /* +@@ -1583,13 +1584,14 @@ end_repeat_nmi: + #ifdef CONFIG_KAISER + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ +- movq %cr3, %rax ++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ + orq x86_cr3_pcid_noflush, %rax + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax + movq %rax, %cr3 ++2: + #endif + + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ +@@ -1601,8 +1603,7 @@ end_repeat_nmi: + * kernel code that needs user CR3, like just just before + * a sysret. + */ +- popq %rax +- mov %rax, %cr3 ++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER + #endif + + testl %ebx, %ebx /* swapgs needed? */ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index dc50883..20271d6 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -198,6 +198,9 @@ + #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + ++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ ++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ ++ + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ + #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 3dc5f4c..96643a9 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -46,28 +46,33 @@ movq \reg, %cr3 + .endm + + .macro SWITCH_KERNEL_CR3 +-pushq %rax ++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER + _SWITCH_TO_KERNEL_CR3 %rax + popq %rax ++8: + .endm + + .macro SWITCH_USER_CR3 +-pushq %rax ++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER + _SWITCH_TO_USER_CR3 %rax %al + popq %rax ++8: + .endm + + .macro SWITCH_KERNEL_CR3_NO_STACK +-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) ++ALTERNATIVE "jmp 8f", \ ++ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ ++ X86_FEATURE_KAISER + _SWITCH_TO_KERNEL_CR3 %rax + movq PER_CPU_VAR(unsafe_stack_register_backup), %rax ++8: + .endm + + #else /* CONFIG_KAISER */ + +-.macro SWITCH_KERNEL_CR3 reg ++.macro SWITCH_KERNEL_CR3 + .endm +-.macro SWITCH_USER_CR3 reg regb ++.macro SWITCH_USER_CR3 + .endm + .macro SWITCH_KERNEL_CR3_NO_STACK + .endm +@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); + + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; + ++extern int kaiser_enabled; ++#else ++#define kaiser_enabled 0 ++#endif /* CONFIG_KAISER */ ++ ++/* ++ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, ++ * so as to build with tests on kaiser_enabled instead of #ifdefs. ++ */ ++ + /** + * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping + * @addr: the start address of the range +@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); + */ + extern void kaiser_init(void); + +-#endif /* CONFIG_KAISER */ +- + #endif /* __ASSEMBLY */ + + #endif /* _ASM_X86_KAISER_H */ +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 1cee98e..217e83a 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -18,6 +18,12 @@ + #ifndef __ASSEMBLY__ + #include <asm/x86_init.h> + ++#ifdef CONFIG_KAISER ++extern int kaiser_enabled; ++#else ++#define kaiser_enabled 0 ++#endif ++ + void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); + void ptdump_walk_pgd_level_checkwx(void); + +@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd) + * page table by accident; it will fault on the first + * instruction it tries to run. See native_set_pgd(). 
+ */ +- if (IS_ENABLED(CONFIG_KAISER)) ++ if (kaiser_enabled) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; +@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, + */ + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { +- memcpy(dst, src, count * sizeof(pgd_t)); ++ memcpy(dst, src, count * sizeof(pgd_t)); + #ifdef CONFIG_KAISER +- /* Clone the shadow pgd part as well */ +- memcpy(native_get_shadow_pgd(dst), +- native_get_shadow_pgd(src), +- count * sizeof(pgd_t)); ++ if (kaiser_enabled) { ++ /* Clone the shadow pgd part as well */ ++ memcpy(native_get_shadow_pgd(dst), ++ native_get_shadow_pgd(src), ++ count * sizeof(pgd_t)); ++ } + #endif + } + +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index 177caf3..cf68b5c 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); + + static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + { ++#ifdef CONFIG_DEBUG_VM ++ /* linux/mmdebug.h may not have been included at this point */ ++ BUG_ON(!kaiser_enabled); ++#endif + return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); + } +- +-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) +-{ +- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); +-} + #else + static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + { +@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + BUILD_BUG_ON(1); + return NULL; + } +-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) +-{ +- return pgdp; +-} + #endif /* CONFIG_KAISER */ + + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index 7cf2883..f0d9a1a 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -45,11 +45,7 @@ + #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +-#ifdef CONFIG_KAISER +-#define _PAGE_GLOBAL (_AT(pteval_t, 0)) +-#else + #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +-#endif + #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) + #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 4fff696..13a74f6 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -138,9 +138,11 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) + * to avoid the need for asm/kaiser.h in unexpected places. 
+ */ + #ifdef CONFIG_KAISER ++extern int kaiser_enabled; + extern void kaiser_setup_pcid(void); + extern void kaiser_flush_tlb_on_return_to_user(void); + #else ++#define kaiser_enabled 0 + static inline void kaiser_setup_pcid(void) + { + } +@@ -165,7 +167,7 @@ static inline void __native_flush_tlb(void) + * back: + */ + preempt_disable(); +- if (this_cpu_has(X86_FEATURE_PCID)) ++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) + kaiser_flush_tlb_on_return_to_user(); + native_write_cr3(native_read_cr3()); + preempt_enable(); +@@ -176,20 +178,30 @@ static inline void __native_flush_tlb_global_irq_disabled(void) + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); +- /* clear PGE */ +- native_write_cr4(cr4 & ~X86_CR4_PGE); +- /* write old PGE again and flush TLBs */ +- native_write_cr4(cr4); ++ if (cr4 & X86_CR4_PGE) { ++ /* clear PGE and flush TLB of all entries */ ++ native_write_cr4(cr4 & ~X86_CR4_PGE); ++ /* restore PGE as it was before */ ++ native_write_cr4(cr4); ++ } else { ++ /* ++ * x86_64 microcode update comes this way when CR4.PGE is not ++ * enabled, and it's safer for all callers to allow this case. ++ */ ++ native_write_cr3(native_read_cr3()); ++ } + } + + static inline void __native_flush_tlb_global(void) + { +-#ifdef CONFIG_KAISER +- /* Globals are not used at all */ +- __native_flush_tlb(); +-#else + unsigned long flags; + ++ if (kaiser_enabled) { ++ /* Globals are not used at all */ ++ __native_flush_tlb(); ++ return; ++ } ++ + if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes +@@ -209,7 +221,6 @@ static inline void __native_flush_tlb_global(void) + raw_local_irq_save(flags); + __native_flush_tlb_global_irq_disabled(); + raw_local_irq_restore(flags); +-#endif + } + + static inline void __native_flush_tlb_single(unsigned long addr) +@@ -224,7 +235,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) + */ + + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { +- if (this_cpu_has(X86_FEATURE_PCID)) ++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) + kaiser_flush_tlb_on_return_to_user(); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; +@@ -239,9 +250,9 @@ static inline void __native_flush_tlb_single(unsigned long addr) + * Make sure to do only a single invpcid when KAISER is + * disabled and we have only a single ASID. 
+ */ +- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) +- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); +- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); ++ if (kaiser_enabled) ++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); ++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); + } + + static inline void __flush_tlb_all(void) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index e6be5f3..8b03874 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s) + return 1; + } + __setup("nopcid", x86_pcid_setup); ++ ++static int __init x86_nokaiser_setup(char *s) ++{ ++ /* nokaiser doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++#ifdef CONFIG_KAISER ++ kaiser_enabled = 0; ++ setup_clear_cpu_cap(X86_FEATURE_KAISER); ++ pr_info("nokaiser: KAISER feature disabled\n"); ++#endif ++ return 0; ++} ++early_param("nokaiser", x86_nokaiser_setup); + #endif + + static int __init x86_noinvpcid_setup(char *s) +@@ -327,7 +341,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) + static void setup_pcid(struct cpuinfo_x86 *c) + { + if (cpu_has(c, X86_FEATURE_PCID)) { +- if (cpu_has(c, X86_FEATURE_PGE)) { ++ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) { + cr4_set_bits(X86_CR4_PCIDE); + /* + * INVPCID has two "groups" of types: +@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + + init_scattered_cpuid_features(c); ++#ifdef CONFIG_KAISER ++ if (kaiser_enabled) ++ set_cpu_cap(c, X86_FEATURE_KAISER); ++#endif + } + + static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +@@ -1537,6 +1555,14 @@ void cpu_init(void) + * try to read it. + */ + cr4_init_shadow(); ++ if (!kaiser_enabled) { ++ /* ++ * secondary_startup_64() deferred setting PGE in cr4: ++ * probe_page_size_mask() sets it on the boot cpu, ++ * but it needs to be set on each secondary cpu. ++ */ ++ cr4_set_bits(X86_CR4_PGE); ++ } + + /* + * Load microcode on this cpu if a valid microcode is available. +diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c +index 560c2fd..e33b385 100644 +--- a/arch/x86/kernel/espfix_64.c ++++ b/arch/x86/kernel/espfix_64.c +@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void) + * area to ensure it is mapped into the shadow user page + * tables. + */ +- if (IS_ENABLED(CONFIG_KAISER)) ++ if (kaiser_enabled) { + set_pgd(native_get_shadow_pgd(pgd_p), + __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); ++ } + + /* Randomize the locations */ + init_espfix_random(); +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index 5775379..d04479b 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64) + movq $(init_level4_pgt - __START_KERNEL_map), %rax + 1: + +- /* Enable PAE mode and PGE */ +- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx ++ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ ++ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx + movq %rcx, %cr4 + + /* Setup early boot stage 4 level pagetables. 
*/ +diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c +index 22af912..05a9855 100644 +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void) + cr4_set_bits_and_update_boot(X86_CR4_PSE); + + /* Enable PGE if available */ +- if (boot_cpu_has(X86_FEATURE_PGE)) { ++ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) { + cr4_set_bits_and_update_boot(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } else +diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +index 14b9dd7..a0e8df6 100644 +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -324,6 +324,16 @@ void __init cleanup_highmap(void) + continue; + if (vaddr < (unsigned long) _text || vaddr > end) + set_pmd(pmd, __pmd(0)); ++ else if (kaiser_enabled) { ++ /* ++ * level2_kernel_pgt is initialized with _PAGE_GLOBAL: ++ * clear that now. This is not important, so long as ++ * CR4.PGE remains clear, but it removes an anomaly. ++ * Physical mapping setup below avoids _PAGE_GLOBAL ++ * by use of massage_pgprot() inside pfn_pte() etc. ++ */ ++ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); ++ } + } + } + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index cc0950f..11032dc 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -16,7 +16,9 @@ + #include <asm/pgalloc.h> + #include <asm/desc.h> + +-#ifdef CONFIG_KAISER ++int kaiser_enabled __read_mostly = 1; ++EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ ++ + __visible + DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) + return pte_offset_kernel(pmd, address); + } + +-int kaiser_add_user_map(const void *__start_addr, unsigned long size, +- unsigned long flags) ++static int kaiser_add_user_map(const void *__start_addr, unsigned long size, ++ unsigned long flags) + { + int ret = 0; + pte_t *pte; +@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long end_addr = PAGE_ALIGN(start_addr + size); + unsigned long target_address; + ++ /* ++ * It is convenient for callers to pass in __PAGE_KERNEL etc, ++ * and there is no actual harm from setting _PAGE_GLOBAL, so ++ * long as CR4.PGE is not set. But it is nonetheless troubling ++ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" ++ * requires that not to be #defined to 0): so mask it off here. 
++ */ ++ flags &= ~_PAGE_GLOBAL; ++ + for (; address < end_addr; address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); + if (target_address == -1) { +@@ -263,6 +274,8 @@ void __init kaiser_init(void) + { + int cpu; + ++ if (!kaiser_enabled) ++ return; + kaiser_init_all_pgds(); + + for_each_possible_cpu(cpu) { +@@ -311,6 +324,8 @@ void __init kaiser_init(void) + /* Add a mapping to the shadow mapping, and synchronize the mappings */ + int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) + { ++ if (!kaiser_enabled) ++ return 0; + return kaiser_add_user_map((const void *)addr, size, flags); + } + +@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) + unsigned long addr, next; + pgd_t *pgd; + ++ if (!kaiser_enabled) ++ return; + pgd = native_get_shadow_pgd(pgd_offset_k(start)); + for (addr = start; addr < end; pgd++, addr = next) { + next = pgd_addr_end(addr, end); +@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp) + + pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + { ++ if (!kaiser_enabled) ++ return pgd; + /* + * Do we need to also populate the shadow pgd? Check _PAGE_USER to + * skip cases like kexec and EFI which make temporary low mappings. +@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(void) + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); + } + EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); +-#endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 352fd01..5aaec8e 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd) + } + #else + +-#ifdef CONFIG_KAISER + /* +- * Instead of one pmd, we aquire two pmds. Being order-1, it is ++ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +-#define PGD_ALLOCATION_ORDER 1 +-#else +-#define PGD_ALLOCATION_ORDER 0 +-#endif ++#define PGD_ALLOCATION_ORDER kaiser_enabled + + static inline pgd_t *_pgd_alloc(void) + { +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 852c665..fde44bb 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -41,8 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) + { + unsigned long new_mm_cr3 = __pa(pgdir); + +-#ifdef CONFIG_KAISER +- if (this_cpu_has(X86_FEATURE_PCID)) { ++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entries for the PCID out when we change tasks. +@@ -59,7 +58,6 @@ static void load_new_mm_cr3(pgd_t *pgdir) + new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; + kaiser_flush_tlb_on_return_to_user(); + } +-#endif /* CONFIG_KAISER */ + + /* + * Caution: many callers of this function expect +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index a396292..67c93d9 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -197,6 +197,9 @@ + #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + ++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ ++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ ++ + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ + #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch new file mode 100644 index 00000000..9b3ea121 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch @@ -0,0 +1,70 @@ +From 13c25ff312ecc09941828ec112a11c40debbfef1 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:33 -0800 +Subject: [PATCH 27/42] x86: Introduce barrier_nospec + +(cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a) + +Rename the open coded form of this instruction sequence from +rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). + +One of the mitigations for Spectre variant1 vulnerabilities is to fence +speculative execution after successfully validating a bounds check. I.e. +force the result of a bounds check to resolve in the instruction pipeline +to ensure speculative execution honors that result before potentially +operating on out-of-bounds data. + +No functional changes. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Suggested-by: Andi Kleen <ak@linux.intel.com> +Suggested-by: Ingo Molnar <mingo@redhat.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Kees Cook <keescook@chromium.org> +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/barrier.h | 4 ++++ + arch/x86/include/asm/msr.h | 3 +-- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h +index ca22173..8575903 100644 +--- a/arch/x86/include/asm/barrier.h ++++ b/arch/x86/include/asm/barrier.h +@@ -47,6 +47,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + /* Override the default implementation from linux/nospec.h. */ + #define array_index_mask_nospec array_index_mask_nospec + ++/* Prevent speculative execution past this barrier. */ ++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ ++ "lfence", X86_FEATURE_LFENCE_RDTSC) ++ + #ifdef CONFIG_X86_PPRO_FENCE + #define dma_rmb() rmb() + #else +diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h +index b5fee97..ed35b91 100644 +--- a/arch/x86/include/asm/msr.h ++++ b/arch/x86/include/asm/msr.h +@@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) + * that some other imaginary CPU is updating continuously with a + * time stamp. 
+ */ +- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, +- "lfence", X86_FEATURE_LFENCE_RDTSC); ++ barrier_nospec(); + return rdtsc(); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch new file mode 100644 index 00000000..aac56df7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch @@ -0,0 +1,83 @@ +From b26b0d72d0e6506712e9ed45598814ff9e6b188b Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:39 -0800 +Subject: [PATCH 28/42] x86: Introduce __uaccess_begin_nospec() and + uaccess_try_nospec + +(cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd) + +For __get_user() paths, do not allow the kernel to speculate on the value +of a user controlled pointer. In addition to the 'stac' instruction for +Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the +access_ok() result to resolve in the pipeline before the CPU might take any +speculative action on the pointer value. Given the cost of 'stac' the +speculation barrier is placed after 'stac' to hopefully overlap the cost of +disabling SMAP with the cost of flushing the instruction pipeline. + +Since __get_user is a major kernel interface that deals with user +controlled pointers, the __uaccess_begin_nospec() mechanism will prevent +speculative execution past an access_ok() permission check. While +speculative execution past access_ok() is not enough to lead to a kernel +memory leak, it is a necessary precondition. + +To be clear, __uaccess_begin_nospec() is addressing a class of potential +problems near __get_user() usages. + +Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used +to protect __get_user(), pointer masking similar to array_index_nospec() +will be used for get_user() since it incorporates a bounds check near the +usage. + +uaccess_try_nospec provides the same mechanism for get_user_try. + +No functional changes. 
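+
+As a minimal sketch of how a __get_user()-style fast path picks this up
+(__example_get_user() is a hypothetical caller, shown only to make the
+pairing visible; it is not part of this patch):
+
+	int __example_get_user(u32 *val, const u32 __user *ptr)
+	{
+		int err;
+
+		__uaccess_begin_nospec();	/* stac(), then barrier_nospec() */
+		__get_user_size(*val, ptr, 4, err, -EFAULT);
+		__uaccess_end();		/* clac() */
+		return err;
+	}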
+ +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Suggested-by: Andi Kleen <ak@linux.intel.com> +Suggested-by: Ingo Molnar <mingo@redhat.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Kees Cook <keescook@chromium.org> +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/uaccess.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index faf3687..c917703 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -114,6 +114,11 @@ extern int __get_user_bad(void); + + #define __uaccess_begin() stac() + #define __uaccess_end() clac() ++#define __uaccess_begin_nospec() \ ++({ \ ++ stac(); \ ++ barrier_nospec(); \ ++}) + + /* + * This is a type: either unsigned long, if the argument fits into +@@ -465,6 +470,10 @@ struct __large_struct { unsigned long buf[100]; }; + __uaccess_begin(); \ + barrier(); + ++#define uaccess_try_nospec do { \ ++ current->thread.uaccess_err = 0; \ ++ __uaccess_begin_nospec(); \ ++ + #define uaccess_catch(err) \ + __uaccess_end(); \ + (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch new file mode 100644 index 00000000..f72d092f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch @@ -0,0 +1,104 @@ +From b5b97b7c4cbdc5f14263446aad0e9f01acea6165 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Tue, 2 Jan 2018 14:19:48 +0100 +Subject: [PATCH 028/102] x86/kaiser: Rename and simplify X86_FEATURE_KAISER + handling + +Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti". + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 2 +- + arch/x86/kernel/cpu/common.c | 18 ------------------ + arch/x86/mm/kaiser.c | 20 +++++++++++++++++++- + 3 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index e2642ec..f5a95f77 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2753,7 +2753,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + + nojitter [IA-64] Disables jitter checking for ITC timers. + +- nokaiser [X86-64] Disable KAISER isolation of kernel from user. ++ nopti [X86-64] Disable KAISER isolation of kernel from user. 
+ + no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 8b03874..918e447 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s) + return 1; + } + __setup("nopcid", x86_pcid_setup); +- +-static int __init x86_nokaiser_setup(char *s) +-{ +- /* nokaiser doesn't accept parameters */ +- if (s) +- return -EINVAL; +-#ifdef CONFIG_KAISER +- kaiser_enabled = 0; +- setup_clear_cpu_cap(X86_FEATURE_KAISER); +- pr_info("nokaiser: KAISER feature disabled\n"); +-#endif +- return 0; +-} +-early_param("nokaiser", x86_nokaiser_setup); + #endif + + static int __init x86_noinvpcid_setup(char *s) +@@ -813,10 +799,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + + init_scattered_cpuid_features(c); +-#ifdef CONFIG_KAISER +- if (kaiser_enabled) +- set_cpu_cap(c, X86_FEATURE_KAISER); +-#endif + } + + static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 11032dc..87cae72 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -274,8 +274,13 @@ void __init kaiser_init(void) + { + int cpu; + +- if (!kaiser_enabled) ++ if (!kaiser_enabled) { ++ setup_clear_cpu_cap(X86_FEATURE_KAISER); + return; ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_KAISER); ++ + kaiser_init_all_pgds(); + + for_each_possible_cpu(cpu) { +@@ -418,3 +423,16 @@ void kaiser_flush_tlb_on_return_to_user(void) + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); + } + EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); ++ ++static int __init x86_nokaiser_setup(char *s) ++{ ++ /* nopti doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++ ++ kaiser_enabled = 0; ++ pr_info("Kernel/User page tables isolation: disabled\n"); ++ ++ return 0; ++} ++early_param("nopti", x86_nokaiser_setup); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch new file mode 100644 index 00000000..cedcf69a --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch @@ -0,0 +1,127 @@ +From 8db17e2fa98e810bbc4f63d4e502caceaf942373 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Tue, 2 Jan 2018 14:19:48 +0100 +Subject: [PATCH 029/102] x86/kaiser: Check boottime cmdline params + +AMD (and possibly other vendors) are not affected by the leak +KAISER is protecting against. + +Keep the "nopti" for traditional reasons and add pti=<on|off|auto> +like upstream. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 6 ++++ + arch/x86/mm/kaiser.c | 59 ++++++++++++++++++++++++++----------- + 2 files changed, 47 insertions(+), 18 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index f5a95f77..9f04c53 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -3317,6 +3317,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + pt. [PARIDE] + See Documentation/blockdev/paride.txt. 
+ ++ pti= [X86_64] ++ Control KAISER user/kernel address space isolation: ++ on - enable ++ off - disable ++ auto - default setting ++ + pty.legacy_count= + [KNL] Number of legacy pty's. Overwrites compiled-in + default number. +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 87cae72..1840aa0 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -15,6 +15,7 @@ + #include <asm/pgtable.h> + #include <asm/pgalloc.h> + #include <asm/desc.h> ++#include <asm/cmdline.h> + + int kaiser_enabled __read_mostly = 1; + EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ +@@ -263,6 +264,43 @@ static void __init kaiser_init_all_pgds(void) + WARN_ON(__ret); \ + } while (0) + ++void __init kaiser_check_boottime_disable(void) ++{ ++ bool enable = true; ++ char arg[5]; ++ int ret; ++ ++ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); ++ if (ret > 0) { ++ if (!strncmp(arg, "on", 2)) ++ goto enable; ++ ++ if (!strncmp(arg, "off", 3)) ++ goto disable; ++ ++ if (!strncmp(arg, "auto", 4)) ++ goto skip; ++ } ++ ++ if (cmdline_find_option_bool(boot_command_line, "nopti")) ++ goto disable; ++ ++skip: ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ goto disable; ++ ++enable: ++ if (enable) ++ setup_force_cpu_cap(X86_FEATURE_KAISER); ++ ++ return; ++ ++disable: ++ pr_info("Kernel/User page tables isolation: disabled\n"); ++ kaiser_enabled = 0; ++ setup_clear_cpu_cap(X86_FEATURE_KAISER); ++} ++ + /* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. But, we +@@ -274,12 +312,10 @@ void __init kaiser_init(void) + { + int cpu; + +- if (!kaiser_enabled) { +- setup_clear_cpu_cap(X86_FEATURE_KAISER); +- return; +- } ++ kaiser_check_boottime_disable(); + +- setup_force_cpu_cap(X86_FEATURE_KAISER); ++ if (!kaiser_enabled) ++ return; + + kaiser_init_all_pgds(); + +@@ -423,16 +459,3 @@ void kaiser_flush_tlb_on_return_to_user(void) + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); + } + EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); +- +-static int __init x86_nokaiser_setup(char *s) +-{ +- /* nopti doesn't accept parameters */ +- if (s) +- return -EINVAL; +- +- kaiser_enabled = 0; +- pr_info("Kernel/User page tables isolation: disabled\n"); +- +- return 0; +-} +-early_param("nopti", x86_nokaiser_setup); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch new file mode 100644 index 00000000..a27e1b16 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch @@ -0,0 +1,73 @@ +From 73e4bfd188d510a576ca75964cd7939d97171e1f Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:44 -0800 +Subject: [PATCH 29/42] x86/usercopy: Replace open coded stac/clac with + __uaccess_{begin, end} + +(cherry picked from commit b5c4ae4f35325d520b230bab6eb3310613b72ac1) + +In preparation for converting some __uaccess_begin() instances to +__uacess_begin_nospec(), make sure all 'from user' uaccess paths are +using the _begin(), _end() helpers rather than open-coded stac() and +clac(). + +No functional changes. 
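+
+Schematically, each conversion is a one-for-one swap at the boundaries
+of the user-copy region (the copy body below stands in for the real
+loops):
+
+	__uaccess_begin();		/* was: stac(); */
+	__copy_user(to, from, n);
+	__uaccess_end();		/* was: clac(); */
+
+which leaves exactly one place per function to later substitute
+__uaccess_begin_nospec() where a speculation barrier is wanted.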
+ +Suggested-by: Ingo Molnar <mingo@redhat.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Kees Cook <keescook@chromium.org> +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/lib/usercopy_32.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c +index 3bc7baf..9b5fa0f 100644 +--- a/arch/x86/lib/usercopy_32.c ++++ b/arch/x86/lib/usercopy_32.c +@@ -570,12 +570,12 @@ do { \ + unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) + { +- stac(); ++ __uaccess_begin(); + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel(to, from, n); +- clac(); ++ __uaccess_end(); + return n; + } + EXPORT_SYMBOL(__copy_to_user_ll); +@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); + unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, + unsigned long n) + { +- stac(); ++ __uaccess_begin(); + #ifdef CONFIG_X86_INTEL_USERCOPY + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) + n = __copy_user_intel_nocache(to, from, n); +@@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr + #else + __copy_user(to, from, n); + #endif +- clac(); ++ __uaccess_end(); + return n; + } + EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch new file mode 100644 index 00000000..9c34147d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch @@ -0,0 +1,137 @@ +From 9a72e20b9aaf74010d4426ec95e58c262161465e Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Tue, 3 Oct 2017 20:49:04 -0700 +Subject: [PATCH 030/102] kaiser: use ALTERNATIVE instead of + x86_cr3_pcid_noflush + +Now that we're playing the ALTERNATIVE game, use that more efficient +method: instead of user-mapping an extra page, and reading an extra +cacheline each time for x86_cr3_pcid_noflush. + +Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax) +is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs; +but the one line with $63 in looks clearer, so let's stick with that. + +Worried about what happens with an ALTERNATIVE between the jump and +jump label in another ALTERNATIVE? I was, but have checked the +combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64, +and it does a good job. 
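+
+For readers counting bits: $63 is X86_CR3_PCID_NOFLUSH_BIT.  With
+CR4.PCIDE=1, a CR3 write whose bit 63 is set tells the CPU not to flush
+the TLB entries of the new PCID.  Roughly (a sketch, with "pcid"
+standing in for the asid bits; not code from this patch):
+
+	cr3 = __pa(pgd) | pcid | (1UL << 63);	/* NOFLUSH hint */
+	native_write_cr3(cr3);			/* switch pgd, keep TLB */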
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 7 ++++--- + arch/x86/include/asm/kaiser.h | 6 +++--- + arch/x86/mm/kaiser.c | 11 +---------- + 3 files changed, 8 insertions(+), 16 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index bbb38ac..d4ba81e 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1084,7 +1084,8 @@ ENTRY(paranoid_entry) + jz 2f + orl $2, %ebx + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax +- orq x86_cr3_pcid_noflush, %rax ++ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ ++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + movq %rax, %cr3 + 2: + #endif +@@ -1342,7 +1343,7 @@ ENTRY(nmi) + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ +- orq x86_cr3_pcid_noflush, %rax ++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax +@@ -1586,7 +1587,7 @@ end_repeat_nmi: + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER + /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ +- orq x86_cr3_pcid_noflush, %rax ++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID + pushq %rax + /* mask off "user" bit of pgd address and 12 PCID bits: */ + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 96643a9..906150d 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -25,7 +25,8 @@ + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +-orq x86_cr3_pcid_noflush, \reg ++/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ ++ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID + movq \reg, %cr3 + .endm + +@@ -39,7 +40,7 @@ movq \reg, %cr3 + movq %cr3, \reg + orq PER_CPU_VAR(x86_cr3_pcid_user), \reg + js 9f +-/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ ++/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */ + movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) + 9: + movq \reg, %cr3 +@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + */ + DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + +-extern unsigned long x86_cr3_pcid_noflush; + DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); + + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 1840aa0..b8aa9ad 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -31,7 +31,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + * This is also handy because systems that do not support PCIDs + * just end up or'ing a 0 into their CR3, which does no harm. 
+ */ +-unsigned long x86_cr3_pcid_noflush __read_mostly; + DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); + + /* +@@ -356,10 +355,6 @@ void __init kaiser_init(void) + kaiser_add_user_map_early(&debug_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); +- +- kaiser_add_user_map_early(&x86_cr3_pcid_noflush, +- sizeof(x86_cr3_pcid_noflush), +- __PAGE_KERNEL); + } + + /* Add a mapping to the shadow mapping, and synchronize the mappings */ +@@ -433,18 +428,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + + void kaiser_setup_pcid(void) + { +- unsigned long kern_cr3 = 0; + unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; + +- if (this_cpu_has(X86_FEATURE_PCID)) { +- kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; ++ if (this_cpu_has(X86_FEATURE_PCID)) + user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; +- } + /* + * These variables are used by the entry/exit + * code to change PCID and pgd and TLB flushing. + */ +- x86_cr3_pcid_noflush = kern_cr3; + this_cpu_write(x86_cr3_pcid_user, user_cr3); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch new file mode 100644 index 00000000..fab4948d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch @@ -0,0 +1,196 @@ +From 268e7abcab638b44ca26107c32bf0c2df0a5b678 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:49 -0800 +Subject: [PATCH 30/42] x86/uaccess: Use __uaccess_begin_nospec() and + uaccess_try_nospec + +(cherry picked from commit 304ec1b050310548db33063e567123fae8fd0301) + +Quoting Linus: + + I do think that it would be a good idea to very expressly document + the fact that it's not that the user access itself is unsafe. I do + agree that things like "get_user()" want to be protected, but not + because of any direct bugs or problems with get_user() and friends, + but simply because get_user() is an excellent source of a pointer + that is obviously controlled from a potentially attacking user + space. So it's a prime candidate for then finding _subsequent_ + accesses that can then be used to perturb the cache. + +__uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the +limit check is far away from the user pointer de-reference. In those cases +a barrier_nospec() prevents speculation with a potential pointer to +privileged memory. uaccess_try_nospec covers get_user_try. 
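+
+A sketch of the exception-table flavour from a consumer's point of view
+(the structure and field names here are hypothetical):
+
+	get_user_try {			/* now opens with uaccess_try_nospec */
+		get_user_ex(val, &uframe->example_field);
+	} get_user_catch(err);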
+ +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Suggested-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: Kees Cook <keescook@chromium.org> +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/uaccess.h | 6 +++--- + arch/x86/include/asm/uaccess_32.h | 12 ++++++------ + arch/x86/include/asm/uaccess_64.h | 12 ++++++------ + arch/x86/lib/usercopy_32.c | 4 ++-- + 4 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index c917703..f80021b 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -428,7 +428,7 @@ do { \ + ({ \ + int __gu_err; \ + __inttype(*(ptr)) __gu_val; \ +- __uaccess_begin(); \ ++ __uaccess_begin_nospec(); \ + __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __uaccess_end(); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ +@@ -538,7 +538,7 @@ struct __large_struct { unsigned long buf[100]; }; + * get_user_ex(...); + * } get_user_catch(err) + */ +-#define get_user_try uaccess_try ++#define get_user_try uaccess_try_nospec + #define get_user_catch(err) uaccess_catch(err) + + #define get_user_ex(x, ptr) do { \ +@@ -573,7 +573,7 @@ extern void __cmpxchg_wrong_size(void) + __typeof__(ptr) __uval = (uval); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ +- __uaccess_begin(); \ ++ __uaccess_begin_nospec(); \ + switch (size) { \ + case 1: \ + { \ +diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h +index 7d3bdd1..d6d2450 100644 +--- a/arch/x86/include/asm/uaccess_32.h ++++ b/arch/x86/include/asm/uaccess_32.h +@@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) + + switch (n) { + case 1: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); + return ret; + case 2: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); + return ret; + case 4: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); + return ret; +@@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, + + switch (n) { + case 1: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); + return ret; + case 2: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); + return ret; + case 4: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); + return ret; +diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h +index 673059a..6e5cc08 100644 +--- a/arch/x86/include/asm/uaccess_64.h ++++ b/arch/x86/include/asm/uaccess_64.h +@@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) + return copy_user_generic(dst, (__force void *)src, size); + 
switch (size) { + case 1: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u8 *)dst, (u8 __user *)src, + ret, "b", "b", "=q", 1); + __uaccess_end(); + return ret; + case 2: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u16 *)dst, (u16 __user *)src, + ret, "w", "w", "=r", 2); + __uaccess_end(); + return ret; + case 4: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u32 *)dst, (u32 __user *)src, + ret, "l", "k", "=r", 4); + __uaccess_end(); + return ret; + case 8: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 8); + __uaccess_end(); + return ret; + case 10: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 10); + if (likely(!ret)) +@@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) + __uaccess_end(); + return ret; + case 16: +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + __get_user_asm(*(u64 *)dst, (u64 __user *)src, + ret, "q", "", "=r", 16); + if (likely(!ret)) +diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c +index 9b5fa0f..5c06dbf 100644 +--- a/arch/x86/lib/usercopy_32.c ++++ b/arch/x86/lib/usercopy_32.c +@@ -570,7 +570,7 @@ do { \ + unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) + { +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else +@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); + unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, + unsigned long n) + { +- __uaccess_begin(); ++ __uaccess_begin_nospec(); + #ifdef CONFIG_X86_INTEL_USERCOPY + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) + n = __copy_user_intel_nocache(to, from, n); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch new file mode 100644 index 00000000..d43e612e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch @@ -0,0 +1,55 @@ +From a7d5826fa05e5a38ef9314db7fafe753db4f6760 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sun, 29 Oct 2017 11:36:19 -0700 +Subject: [PATCH 031/102] kaiser: drop is_atomic arg to kaiser_pagetable_walk() + +I have not observed a might_sleep() warning from setup_fixmap_gdt()'s +use of kaiser_add_mapping() in our tree (why not?), but like upstream +we have not provided a way for that to pass is_atomic true down to +kaiser_pagetable_walk(), and at startup it's far from a likely source +of trouble: so just delete the walk's is_atomic arg and might_sleep(). + +Signed-off-by: Hugh Dickins <hughd@google.com> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index b8aa9ad..65ac3fd 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -107,19 +107,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) + * + * Returns a pointer to a PTE on success, or NULL on failure. 
+ */ +-static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) ++static pte_t *kaiser_pagetable_walk(unsigned long address) + { + pmd_t *pmd; + pud_t *pud; + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + +- if (is_atomic) { +- gfp &= ~GFP_KERNEL; +- gfp |= __GFP_HIGH | __GFP_ATOMIC; +- } else +- might_sleep(); +- + if (pgd_none(*pgd)) { + WARN_ONCE(1, "All shadow pgds should have been populated"); + return NULL; +@@ -194,7 +188,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + ret = -EIO; + break; + } +- pte = kaiser_pagetable_walk(address, false); ++ pte = kaiser_pagetable_walk(address); + if (!pte) { + ret = -ENOMEM; + break; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch new file mode 100644 index 00000000..c58bff80 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch @@ -0,0 +1,100 @@ +From aa9e88541e4443ffd498e0dd1912b2e658a659e6 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:54 -0800 +Subject: [PATCH 31/42] x86/get_user: Use pointer masking to limit speculation + +(cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94) + +Quoting Linus: + + I do think that it would be a good idea to very expressly document + the fact that it's not that the user access itself is unsafe. I do + agree that things like "get_user()" want to be protected, but not + because of any direct bugs or problems with get_user() and friends, + but simply because get_user() is an excellent source of a pointer + that is obviously controlled from a potentially attacking user + space. So it's a prime candidate for then finding _subsequent_ + accesses that can then be used to perturb the cache. + +Unlike the __get_user() case get_user() includes the address limit check +near the pointer de-reference. With that locality the speculation can be +mitigated with pointer narrowing rather than a barrier, i.e. +array_index_nospec(). Where the narrowing is performed by: + + cmp %limit, %ptr + sbb %mask, %mask + and %mask, %ptr + +With respect to speculation the value of %ptr is either less than %limit +or NULL. 
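+
+That is the same cmp/sbb idiom the C-level helper uses; for reference,
+the x86 array_index_mask_nospec() elsewhere in this series is roughly:
+
+	/* mask is ~0UL when index < size, 0UL otherwise -- no branch */
+	static inline unsigned long array_index_mask_nospec(unsigned long index,
+							    unsigned long size)
+	{
+		unsigned long mask;
+
+		asm ("cmp %1,%2; sbb %0,%0;"
+				:"=r" (mask)
+				:"g"(size),"r" (index)
+				:"cc");
+		return mask;
+	}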
+ +Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: Kees Cook <keescook@chromium.org> +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/lib/getuser.S | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S +index 37b62d4..b12b214 100644 +--- a/arch/x86/lib/getuser.S ++++ b/arch/x86/lib/getuser.S +@@ -39,6 +39,8 @@ ENTRY(__get_user_1) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ ++ and %_ASM_DX, %_ASM_AX + ASM_STAC + 1: movzbl (%_ASM_AX),%edx + xor %eax,%eax +@@ -53,6 +55,8 @@ ENTRY(__get_user_2) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ ++ and %_ASM_DX, %_ASM_AX + ASM_STAC + 2: movzwl -1(%_ASM_AX),%edx + xor %eax,%eax +@@ -67,6 +71,8 @@ ENTRY(__get_user_4) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ ++ and %_ASM_DX, %_ASM_AX + ASM_STAC + 3: movl -3(%_ASM_AX),%edx + xor %eax,%eax +@@ -82,6 +88,8 @@ ENTRY(__get_user_8) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ ++ and %_ASM_DX, %_ASM_AX + ASM_STAC + 4: movq -7(%_ASM_AX),%rdx + xor %eax,%eax +@@ -93,6 +101,8 @@ ENTRY(__get_user_8) + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user_8 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ ++ and %_ASM_DX, %_ASM_AX + ASM_STAC + 4: movl -7(%_ASM_AX),%edx + 5: movl -3(%_ASM_AX),%ecx +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch new file mode 100644 index 00000000..1b462c50 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch @@ -0,0 +1,88 @@ +From 95c03985a61a61abac25f542f4effd5133ed7a49 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sat, 4 Nov 2017 18:23:24 -0700 +Subject: [PATCH 032/102] kaiser: asm/tlbflush.h handle noPGE at lower level + +I found asm/tlbflush.h too twisty, and think it safer not to avoid +__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case, +but instead let it handle kaiser_enabled along with cr3: it can just +use __native_flush_tlb() for that, no harm in re-disabling preemption. + +(This is not the same change as Kirill and Dave have suggested for +upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge +check; cr3 is enough for kaiser, and thought to be cheaper than cr4.) 
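+
+(Concretely, the lower level now falls back to the CR3 reload that
+__native_flush_tlb() already performs; sketched, this is that body as
+the series leaves it:
+
+	preempt_disable();
+	if (kaiser_enabled)
+		kaiser_flush_tlb_on_return_to_user();
+	native_write_cr3(native_read_cr3());	/* non-global flush */
+	preempt_enable();
+
+hence "no harm in re-disabling preemption".)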
+ +Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals() +preference from __native_flush_tlb(): unlike the invpcid_flush_all() +preference in __native_flush_tlb_global(), it's not seen in upstream +4.14, and was recently reported to be surprisingly slow. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 27 +++------------------------ + 1 file changed, 3 insertions(+), 24 deletions(-) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 13a74f6..bc6f979 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -153,14 +153,6 @@ static inline void kaiser_flush_tlb_on_return_to_user(void) + + static inline void __native_flush_tlb(void) + { +- if (this_cpu_has(X86_FEATURE_INVPCID)) { +- /* +- * Note, this works with CR4.PCIDE=0 or 1. +- */ +- invpcid_flush_all_nonglobals(); +- return; +- } +- + /* + * If current->mm == NULL then we borrow a mm which may change during a + * task switch and therefore we must not be preempted while we write CR3 +@@ -184,11 +176,8 @@ static inline void __native_flush_tlb_global_irq_disabled(void) + /* restore PGE as it was before */ + native_write_cr4(cr4); + } else { +- /* +- * x86_64 microcode update comes this way when CR4.PGE is not +- * enabled, and it's safer for all callers to allow this case. +- */ +- native_write_cr3(native_read_cr3()); ++ /* do it with cr3, letting kaiser flush user PCID */ ++ __native_flush_tlb(); + } + } + +@@ -196,12 +185,6 @@ static inline void __native_flush_tlb_global(void) + { + unsigned long flags; + +- if (kaiser_enabled) { +- /* Globals are not used at all */ +- __native_flush_tlb(); +- return; +- } +- + if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes +@@ -257,11 +240,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) + + static inline void __flush_tlb_all(void) + { +- if (boot_cpu_has(X86_FEATURE_PGE)) +- __flush_tlb_global(); +- else +- __flush_tlb(); +- ++ __flush_tlb_global(); + /* + * Note: if we somehow had PCID but not PGE, then this wouldn't work -- + * we'd end up flushing kernel translations for the current ASID but +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch new file mode 100644 index 00000000..8acd9616 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch @@ -0,0 +1,64 @@ +From 0c89c81045ecacb413a4cd61ec5187f7aa688074 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:02:59 -0800 +Subject: [PATCH 32/42] x86/syscall: Sanitize syscall table de-references under + speculation + +(cherry picked from commit 2fbd7af5af8665d18bcefae3e9700be07e22b681) + +The syscall table base is a user controlled function pointer in kernel +space. Use array_index_nospec() to prevent any out of bounds speculation. + +While retpoline prevents speculating into a userspace directed target it +does not stop the pointer de-reference, the concern is leaking memory +relative to the syscall table base, by observing instruction cache +behavior. 
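+
+For context, the real helper is a type-generic macro in <linux/nospec.h>;
+functionally it clamps the index so a mispredicted bounds check can read
+entry 0 at worst, as in this sketch (example_index_nospec() is
+illustrative, not the kernel's spelling):
+
+	static inline unsigned long example_index_nospec(unsigned long index,
+							 unsigned long size)
+	{
+		/* out-of-bounds indices become 0 under speculation */
+		return index & array_index_mask_nospec(index, size);
+	}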
+ +Reported-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Andy Lutomirski <luto@kernel.org> +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/common.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index bd1d102..b0cd306 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -20,6 +20,7 @@ + #include <linux/export.h> + #include <linux/context_tracking.h> + #include <linux/user-return-notifier.h> ++#include <linux/nospec.h> + #include <linux/uprobes.h> + + #include <asm/desc.h> +@@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) + * regs->orig_ax, which changes the behavior of some syscalls. + */ + if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { +- regs->ax = sys_call_table[nr & __SYSCALL_MASK]( ++ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); ++ regs->ax = sys_call_table[nr]( + regs->di, regs->si, regs->dx, + regs->r10, regs->r8, regs->r9); + } +@@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + } + + if (likely(nr < IA32_NR_syscalls)) { ++ nr = array_index_nospec(nr, IA32_NR_syscalls); + /* + * It's possible that a 32-bit syscall implementation + * takes a 64-bit parameter but nonetheless assumes that +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch new file mode 100644 index 00000000..85d10cc6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch @@ -0,0 +1,93 @@ +From e57b7ded6155d14093d87783a34eb2d33384b059 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sat, 4 Nov 2017 18:43:06 -0700 +Subject: [PATCH 033/102] kaiser: kaiser_flush_tlb_on_return_to_user() check + PCID + +Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID +check, instead of each caller doing it inline first: nobody needs +to optimize for the noPCID case, it's clearer this way, and better +suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines +by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes. 
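+
+At the call sites this distils to a schematic before/after:
+
+	/* before: every caller repeated the feature test */
+	if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+		kaiser_flush_tlb_on_return_to_user();
+
+	/* after: the helper performs the PCID check itself */
+	if (kaiser_enabled)
+		kaiser_flush_tlb_on_return_to_user();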
+ +Signed-off-by: Hugh Dickins <hughd@google.com> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 4 ++-- + arch/x86/mm/kaiser.c | 6 +++--- + arch/x86/mm/tlb.c | 8 ++++---- + 3 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index bc6f979..8db339a 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -159,7 +159,7 @@ static inline void __native_flush_tlb(void) + * back: + */ + preempt_disable(); +- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) ++ if (kaiser_enabled) + kaiser_flush_tlb_on_return_to_user(); + native_write_cr3(native_read_cr3()); + preempt_enable(); +@@ -218,7 +218,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) + */ + + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { +- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) ++ if (kaiser_enabled) + kaiser_flush_tlb_on_return_to_user(); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 65ac3fd..8600663 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -435,12 +435,12 @@ void kaiser_setup_pcid(void) + + /* + * Make a note that this cpu will need to flush USER tlb on return to user. +- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: +- * if cpu does not, then the NOFLUSH bit will never have been set. ++ * If cpu does not have PCID, then the NOFLUSH bit will never have been set. + */ + void kaiser_flush_tlb_on_return_to_user(void) + { +- this_cpu_write(x86_cr3_pcid_user, ++ if (this_cpu_has(X86_FEATURE_PCID)) ++ this_cpu_write(x86_cr3_pcid_user, + X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); + } + EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index fde44bb..e81f8bb 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -41,7 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) + { + unsigned long new_mm_cr3 = __pa(pgdir); + +- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { ++ if (kaiser_enabled) { + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entries for the PCID out when we change tasks. +@@ -52,10 +52,10 @@ static void load_new_mm_cr3(pgd_t *pgdir) + * do it here, but can only be used if X86_FEATURE_INVPCID is + * available - and many machines support pcid without invpcid. + * +- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; +- * but keep that line in there in case something changes. ++ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it ++ * would be needed in the write_cr3() below - if PCIDs enabled. 
+ */ +- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; ++ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH); + kaiser_flush_tlb_on_return_to_user(); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch new file mode 100644 index 00000000..d9334b8e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch @@ -0,0 +1,57 @@ +From daf0f36d9103ecacecf426f868c8608e7e3edd95 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:03:05 -0800 +Subject: [PATCH 33/42] vfs, fdtable: Prevent bounds-check bypass via + speculative execution + +(cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507) + +'fd' is a user controlled value that is used as a data dependency to +read from the 'fdt->fd' array. In order to avoid potential leaks of +kernel memory values, block speculative execution of the instruction +stream that could issue reads based on an invalid 'file *' returned from +__fcheck_files. + +Co-developed-by: Elena Reshetova <elena.reshetova@intel.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/fdtable.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h +index 6e84b2cae..442b54a 100644 +--- a/include/linux/fdtable.h ++++ b/include/linux/fdtable.h +@@ -9,6 +9,7 @@ + #include <linux/compiler.h> + #include <linux/spinlock.h> + #include <linux/rcupdate.h> ++#include <linux/nospec.h> + #include <linux/types.h> + #include <linux/init.h> + #include <linux/fs.h> +@@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i + { + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + +- if (fd < fdt->max_fds) ++ if (fd < fdt->max_fds) { ++ fd = array_index_nospec(fd, fdt->max_fds); + return rcu_dereference_raw(fdt->fd[fd]); ++ } + return NULL; + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch new file mode 100644 index 00000000..aad935dc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch @@ -0,0 +1,71 @@ +From 7ca54ff851a9411364d557f555edb904f2786c5c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon, 4 Dec 2017 15:07:30 +0100 +Subject: [PATCH 034/102] x86/paravirt: Dont patch flush_tlb_single + +commit a035795499ca1c2bd1928808d1a156eda1420383 upstream + +native_flush_tlb_single() will be changed with the upcoming +PAGE_TABLE_ISOLATION feature. This requires to have more code in +there than INVLPG. + +Remove the paravirt patching for it. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Peter Zijlstra <peterz@infradead.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Borislav Petkov <bpetkov@suse.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Laight <David.Laight@aculab.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: Eduardo Valentin <eduval@amazon.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Cc: linux-mm@kvack.org +Cc: michael.schwarz@iaik.tugraz.at +Cc: moritz.lipp@iaik.tugraz.at +Cc: richard.fellner@student.tugraz.at +Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Acked-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/paravirt_patch_64.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c +index bb3840c..ee43b36 100644 +--- a/arch/x86/kernel/paravirt_patch_64.c ++++ b/arch/x86/kernel/paravirt_patch_64.c +@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); + DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); + DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); + DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); +-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); + DEF_NATIVE(pv_cpu_ops, clts, "clts"); + DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); + +@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + PATCH_SITE(pv_mmu_ops, read_cr3); + PATCH_SITE(pv_mmu_ops, write_cr3); + PATCH_SITE(pv_cpu_ops, clts); +- PATCH_SITE(pv_mmu_ops, flush_tlb_single); + PATCH_SITE(pv_cpu_ops, wbinvd); + #if defined(CONFIG_PARAVIRT_SPINLOCKS) + case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch new file mode 100644 index 00000000..8703f68f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch @@ -0,0 +1,43 @@ +From 793cff3e2e196a3287441de5c10c969d031ae64c Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Mon, 29 Jan 2018 17:03:21 -0800 +Subject: [PATCH 34/42] x86/spectre: Report get_user mitigation for spectre_v1 + +(cherry picked from commit edfbae53dab8348fca778531be9f4855d2ca0360) + +Reflect the presence of get_user(), __get_user(), and 'syscall' protections +in sysfs. The expectation is that new and better tooling will allow the +kernel to grow more usages of array_index_nospec(), for now, only claim +mitigation for __user pointer de-references. 
+ +Reported-by: Jiri Slaby <jslaby@suse.cz> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-arch@vger.kernel.org +Cc: kernel-hardening@lists.openwall.com +Cc: gregkh@linuxfoundation.org +Cc: torvalds@linux-foundation.org +Cc: alan@linux.intel.com +Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 51624c6..d4658e0 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -296,7 +296,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, + { + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); +- return sprintf(buf, "Vulnerable\n"); ++ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + } + + ssize_t cpu_show_spectre_v2(struct device *dev, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch new file mode 100644 index 00000000..7d9090de --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch @@ -0,0 +1,30 @@ +From 8faf9d3b08e32650dcc838b1a45be05a3b1ef8c7 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Tue, 2 Jan 2018 14:19:49 +0100 +Subject: [PATCH 035/102] x86/kaiser: Reenable PARAVIRT + +Now that the required bits have been addressed, reenable +PARAVIRT. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + security/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/security/Kconfig b/security/Kconfig +index d8ae933..fd2ceeb 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -34,7 +34,7 @@ config SECURITY + config KAISER + bool "Remove the kernel mapping in user mode" + default y +- depends on X86_64 && SMP && !PARAVIRT ++ depends on X86_64 && SMP + help + This enforces a strict kernel and user space isolation, in order + to close hardware side channels on kernel address information. +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch new file mode 100644 index 00000000..6308fc6d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch @@ -0,0 +1,41 @@ +From 5f49c69f0110c99880f0d85cf96e7cc60acd4987 Mon Sep 17 00:00:00 2001 +From: Colin Ian King <colin.king@canonical.com> +Date: Tue, 30 Jan 2018 19:32:18 +0000 +Subject: [PATCH 35/42] x86/spectre: Fix spelling mistake: "vunerable"-> + "vulnerable" + +(cherry picked from commit e698dcdfcda41efd0984de539767b4cddd235f1e) + +Trivial fix to spelling mistake in pr_err error message text. 
+ +Signed-off-by: Colin Ian King <colin.king@canonical.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: kernel-janitors@vger.kernel.org +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@suse.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index d4658e0..aec7daf 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -102,7 +102,7 @@ bool retpoline_module_ok(bool has_retpoline) + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + +- pr_err("System may be vunerable to spectre v2\n"); ++ pr_err("System may be vulnerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch new file mode 100644 index 00000000..c3b92286 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch @@ -0,0 +1,44 @@ +From 4fcc032fb48c21d614c5a691092f923880c94232 Mon Sep 17 00:00:00 2001 +From: Jiri Kosina <jkosina@suse.cz> +Date: Tue, 2 Jan 2018 14:19:49 +0100 +Subject: [PATCH 036/102] kaiser: disabled on Xen PV + +Kaiser cannot be used on paravirtualized MMUs (namely reading and writing CR3). +This does not work with KAISER as the CR3 switch from and to user space PGD +would require to map the whole XEN_PV machinery into both. + +More importantly, enabling KAISER on Xen PV doesn't make too much sense, as PV +guests use distinct %cr3 values for kernel and user already. 
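+
+As a sketch, the control flow the hunk below implements (simplified; the
+command-line parsing between the entry check and the labels is elided):
+
+	void __init kaiser_check_boottime_disable(void)
+	{
+		/* PV guests already use distinct %cr3 for kernel and user */
+		if (boot_cpu_has(X86_FEATURE_XENPV))
+			goto silent_disable;	/* no "disabled" pr_info */
+		...
+	disable:
+		pr_info("Kernel/User page tables isolation: disabled\n");
+	silent_disable:
+		kaiser_enabled = 0;
+		setup_clear_cpu_cap(X86_FEATURE_KAISER);
+	}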
+ +Signed-off-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 8600663..2768854 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -263,6 +263,9 @@ void __init kaiser_check_boottime_disable(void) + char arg[5]; + int ret; + ++ if (boot_cpu_has(X86_FEATURE_XENPV)) ++ goto silent_disable; ++ + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (!strncmp(arg, "on", 2)) +@@ -290,6 +293,8 @@ void __init kaiser_check_boottime_disable(void) + + disable: + pr_info("Kernel/User page tables isolation: disabled\n"); ++ ++silent_disable: + kaiser_enabled = 0; + setup_clear_cpu_cap(X86_FEATURE_KAISER); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch new file mode 100644 index 00000000..54039e5f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch @@ -0,0 +1,127 @@ +From 230aaaad00ca4c1e2c350ce30188d03417a170fe Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Tue, 30 Jan 2018 14:30:23 +0000 +Subject: [PATCH 36/42] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature + bits on Intel + +(cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b) + +Despite the fact that all the other code there seems to be doing it, just +using set_cpu_cap() in early_intel_init() doesn't actually work. + +For CPUs with PKU support, setup_pku() calls get_cpu_cap() after +c->c_init() has set those feature bits. That resets those bits back to what +was queried from the hardware. + +Turning the bits off for bad microcode is easy to fix. That can just use +setup_clear_cpu_cap() to force them off for all CPUs. + +I was less keen on forcing the feature bits *on* that way, just in case +of inconsistencies. I appreciate that the kernel is going to get this +utterly wrong if CPU features are not consistent, because it has already +applied alternatives by the time secondary CPUs are brought up. + +But at least if setup_force_cpu_cap() isn't being used, we might have a +chance of *detecting* the lack of the corresponding bit and either +panicking or refusing to bring the offending CPU online. + +So ensure that the appropriate feature bits are set within get_cpu_cap() +regardless of how many extra times it's called. 
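+
+The ordering problem, reconstructed from the description above as a
+simplified call sequence (not literal code):
+
+	identify_cpu(c)
+	  -> this_cpu->c_init(c)	/* early_init_intel(): sets IBRS/IBPB */
+	  -> setup_pku(c)
+	       -> get_cpu_cap(c)	/* re-reads CPUID, clearing them again */
+
+Deriving the bits inside get_cpu_cap() itself, as below, keeps the result
+stable however many times the function runs.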
+ +Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: karahmed@amazon.de +Cc: peterz@infradead.org +Cc: bp@alien8.de +Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ + arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- + 2 files changed, 29 insertions(+), 19 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index cfa026f..60e537d 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) + } + } + ++static void init_speculation_control(struct cpuinfo_x86 *c) ++{ ++ /* ++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, ++ * and they also have a different bit for STIBP support. Also, ++ * a hypervisor might have set the individual AMD bits even on ++ * Intel CPUs, for finer-grained selection of what's available. ++ * ++ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware ++ * features, which are visible in /proc/cpuinfo and used by the ++ * kernel. So set those accordingly from the Intel bits. ++ */ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ } ++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++} ++ + void get_cpu_cap(struct cpuinfo_x86 *c) + { + u32 eax, ebx, ecx, edx; +@@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + + init_scattered_cpuid_features(c); ++ init_speculation_control(c); + } + + static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 2e257f8..4097b43 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -140,28 +140,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) + rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); + } + +- /* +- * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, +- * and they also have a different bit for STIBP support. Also, +- * a hypervisor might have set the individual AMD bits even on +- * Intel CPUs, for finer-grained selection of what's available. 
+- */ +- if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { +- set_cpu_cap(c, X86_FEATURE_IBRS); +- set_cpu_cap(c, X86_FEATURE_IBPB); +- } +- if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) +- set_cpu_cap(c, X86_FEATURE_STIBP); +- + /* Now if any of them are set, check the blacklist and clear the lot */ +- if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || ++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || ++ cpu_has(c, X86_FEATURE_INTEL_STIBP) || ++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); +- clear_cpu_cap(c, X86_FEATURE_IBRS); +- clear_cpu_cap(c, X86_FEATURE_IBPB); +- clear_cpu_cap(c, X86_FEATURE_STIBP); +- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); +- clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); ++ setup_clear_cpu_cap(X86_FEATURE_IBRS); ++ setup_clear_cpu_cap(X86_FEATURE_IBPB); ++ setup_clear_cpu_cap(X86_FEATURE_STIBP); ++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); ++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch new file mode 100644 index 00000000..f59d427c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch @@ -0,0 +1,85 @@ +From 8190c42189289770ebddf8dd479aea223665637d Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Mon, 25 Dec 2017 13:57:16 +0100 +Subject: [PATCH 037/102] x86/kaiser: Move feature detection up + +... before the first use of kaiser_enabled as otherwise funky +things happen: + + about to get started... 
+ (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000] + (XEN) Pagetable walk from ffff88022a449090: + (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e + (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff + (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08 + entry.o#create_bounce_frame+0x135/0x14d + (XEN) Domain 0 (vcpu#0) crashed on cpu#0: + (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]---- + (XEN) CPU: 0 + (XEN) RIP: e033:[<ffffffff81007460>] + (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0) + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kaiser.h | 2 ++ + arch/x86/kernel/setup.c | 7 +++++++ + arch/x86/mm/kaiser.c | 2 -- + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 906150d..b5e46aa 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); + extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; + + extern int kaiser_enabled; ++extern void __init kaiser_check_boottime_disable(void); + #else + #define kaiser_enabled 0 ++static inline void __init kaiser_check_boottime_disable(void) {} + #endif /* CONFIG_KAISER */ + + /* +diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +index 9c337b0..545a95a 100644 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -114,6 +114,7 @@ + #include <asm/microcode.h> + #include <asm/mmu_context.h> + #include <asm/kaslr.h> ++#include <asm/kaiser.h> + + /* + * max_low_pfn_mapped: highest direct mapped pfn under 4GB +@@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p) + */ + init_hypervisor_platform(); + ++ /* ++ * This needs to happen right after XENPV is set on xen and ++ * kaiser_enabled is checked below in cleanup_highmap(). ++ */ ++ kaiser_check_boottime_disable(); ++ + x86_init.resources.probe_roms(); + + /* after parse_early_param, so could debug it */ +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 2768854..d43f369 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -310,8 +310,6 @@ void __init kaiser_init(void) + { + int cpu; + +- kaiser_check_boottime_disable(); +- + if (!kaiser_enabled) + return; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch new file mode 100644 index 00000000..846ec86f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch @@ -0,0 +1,54 @@ +From 72e87893e6f14922dcd6231a7676bac67154dae8 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 1 Feb 2018 11:27:20 +0000 +Subject: [PATCH 37/42] x86/retpoline: Avoid retpolines for built-in __init + functions + +(cherry picked from commit 66f793099a636862a71c59d4a6ba91387b155e0c) + +There's no point in building init code with retpolines, since it runs before +any potentially hostile userspace does. And before the retpoline is actually +ALTERNATIVEd into place, for much of it. 
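+
+A hedged illustration of the mechanism (the callback is made up; the
+attribute is the one the hunk below wraps as __noretpoline):
+
+	/* built with -mindirect-branch=thunk-extern when RETPOLINE is set */
+	static int __init __noretpoline probe_board(void)
+	{
+		return board_ops->probe();	/* indirect call */
+	}
+
+Without the attribute the indirect call would be rewritten into a
+retpoline thunk; with indirect_branch("keep") the plain call survives,
+which is safe because __init code runs before any hostile userspace.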
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: karahmed@amazon.de +Cc: peterz@infradead.org +Cc: bp@alien8.de +Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/init.h | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/include/linux/init.h b/include/linux/init.h +index e30104c..8e346d1 100644 +--- a/include/linux/init.h ++++ b/include/linux/init.h +@@ -4,6 +4,13 @@ + #include <linux/compiler.h> + #include <linux/types.h> + ++/* Built-in __init functions needn't be compiled with retpoline */ ++#if defined(RETPOLINE) && !defined(MODULE) ++#define __noretpoline __attribute__((indirect_branch("keep"))) ++#else ++#define __noretpoline ++#endif ++ + /* These macros are used to mark some functions or + * initialized data (doesn't apply to uninitialized data) + * as `initialization' functions. The kernel can take this +@@ -39,7 +46,7 @@ + + /* These are for everybody (although not all archs will actually + discard it in modules) */ +-#define __init __section(.init.text) __cold notrace __latent_entropy ++#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline + #define __initdata __section(.init.data) + #define __initconst __section(.init.rodata) + #define __exitdata __section(.exit.data) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch new file mode 100644 index 00000000..bd48e9c6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch @@ -0,0 +1,359 @@ +From 4c484c8da3f97360d9451ac79a6f687d0155088e Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Wed, 3 Jan 2018 10:17:35 -0800 +Subject: [PATCH 038/102] KPTI: Rename to PAGE_TABLE_ISOLATION + +This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION. 
+ +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/boot/compressed/misc.h | 2 +- + arch/x86/entry/entry_64.S | 12 ++++++------ + arch/x86/events/intel/ds.c | 4 ++-- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/kaiser.h | 12 ++++++------ + arch/x86/include/asm/pgtable.h | 4 ++-- + arch/x86/include/asm/pgtable_64.h | 4 ++-- + arch/x86/include/asm/pgtable_types.h | 2 +- + arch/x86/include/asm/tlbflush.h | 2 +- + arch/x86/kernel/head_64.S | 2 +- + arch/x86/mm/Makefile | 2 +- + arch/x86/mm/kaslr.c | 2 +- + include/linux/kaiser.h | 6 +++--- + include/linux/percpu-defs.h | 2 +- + security/Kconfig | 2 +- + tools/arch/x86/include/asm/cpufeatures.h | 2 +- + 16 files changed, 31 insertions(+), 31 deletions(-) + +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index cd80024..4f4c42a 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -9,7 +9,7 @@ + */ + #undef CONFIG_PARAVIRT + #undef CONFIG_PARAVIRT_SPINLOCKS +-#undef CONFIG_KAISER ++#undef CONFIG_PAGE_TABLE_ISOLATION + #undef CONFIG_KASAN + + #include <linux/linkage.h> +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index d4ba81e..5bb9b02 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1071,7 +1071,7 @@ ENTRY(paranoid_entry) + SWAPGS + xorl %ebx, %ebx + 1: +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 + * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. +@@ -1111,7 +1111,7 @@ ENTRY(paranoid_exit) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF_DEBUG + TRACE_IRQS_IRETQ_DEBUG +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ + testl $2, %ebx /* SWITCH_USER_CR3 needed? */ + jz paranoid_exit_no_switch +@@ -1338,7 +1338,7 @@ ENTRY(nmi) + + movq %rsp, %rdi + movq $-1, %rsi +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER +@@ -1352,7 +1352,7 @@ ENTRY(nmi) + #endif + call do_nmi + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Unconditionally restore CR3. I know we return to + * kernel code that needs user CR3, but do we ever return +@@ -1582,7 +1582,7 @@ end_repeat_nmi: + 1: + movq %rsp, %rdi + movq $-1, %rsi +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* Unconditionally use kernel CR3 for do_nmi() */ + /* %rax is saved above, so OK to clobber here */ + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER +@@ -1598,7 +1598,7 @@ end_repeat_nmi: + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + call do_nmi + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Unconditionally restore CR3. 
We might be returning to + * kernel code that needs user CR3, like just just before +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index c2e4ae2..f97d8b4 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -274,7 +274,7 @@ static DEFINE_PER_CPU(void *, insn_buffer); + + static void *dsalloc(size_t size, gfp_t flags, int node) + { +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + unsigned int order = get_order(size); + struct page *page; + unsigned long addr; +@@ -295,7 +295,7 @@ static void *dsalloc(size_t size, gfp_t flags, int node) + + static void dsfree(const void *buffer, size_t size) + { +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!buffer) + return; + kaiser_remove_mapping((unsigned long)buffer, size); +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 20271d6..454a37a 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -199,7 +199,7 @@ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +-#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ ++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index b5e46aa..802bbbd 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -20,7 +20,7 @@ + #define KAISER_SHADOW_PGD_OFFSET 0x1000 + + #ifdef __ASSEMBLY__ +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + + .macro _SWITCH_TO_KERNEL_CR3 reg + movq %cr3, \reg +@@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + 8: + .endm + +-#else /* CONFIG_KAISER */ ++#else /* CONFIG_PAGE_TABLE_ISOLATION */ + + .macro SWITCH_KERNEL_CR3 + .endm +@@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + .macro SWITCH_KERNEL_CR3_NO_STACK + .endm + +-#endif /* CONFIG_KAISER */ ++#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + + #else /* __ASSEMBLY__ */ + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Upon kernel/user mode switch, it may happen that the address + * space has to be switched before the registers have been +@@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime_disable(void); + #else + #define kaiser_enabled 0 + static inline void __init kaiser_check_boottime_disable(void) {} +-#endif /* CONFIG_KAISER */ ++#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + + /* +- * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, ++ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set, + * so as to build with tests on kaiser_enabled instead of #ifdefs. 
+ */ + +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 217e83a..2536f90 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -18,7 +18,7 @@ + #ifndef __ASSEMBLY__ + #include <asm/x86_init.h> + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + extern int kaiser_enabled; + #else + #define kaiser_enabled 0 +@@ -920,7 +920,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { + memcpy(dst, src, count * sizeof(pgd_t)); +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (kaiser_enabled) { + /* Clone the shadow pgd part as well */ + memcpy(native_get_shadow_pgd(dst), +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index cf68b5c..ce97c8c6 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_t *pud) + native_set_pud(pud, native_make_pud(0)); + } + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); + + static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) +@@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) + BUILD_BUG_ON(1); + return NULL; + } +-#endif /* CONFIG_KAISER */ ++#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index f0d9a1a..f1c8ac4 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -144,7 +144,7 @@ + #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) + #define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) + +-#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) ++#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64) + /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ + #define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 8db339a..183af59 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -137,7 +137,7 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) + * Declare a couple of kaiser interfaces here for convenience, + * to avoid the need for asm/kaiser.h in unexpected places. + */ +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + extern int kaiser_enabled; + extern void kaiser_setup_pcid(void); + extern void kaiser_flush_tlb_on_return_to_user(void); +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index d04479b..67cd7c1 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -405,7 +405,7 @@ GLOBAL(early_recursion_flag) + .balign PAGE_SIZE; \ + GLOBAL(name) + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Each PGD needs to be 8k long and 8k aligned. 
We do not + * ever go out to userspace with these, so we do not +diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile +index c505569..c548b46 100644 +--- a/arch/x86/mm/Makefile ++++ b/arch/x86/mm/Makefile +@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o + obj-$(CONFIG_X86_INTEL_MPX) += mpx.o + obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o + obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +-obj-$(CONFIG_KAISER) += kaiser.o ++obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o +diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c +index 9284ec1..319183d 100644 +--- a/arch/x86/mm/kaslr.c ++++ b/arch/x86/mm/kaslr.c +@@ -189,6 +189,6 @@ void __meminit init_trampoline(void) + *pud_tramp = *pud; + } + +- /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ ++ /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */ + trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); + } +diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h +index 4a4d6d9..58c55b1 100644 +--- a/include/linux/kaiser.h ++++ b/include/linux/kaiser.h +@@ -1,7 +1,7 @@ + #ifndef _LINUX_KAISER_H + #define _LINUX_KAISER_H + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + #include <asm/kaiser.h> + + static inline int kaiser_map_thread_stack(void *stack) +@@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_stack(void *stack) + #else + + /* +- * These stubs are used whenever CONFIG_KAISER is off, which ++ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which + * includes architectures that support KAISER, but have it disabled. + */ + +@@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_stack(void *stack) + { + } + +-#endif /* !CONFIG_KAISER */ ++#endif /* !CONFIG_PAGE_TABLE_ISOLATION */ + #endif /* _LINUX_KAISER_H */ +diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h +index cfe13cb..8902f23 100644 +--- a/include/linux/percpu-defs.h ++++ b/include/linux/percpu-defs.h +@@ -35,7 +35,7 @@ + + #endif + +-#ifdef CONFIG_KAISER ++#ifdef CONFIG_PAGE_TABLE_ISOLATION + #define USER_MAPPED_SECTION "..user_mapped" + #else + #define USER_MAPPED_SECTION "" +diff --git a/security/Kconfig b/security/Kconfig +index fd2ceeb..32f36b4 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -31,7 +31,7 @@ config SECURITY + + If you are unsure how to answer this question, answer N. + +-config KAISER ++config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on X86_64 && SMP +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index 67c93d9..f79669a 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -198,7 +198,7 @@ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ +-#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ ++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch new file mode 100644 index 00000000..ad179306 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch @@ -0,0 +1,141 @@ +From 825c7a1a9545787191c7dec21823a4b854dd8172 Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed <karahmed@amazon.de> +Date: Thu, 1 Feb 2018 11:27:21 +0000 +Subject: [PATCH 38/42] x86/spectre: Simplify spectre_v2 command line parsing + +(cherry picked from commit 9005c6834c0ffdfe46afa76656bd9276cca864f6) + +[dwmw2: Use ARRAY_SIZE] + +Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: peterz@infradead.org +Cc: bp@alien8.de +Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 86 ++++++++++++++++++++++++++++++---------------- + 1 file changed, 56 insertions(+), 30 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index aec7daf..957ad44 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -118,13 +118,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } + static void __init spec2_print_if_insecure(const char *reason) + { + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) +- pr_info("%s\n", reason); ++ pr_info("%s selected on command line.\n", reason); + } + + static void __init spec2_print_if_secure(const char *reason) + { + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) +- pr_info("%s\n", reason); ++ pr_info("%s selected on command line.\n", reason); + } + + static inline bool retp_compiler(void) +@@ -139,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) + return len == arglen && !strncmp(arg, opt, len); + } + ++static const struct { ++ const char *option; ++ enum spectre_v2_mitigation_cmd cmd; ++ bool secure; ++} mitigation_options[] = { ++ { "off", SPECTRE_V2_CMD_NONE, false }, ++ { "on", SPECTRE_V2_CMD_FORCE, true }, ++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, ++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, ++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, ++ { "auto", SPECTRE_V2_CMD_AUTO, false }, ++}; ++ + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + { + char arg[20]; +- int ret; +- +- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, +- sizeof(arg)); +- if (ret > 0) { +- if (match_option(arg, ret, "off")) { +- goto disable; +- } else if (match_option(arg, ret, "on")) { +- spec2_print_if_secure("force enabled on command line."); +- return SPECTRE_V2_CMD_FORCE; +- } else if (match_option(arg, ret, "retpoline")) { +- spec2_print_if_insecure("retpoline selected on command line."); +- return SPECTRE_V2_CMD_RETPOLINE; +- } else if (match_option(arg, ret, "retpoline,amd")) { +- if 
(boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { +- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); +- return SPECTRE_V2_CMD_AUTO; +- } +- spec2_print_if_insecure("AMD retpoline selected on command line."); +- return SPECTRE_V2_CMD_RETPOLINE_AMD; +- } else if (match_option(arg, ret, "retpoline,generic")) { +- spec2_print_if_insecure("generic retpoline selected on command line."); +- return SPECTRE_V2_CMD_RETPOLINE_GENERIC; +- } else if (match_option(arg, ret, "auto")) { ++ int ret, i; ++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; ++ ++ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) ++ return SPECTRE_V2_CMD_NONE; ++ else { ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, ++ sizeof(arg)); ++ if (ret < 0) ++ return SPECTRE_V2_CMD_AUTO; ++ ++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { ++ if (!match_option(arg, ret, mitigation_options[i].option)) ++ continue; ++ cmd = mitigation_options[i].cmd; ++ break; ++ } ++ ++ if (i >= ARRAY_SIZE(mitigation_options)) { ++ pr_err("unknown option (%s). Switching to AUTO select\n", ++ mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + } + +- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) ++ if ((cmd == SPECTRE_V2_CMD_RETPOLINE || ++ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || ++ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && ++ !IS_ENABLED(CONFIG_RETPOLINE)) { ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", ++ mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; +-disable: +- spec2_print_if_insecure("disabled on command line."); +- return SPECTRE_V2_CMD_NONE; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { ++ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (mitigation_options[i].secure) ++ spec2_print_if_secure(mitigation_options[i].option); ++ else ++ spec2_print_if_insecure(mitigation_options[i].option); ++ ++ return cmd; + } + + /* Check for Skylake-like CPUs (for RSB handling) */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch new file mode 100644 index 00000000..2827a652 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch @@ -0,0 +1,48 @@ +From a6060f8fa954e0e863ab51937ceca40627d81bcd Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Wed, 3 Jan 2018 10:18:01 -0800 +Subject: [PATCH 039/102] KPTI: Report when enabled + +Make sure dmesg reports when KPTI is enabled. 
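+
+Sample check once booted (output assembled from the pr_fmt and pr_info
+strings in the hunk below):
+
+	$ dmesg | grep 'page tables isolation'
+	Kernel/User page tables isolation: enabled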
+ +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index d43f369..b6b0f3a 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -10,6 +10,9 @@ + #include <linux/mm.h> + #include <linux/uaccess.h> + ++#undef pr_fmt ++#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt ++ + #include <asm/kaiser.h> + #include <asm/tlbflush.h> /* to verify its kaiser declarations */ + #include <asm/pgtable.h> +@@ -292,7 +295,7 @@ void __init kaiser_check_boottime_disable(void) + return; + + disable: +- pr_info("Kernel/User page tables isolation: disabled\n"); ++ pr_info("disabled\n"); + + silent_disable: + kaiser_enabled = 0; +@@ -352,6 +355,8 @@ void __init kaiser_init(void) + kaiser_add_user_map_early(&debug_idt_table, + sizeof(gate_desc) * NR_VECTORS, + __PAGE_KERNEL); ++ ++ pr_info("enabled\n"); + } + + /* Add a mapping to the shadow mapping, and synchronize the mappings */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch new file mode 100644 index 00000000..a53ec46b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch @@ -0,0 +1,55 @@ +From a89a8bf00b6ad57d89f9d42ae682f7367fcd0d27 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann <arnd@arndb.de> +Date: Fri, 2 Feb 2018 22:39:23 +0100 +Subject: [PATCH 39/42] x86/pti: Mark constant arrays as __initconst + +(cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e) + +I'm seeing build failures from the two newly introduced arrays that +are marked 'const' and '__initdata', which are mutually exclusive: + +arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' +arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' + +The correct annotation is __initconst. 
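+
+The rule in miniature (assumed reproducer, not from the patch): a const
+object wants a read-only section, while __initdata forces the writable
+.init.data section, and gcc refuses once the two placements collide:
+
+	static const struct x86_cpu_id bad[] __initdata  = { {} };	/* section type conflict */
+	static const struct x86_cpu_id ok[]  __initconst = { {} };	/* lands in .init.rodata */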
+ +Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@suse.de> +Cc: Thomas Garnier <thgarnie@google.com> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 60e537d..08e89ed 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #endif + } + +-static const __initdata struct x86_cpu_id cpu_no_speculation[] = { ++static const __initconst struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, +@@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + {} + }; + +-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { ++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} + }; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch new file mode 100644 index 00000000..b6131785 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch @@ -0,0 +1,121 @@ +From 4fcdcb102a1ff37315086c48dd5890e666533035 Mon Sep 17 00:00:00 2001 +From: Guenter Roeck <groeck@chromium.org> +Date: Thu, 4 Jan 2018 13:41:55 -0800 +Subject: [PATCH 040/102] kaiser: Set _PAGE_NX only if supported + +This resolves a crash if loaded under qemu + haxm under windows. +See https://www.spinics.net/lists/kernel/msg2689835.html for details. +Here is a boot log (the log is from chromeos-4.4, but Tao Wu says that +the same log is also seen with vanilla v4.4.110-rc1). 
+ +[ 0.712750] Freeing unused kernel memory: 552K +[ 0.721821] init: Corrupted page table at address 57b029b332e0 +[ 0.722761] PGD 80000000bb238067 PUD bc36a067 PMD bc369067 PTE 45d2067 +[ 0.722761] Bad pagetable: 000b [#1] PREEMPT SMP +[ 0.722761] Modules linked in: +[ 0.722761] CPU: 1 PID: 1 Comm: init Not tainted 4.4.96 #31 +[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS +rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 +[ 0.722761] task: ffff8800bc290000 ti: ffff8800bc28c000 task.ti: ffff8800bc28c000 +[ 0.722761] RIP: 0010:[<ffffffff83f4129e>] [<ffffffff83f4129e>] __clear_user+0x42/0x67 +[ 0.722761] RSP: 0000:ffff8800bc28fcf8 EFLAGS: 00010202 +[ 0.722761] RAX: 0000000000000000 RBX: 00000000000001a4 RCX: 00000000000001a4 +[ 0.722761] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000057b029b332e0 +[ 0.722761] RBP: ffff8800bc28fd08 R08: ffff8800bc290000 R09: ffff8800bb2f4000 +[ 0.722761] R10: ffff8800bc290000 R11: ffff8800bb2f4000 R12: 000057b029b332e0 +[ 0.722761] R13: 0000000000000000 R14: 000057b029b33340 R15: ffff8800bb1e2a00 +[ 0.722761] FS: 0000000000000000(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 +[ 0.722761] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[ 0.722761] CR2: 000057b029b332e0 CR3: 00000000bb2f8000 CR4: 00000000000006e0 +[ 0.722761] Stack: +[ 0.722761] 000057b029b332e0 ffff8800bb95fa80 ffff8800bc28fd18 ffffffff83f4120c +[ 0.722761] ffff8800bc28fe18 ffffffff83e9e7a1 ffff8800bc28fd68 0000000000000000 +[ 0.722761] ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 +[ 0.722761] Call Trace: +[ 0.722761] [<ffffffff83f4120c>] clear_user+0x2e/0x30 +[ 0.722761] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7 +[ 0.722761] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c +[ 0.722761] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98 +[ 0.722761] [<ffffffff844febe0>] ? rest_init+0x87/0x87 +[ 0.722761] [<ffffffff83de40be>] do_execve+0x23/0x25 +[ 0.722761] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d +[ 0.722761] [<ffffffff844fec4d>] kernel_init+0x6d/0xda +[ 0.722761] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70 +[ 0.722761] [<ffffffff844febe0>] ? 
rest_init+0x87/0x87 +[ 0.722761] Code: 86 84 be 12 00 00 00 e8 87 0d e8 ff 66 66 90 48 89 d8 48 c1 +eb 03 4c 89 e7 83 e0 07 48 89 d9 be 08 00 00 00 31 d2 48 85 c9 74 0a <48> 89 17 +48 01 f7 ff c9 75 f6 48 89 c1 85 c9 74 09 88 17 48 ff +[ 0.722761] RIP [<ffffffff83f4129e>] __clear_user+0x42/0x67 +[ 0.722761] RSP <ffff8800bc28fcf8> +[ 0.722761] ---[ end trace def703879b4ff090 ]--- +[ 0.722761] BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/v4.4/kernel/locking/rwsem.c:21 +[ 0.722761] in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: init +[ 0.722761] CPU: 1 PID: 1 Comm: init Tainted: G D 4.4.96 #31 +[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 +[ 0.722761] 0000000000000086 dcb5d76098c89836 ffff8800bc28fa30 ffffffff83f34004 +[ 0.722761] ffffffff84839dc2 0000000000000015 ffff8800bc28fa40 ffffffff83d57dc9 +[ 0.722761] ffff8800bc28fa68 ffffffff83d57e6a ffffffff84a53640 0000000000000000 +[ 0.722761] Call Trace: +[ 0.722761] [<ffffffff83f34004>] dump_stack+0x4d/0x63 +[ 0.722761] [<ffffffff83d57dc9>] ___might_sleep+0x13a/0x13c +[ 0.722761] [<ffffffff83d57e6a>] __might_sleep+0x9f/0xa6 +[ 0.722761] [<ffffffff84502788>] down_read+0x20/0x31 +[ 0.722761] [<ffffffff83cc5d9b>] __blocking_notifier_call_chain+0x35/0x63 +[ 0.722761] [<ffffffff83cc5ddd>] blocking_notifier_call_chain+0x14/0x16 +[ 0.800374] usb 1-1: new full-speed USB device number 2 using uhci_hcd +[ 0.722761] [<ffffffff83cefe97>] profile_task_exit+0x1a/0x1c +[ 0.802309] [<ffffffff83cac84e>] do_exit+0x39/0xe7f +[ 0.802309] [<ffffffff83ce5938>] ? vprintk_default+0x1d/0x1f +[ 0.802309] [<ffffffff83d7bb95>] ? printk+0x57/0x73 +[ 0.802309] [<ffffffff83c46e25>] oops_end+0x80/0x85 +[ 0.802309] [<ffffffff83c7b747>] pgtable_bad+0x8a/0x95 +[ 0.802309] [<ffffffff83ca7f4a>] __do_page_fault+0x8c/0x352 +[ 0.802309] [<ffffffff83eefba5>] ? file_has_perm+0xc4/0xe5 +[ 0.802309] [<ffffffff83ca821c>] do_page_fault+0xc/0xe +[ 0.802309] [<ffffffff84507682>] page_fault+0x22/0x30 +[ 0.802309] [<ffffffff83f4129e>] ? __clear_user+0x42/0x67 +[ 0.802309] [<ffffffff83f4127f>] ? __clear_user+0x23/0x67 +[ 0.802309] [<ffffffff83f4120c>] clear_user+0x2e/0x30 +[ 0.802309] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7 +[ 0.802309] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c +[ 0.802309] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98 +[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87 +[ 0.802309] [<ffffffff83de40be>] do_execve+0x23/0x25 +[ 0.802309] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d +[ 0.802309] [<ffffffff844fec4d>] kernel_init+0x6d/0xda +[ 0.802309] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70 +[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87 +[ 0.830559] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 +[ 0.830559] +[ 0.831305] Kernel Offset: 0x2c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) +[ 0.831305] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 + +The crash part of this problem may be solved with the following patch +(thanks to Hugh for the hint). There is still another problem, though - +with this patch applied, the qemu session aborts with "VCPU Shutdown +request", whatever that means. 
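+
+The fix, in sketch form (the guard is exactly what the hunk below adds;
+on a CPU without NX support, a set NX bit is a reserved bit, which is
+what the reserved-bit "Bad pagetable" oops above is reporting):
+
+	if (__supported_pte_mask & _PAGE_NX)	/* NX supported at all? */
+		pgd.pgd |= _PAGE_NX;		/* only then harden the entry */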
+ +Cc: lepton <ytht.net@gmail.com> +Signed-off-by: Guenter Roeck <groeck@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index b6b0f3a..d8376b4 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -413,7 +413,8 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + * get out to userspace running on the kernel CR3, + * userspace will crash instead of running. + */ +- pgd.pgd |= _PAGE_NX; ++ if (__supported_pte_mask & _PAGE_NX) ++ pgd.pgd |= _PAGE_NX; + } + } else if (!pgd.pgd) { + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch new file mode 100644 index 00000000..4e57ccfc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch @@ -0,0 +1,41 @@ +From 95f8f24919bd97bf372f5edbf9a25d5d358c4596 Mon Sep 17 00:00:00 2001 +From: Darren Kenny <darren.kenny@oracle.com> +Date: Fri, 2 Feb 2018 19:12:20 +0000 +Subject: [PATCH 40/42] x86/speculation: Fix typo IBRS_ATT, which should be + IBRS_ALL + +(cherry picked from commit af189c95a371b59f493dbe0f50c0a09724868881) + +Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") +Signed-off-by: Darren Kenny <darren.kenny@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Masami Hiramatsu <mhiramat@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index df4ecec..300cc15 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future +- * CPUs with IBRS_ATT *might* it be avoided. ++ * CPUs with IBRS_ALL *might* it be avoided. + */ + static inline void vmexit_fill_RSB(void) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch new file mode 100644 index 00000000..fefefa11 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch @@ -0,0 +1,34 @@ +From 1536b0a74f2fb213490c79375ba6accd5b6dc585 Mon Sep 17 00:00:00 2001 +From: Lepton Wu <ytht.net@gmail.com> +Date: Fri, 12 Jan 2018 13:42:56 -0800 +Subject: [PATCH 041/102] kaiser: Set _PAGE_NX only if supported + +This finally resolve crash if loaded under qemu + haxm. 
Haitao Shan pointed +out that the reason of that crash is that NX bit get set for page tables. +It seems we missed checking if _PAGE_NX is supported in kaiser_add_user_map + +Link: https://www.spinics.net/lists/kernel/msg2689835.html + +Reviewed-by: Guenter Roeck <groeck@chromium.org> +Signed-off-by: Lepton Wu <ytht.net@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/kaiser.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index d8376b4..42a5307 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -184,6 +184,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + * requires that not to be #defined to 0): so mask it off here. + */ + flags &= ~_PAGE_GLOBAL; ++ if (!(__supported_pte_mask & _PAGE_NX)) ++ flags &= ~_PAGE_NX; + + for (; address < end_addr; address += PAGE_SIZE) { + target_address = get_pa_from_mapping(address); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch new file mode 100644 index 00000000..1f502096 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch @@ -0,0 +1,94 @@ +From e614d84ae1ca7bad08645003fb3195a80fbdaae1 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Thu, 12 Oct 2017 13:23:16 +0200 +Subject: [PATCH 41/42] x86/microcode: Do the family check first + +commit 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 upstream with adjustments. + +On CPUs like AMD's Geode, for example, we shouldn't even try to load +microcode because they do not support the modern microcode loading +interface. + +However, we do the family check *after* the other checks whether the +loader has been disabled on the command line or whether we're running in +a guest. + +So move the family checks first in order to exit early if we're being +loaded on an unsupported family. + +Reported-and-tested-by: Sven Glodowski <glodi1@arcor.de> +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: <stable@vger.kernel.org> # 4.11.. 
+Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://bugzilla.suse.com/show_bug.cgi?id=1061396 +Link: http://lkml.kernel.org/r/20171012112316.977-1-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/microcode/core.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c +index dc0b9f8..0afaf00 100644 +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -86,9 +86,6 @@ static bool __init check_loader_disabled_bsp(void) + bool *res = &dis_ucode_ldr; + #endif + +- if (!have_cpuid_p()) +- return *res; +- + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); +@@ -130,8 +127,9 @@ void __init load_ucode_bsp(void) + { + int vendor; + unsigned int family; ++ bool intel = true; + +- if (check_loader_disabled_bsp()) ++ if (!have_cpuid_p()) + return; + + vendor = x86_cpuid_vendor(); +@@ -139,16 +137,27 @@ void __init load_ucode_bsp(void) + + switch (vendor) { + case X86_VENDOR_INTEL: +- if (family >= 6) +- load_ucode_intel_bsp(); ++ if (family < 6) ++ return; + break; ++ + case X86_VENDOR_AMD: +- if (family >= 0x10) +- load_ucode_amd_bsp(family); ++ if (family < 0x10) ++ return; ++ intel = false; + break; ++ + default: +- break; ++ return; + } ++ ++ if (check_loader_disabled_bsp()) ++ return; ++ ++ if (intel) ++ load_ucode_intel_bsp(); ++ else ++ load_ucode_amd_bsp(family); + } + + static bool check_loader_disabled_ap(void) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-move-fixup_bpf_calls-function.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-move-fixup_bpf_calls-function.patch new file mode 100644 index 00000000..1da99d50 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-move-fixup_bpf_calls-function.patch @@ -0,0 +1,169 @@ +From c4086a8adedd648b76aa589e5f0c440c61234275 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@fb.com> +Date: Wed, 15 Mar 2017 18:26:39 -0700 +Subject: [PATCH 042/102] bpf: move fixup_bpf_calls() function + +commit e245c5c6a5656e4d61aa7bb08e9694fd6e5b2b9d upstream. + +no functional change. +move fixup_bpf_calls() to verifier.c +it's being refactored in the next patch + +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Cc: Jiri Slaby <jslaby@suse.cz> +[backported to 4.9 - gregkh] +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/syscall.c | 54 --------------------------------------------------- + kernel/bpf/verifier.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 54 insertions(+), 54 deletions(-) + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 237f3d6..6ae783b 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -539,57 +539,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) + list_add(&tl->list_node, &bpf_prog_types); + } + +-/* fixup insn->imm field of bpf_call instructions: +- * if (insn->imm == BPF_FUNC_map_lookup_elem) +- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; +- * else if (insn->imm == BPF_FUNC_map_update_elem) +- * insn->imm = bpf_map_update_elem - __bpf_call_base; +- * else ... +- * +- * this function is called after eBPF program passed verification +- */ +-static void fixup_bpf_calls(struct bpf_prog *prog) +-{ +- const struct bpf_func_proto *fn; +- int i; +- +- for (i = 0; i < prog->len; i++) { +- struct bpf_insn *insn = &prog->insnsi[i]; +- +- if (insn->code == (BPF_JMP | BPF_CALL)) { +- /* we reach here when program has bpf_call instructions +- * and it passed bpf_check(), means that +- * ops->get_func_proto must have been supplied, check it +- */ +- BUG_ON(!prog->aux->ops->get_func_proto); +- +- if (insn->imm == BPF_FUNC_get_route_realm) +- prog->dst_needed = 1; +- if (insn->imm == BPF_FUNC_get_prandom_u32) +- bpf_user_rnd_init_once(); +- if (insn->imm == BPF_FUNC_tail_call) { +- /* mark bpf_tail_call as different opcode +- * to avoid conditional branch in +- * interpeter for every normal call +- * and to prevent accidental JITing by +- * JIT compiler that doesn't support +- * bpf_tail_call yet +- */ +- insn->imm = 0; +- insn->code |= BPF_X; +- continue; +- } +- +- fn = prog->aux->ops->get_func_proto(insn->imm); +- /* all functions that have prototype and verifier allowed +- * programs to call them, must be real in-kernel functions +- */ +- BUG_ON(!fn->func); +- insn->imm = fn->func - __bpf_call_base; +- } +- } +-} +- + /* drop refcnt on maps used by eBPF program and free auxilary data */ + static void free_used_maps(struct bpf_prog_aux *aux) + { +@@ -782,9 +731,6 @@ static int bpf_prog_load(union bpf_attr *attr) + if (err < 0) + goto free_used_maps; + +- /* fixup BPF_CALL->imm field */ +- fixup_bpf_calls(prog); +- + /* eBPF program is ready to be JITed */ + prog = bpf_prog_select_runtime(prog, &err); + if (err < 0) +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 85d1c94..b960a3a 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3085,6 +3085,57 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) + return 0; + } + ++/* fixup insn->imm field of bpf_call instructions: ++ * if (insn->imm == BPF_FUNC_map_lookup_elem) ++ * insn->imm = bpf_map_lookup_elem - __bpf_call_base; ++ * else if (insn->imm == BPF_FUNC_map_update_elem) ++ * insn->imm = bpf_map_update_elem - __bpf_call_base; ++ * else ... 
++ * ++ * this function is called after eBPF program passed verification ++ */ ++static void fixup_bpf_calls(struct bpf_prog *prog) ++{ ++ const struct bpf_func_proto *fn; ++ int i; ++ ++ for (i = 0; i < prog->len; i++) { ++ struct bpf_insn *insn = &prog->insnsi[i]; ++ ++ if (insn->code == (BPF_JMP | BPF_CALL)) { ++ /* we reach here when program has bpf_call instructions ++ * and it passed bpf_check(), means that ++ * ops->get_func_proto must have been supplied, check it ++ */ ++ BUG_ON(!prog->aux->ops->get_func_proto); ++ ++ if (insn->imm == BPF_FUNC_get_route_realm) ++ prog->dst_needed = 1; ++ if (insn->imm == BPF_FUNC_get_prandom_u32) ++ bpf_user_rnd_init_once(); ++ if (insn->imm == BPF_FUNC_tail_call) { ++ /* mark bpf_tail_call as different opcode ++ * to avoid conditional branch in ++ * interpeter for every normal call ++ * and to prevent accidental JITing by ++ * JIT compiler that doesn't support ++ * bpf_tail_call yet ++ */ ++ insn->imm = 0; ++ insn->code |= BPF_X; ++ continue; ++ } ++ ++ fn = prog->aux->ops->get_func_proto(insn->imm); ++ /* all functions that have prototype and verifier allowed ++ * programs to call them, must be real in-kernel functions ++ */ ++ BUG_ON(!fn->func); ++ insn->imm = fn->func - __bpf_call_base; ++ } ++ } ++} ++ + static void free_states(struct bpf_verifier_env *env) + { + struct bpf_verifier_state_list *sl, *sln; +@@ -3183,6 +3234,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) + /* program is valid, convert *(u32*)(ctx + off) accesses */ + ret = convert_ctx_accesses(env); + ++ if (ret == 0) ++ fixup_bpf_calls(env->prog); ++ + if (log_level && log_len >= log_size - 1) { + BUG_ON(log_len >= log_size); + /* verifier log exceeded user supplied buffer */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-refactor-fixup_bpf_calls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-refactor-fixup_bpf_calls.patch new file mode 100644 index 00000000..c8b58125 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-refactor-fixup_bpf_calls.patch @@ -0,0 +1,125 @@ +From b0daedd01e01895c380eba49711304ea80df9c2b Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@fb.com> +Date: Wed, 15 Mar 2017 18:26:40 -0700 +Subject: [PATCH 043/102] bpf: refactor fixup_bpf_calls() + +commit 79741b3bdec01a8628368fbcfccc7d189ed606cb upstream. + +reduce indent and make it iterate over instructions similar to +convert_ctx_accesses(). Also convert hard BUG_ON into soft verifier error. + +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: David S. Miller <davem@davemloft.net> +Cc: Jiri Slaby <jslaby@suse.cz> +[Backported to 4.9.y - gregkh] +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++--------------------------- + 1 file changed, 34 insertions(+), 39 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index b960a3a..5118d3e 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3085,55 +3085,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) + return 0; + } + +-/* fixup insn->imm field of bpf_call instructions: +- * if (insn->imm == BPF_FUNC_map_lookup_elem) +- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; +- * else if (insn->imm == BPF_FUNC_map_update_elem) +- * insn->imm = bpf_map_update_elem - __bpf_call_base; +- * else ... 
++/* fixup insn->imm field of bpf_call instructions + * + * this function is called after eBPF program passed verification + */ +-static void fixup_bpf_calls(struct bpf_prog *prog) ++static int fixup_bpf_calls(struct bpf_verifier_env *env) + { ++ struct bpf_prog *prog = env->prog; ++ struct bpf_insn *insn = prog->insnsi; + const struct bpf_func_proto *fn; ++ const int insn_cnt = prog->len; + int i; + +- for (i = 0; i < prog->len; i++) { +- struct bpf_insn *insn = &prog->insnsi[i]; ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL)) ++ continue; + +- if (insn->code == (BPF_JMP | BPF_CALL)) { +- /* we reach here when program has bpf_call instructions +- * and it passed bpf_check(), means that +- * ops->get_func_proto must have been supplied, check it +- */ +- BUG_ON(!prog->aux->ops->get_func_proto); +- +- if (insn->imm == BPF_FUNC_get_route_realm) +- prog->dst_needed = 1; +- if (insn->imm == BPF_FUNC_get_prandom_u32) +- bpf_user_rnd_init_once(); +- if (insn->imm == BPF_FUNC_tail_call) { +- /* mark bpf_tail_call as different opcode +- * to avoid conditional branch in +- * interpeter for every normal call +- * and to prevent accidental JITing by +- * JIT compiler that doesn't support +- * bpf_tail_call yet +- */ +- insn->imm = 0; +- insn->code |= BPF_X; +- continue; +- } ++ if (insn->imm == BPF_FUNC_get_route_realm) ++ prog->dst_needed = 1; ++ if (insn->imm == BPF_FUNC_get_prandom_u32) ++ bpf_user_rnd_init_once(); ++ if (insn->imm == BPF_FUNC_tail_call) { ++ /* mark bpf_tail_call as different opcode to avoid ++ * conditional branch in the interpeter for every normal ++ * call and to prevent accidental JITing by JIT compiler ++ * that doesn't support bpf_tail_call yet ++ */ ++ insn->imm = 0; ++ insn->code |= BPF_X; ++ continue; ++ } + +- fn = prog->aux->ops->get_func_proto(insn->imm); +- /* all functions that have prototype and verifier allowed +- * programs to call them, must be real in-kernel functions +- */ +- BUG_ON(!fn->func); +- insn->imm = fn->func - __bpf_call_base; ++ fn = prog->aux->ops->get_func_proto(insn->imm); ++ /* all functions that have prototype and verifier allowed ++ * programs to call them, must be real in-kernel functions ++ */ ++ if (!fn->func) { ++ verbose("kernel subsystem misconfigured func %d\n", ++ insn->imm); ++ return -EFAULT; + } ++ insn->imm = fn->func - __bpf_call_base; + } ++ ++ return 0; + } + + static void free_states(struct bpf_verifier_env *env) +@@ -3235,7 +3230,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) + ret = convert_ctx_accesses(env); + + if (ret == 0) +- fixup_bpf_calls(env->prog); ++ ret = fixup_bpf_calls(env); + + if (log_level && log_len >= log_size - 1) { + BUG_ON(log_len >= log_size); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-prevent-out-of-bounds-speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-prevent-out-of-bounds-speculation.patch new file mode 100644 index 00000000..c78bafc9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-prevent-out-of-bounds-speculation.patch @@ -0,0 +1,274 @@ +From 282d67fffa131c0df11807ce60f9ff3fea1dc340 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@kernel.org> +Date: Sun, 7 Jan 2018 17:33:02 -0800 +Subject: [PATCH 044/102] bpf: prevent out-of-bounds speculation + +commit b2157399cc9898260d6031c5bfe45fe137c1fbe7 upstream. + +Under speculation, CPUs may mis-predict branches in bounds checks. 
Thus, +memory accesses under a bounds check may be speculated even if the +bounds check fails, providing a primitive for building a side channel. + +To avoid leaking kernel data round up array-based maps and mask the index +after bounds check, so speculated load with out of bounds index will load +either valid value from the array or zero from the padded area. + +Unconditionally mask index for all array types even when max_entries +are not rounded to power of 2 for root user. +When map is created by unpriv user generate a sequence of bpf insns +that includes AND operation to make sure that JITed code includes +the same 'index & index_mask' operation. + +If prog_array map is created by unpriv user replace + bpf_tail_call(ctx, map, index); +with + if (index >= max_entries) { + index &= map->index_mask; + bpf_tail_call(ctx, map, index); + } +(along with roundup to power 2) to prevent out-of-bounds speculation. +There is secondary redundant 'if (index >= max_entries)' in the interpreter +and in all JITs, but they can be optimized later if necessary. + +Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) +cannot be used by unpriv, so no changes there. + +That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on +all architectures with and without JIT. + +v2->v3: +Daniel noticed that attack potentially can be crafted via syscall commands +without loading the program, so add masking to those paths as well. + +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Acked-by: John Fastabend <john.fastabend@gmail.com> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Cc: Jiri Slaby <jslaby@suse.cz> +[ Backported to 4.9 - gregkh ] +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 2 ++ + include/linux/bpf_verifier.h | 6 +++++- + kernel/bpf/arraymap.c | 31 ++++++++++++++++++++++--------- + kernel/bpf/verifier.c | 42 +++++++++++++++++++++++++++++++++++++++--- + 4 files changed, 68 insertions(+), 13 deletions(-) + +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index c201017..0dbb21b 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -43,6 +43,7 @@ struct bpf_map { + u32 max_entries; + u32 map_flags; + u32 pages; ++ bool unpriv_array; + struct user_struct *user; + const struct bpf_map_ops *ops; + struct work_struct work; +@@ -189,6 +190,7 @@ struct bpf_prog_aux { + struct bpf_array { + struct bpf_map map; + u32 elem_size; ++ u32 index_mask; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index a13b031..2edf8de 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -66,7 +66,11 @@ struct bpf_verifier_state_list { + }; + + struct bpf_insn_aux_data { +- enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ ++ union { ++ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ ++ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ ++ }; ++ bool seen; /* this insn was processed by the verifier */ + }; + + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c +index a2ac051..eeb7f1b 100644 +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -47,9 +47,10 @@ static int bpf_array_alloc_percpu(struct 
bpf_array *array) + static struct bpf_map *array_map_alloc(union bpf_attr *attr) + { + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; ++ u32 elem_size, index_mask, max_entries; ++ bool unpriv = !capable(CAP_SYS_ADMIN); + struct bpf_array *array; + u64 array_size; +- u32 elem_size; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || +@@ -64,11 +65,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + + elem_size = round_up(attr->value_size, 8); + ++ max_entries = attr->max_entries; ++ index_mask = roundup_pow_of_two(max_entries) - 1; ++ ++ if (unpriv) ++ /* round up array size to nearest power of 2, ++ * since cpu will speculate within index_mask limits ++ */ ++ max_entries = index_mask + 1; ++ + array_size = sizeof(*array); + if (percpu) +- array_size += (u64) attr->max_entries * sizeof(void *); ++ array_size += (u64) max_entries * sizeof(void *); + else +- array_size += (u64) attr->max_entries * elem_size; ++ array_size += (u64) max_entries * elem_size; + + /* make sure there is no u32 overflow later in round_up() */ + if (array_size >= U32_MAX - PAGE_SIZE) +@@ -82,6 +92,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + if (!array) + return ERR_PTR(-ENOMEM); + } ++ array->index_mask = index_mask; ++ array->map.unpriv_array = unpriv; + + /* copy mandatory map attributes */ + array->map.map_type = attr->map_type; +@@ -115,7 +127,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) + if (unlikely(index >= array->map.max_entries)) + return NULL; + +- return array->value + array->elem_size * index; ++ return array->value + array->elem_size * (index & array->index_mask); + } + + /* Called from eBPF program */ +@@ -127,7 +139,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) + if (unlikely(index >= array->map.max_entries)) + return NULL; + +- return this_cpu_ptr(array->pptrs[index]); ++ return this_cpu_ptr(array->pptrs[index & array->index_mask]); + } + + int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +@@ -147,7 +159,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); +- pptr = array->pptrs[index]; ++ pptr = array->pptrs[index & array->index_mask]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); + off += size; +@@ -195,10 +207,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, + return -EEXIST; + + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +- memcpy(this_cpu_ptr(array->pptrs[index]), ++ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), + value, map->value_size); + else +- memcpy(array->value + array->elem_size * index, ++ memcpy(array->value + ++ array->elem_size * (index & array->index_mask), + value, map->value_size); + return 0; + } +@@ -232,7 +245,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); +- pptr = array->pptrs[index]; ++ pptr = array->pptrs[index & array->index_mask]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); + off += size; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 5118d3e..56a867f 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1165,7 +1165,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) + } + } + +-static int check_call(struct 
bpf_verifier_env *env, int func_id) ++static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + { + struct bpf_verifier_state *state = &env->cur_state; + const struct bpf_func_proto *fn = NULL; +@@ -1216,6 +1216,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) + err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); + if (err) + return err; ++ if (func_id == BPF_FUNC_tail_call) { ++ if (meta.map_ptr == NULL) { ++ verbose("verifier bug\n"); ++ return -EINVAL; ++ } ++ env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; ++ } + err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); + if (err) + return err; +@@ -2799,7 +2806,7 @@ static int do_check(struct bpf_verifier_env *env) + return -EINVAL; + } + +- err = check_call(env, insn->imm); ++ err = check_call(env, insn->imm, insn_idx); + if (err) + return err; + +@@ -3095,7 +3102,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) + struct bpf_insn *insn = prog->insnsi; + const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; +- int i; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_prog *new_prog; ++ struct bpf_map *map_ptr; ++ int i, cnt, delta = 0; ++ + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL)) +@@ -3113,6 +3124,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) + */ + insn->imm = 0; + insn->code |= BPF_X; ++ ++ /* instead of changing every JIT dealing with tail_call ++ * emit two extra insns: ++ * if (index >= max_entries) goto out; ++ * index &= array->index_mask; ++ * to avoid out-of-bounds cpu speculation ++ */ ++ map_ptr = env->insn_aux_data[i + delta].map_ptr; ++ if (!map_ptr->unpriv_array) ++ continue; ++ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, ++ map_ptr->max_entries, 2); ++ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, ++ container_of(map_ptr, ++ struct bpf_array, ++ map)->index_mask); ++ insn_buf[2] = *insn; ++ cnt = 3; ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; + continue; + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch new file mode 100644 index 00000000..def8e08e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch @@ -0,0 +1,83 @@ +From ac13c748f64dbc040dc206f6cc3665f6218d3cd4 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Wed, 10 Jan 2018 23:25:05 +0100 +Subject: [PATCH 045/102] bpf, array: fix overflow in max_entries and undefined + behavior in index_mask + +commit bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 upstream. + +syzkaller tried to alloc a map with 0xfffffffd entries out of a userns, +and thus unprivileged. With the recently added logic in b2157399cc98 +("bpf: prevent out-of-bounds speculation") we round this up to the next +power of two value for max_entries for unprivileged such that we can +apply proper masking into potentially zeroed out map slots. + +However, this will generate an index_mask of 0xffffffff, and therefore +a + 1 will let this overflow into new max_entries of 0. 
This will pass +allocation, etc, and later on map access we still enforce on the original +attr->max_entries value which was 0xfffffffd, therefore triggering GPF +all over the place. Thus bail out on overflow in such case. + +Moreover, on 32 bit archs roundup_pow_of_two() can also not be used, +since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit +space is undefined. Therefore, do this by hand in a 64 bit variable. + +This fixes all the issues triggered by syzkaller's reproducers. + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com +Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com +Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com +Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com +Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/arraymap.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c +index eeb7f1b..c6c0b62 100644 +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -50,7 +50,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); + struct bpf_array *array; +- u64 array_size; ++ u64 array_size, mask64; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || +@@ -66,13 +66,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + elem_size = round_up(attr->value_size, 8); + + max_entries = attr->max_entries; +- index_mask = roundup_pow_of_two(max_entries) - 1; + +- if (unpriv) ++ /* On 32 bit archs roundup_pow_of_two() with max_entries that has ++ * upper most bit set in u32 space is undefined behavior due to ++ * resulting 1U << 32, so do it manually here in u64 space. ++ */ ++ mask64 = fls_long(max_entries - 1); ++ mask64 = 1ULL << mask64; ++ mask64 -= 1; ++ ++ index_mask = mask64; ++ if (unpriv) { + /* round up array size to nearest power of 2, + * since cpu will speculate within index_mask limits + */ + max_entries = index_mask + 1; ++ /* Check for overflows. */ ++ if (max_entries < attr->max_entries) ++ return ERR_PTR(-E2BIG); ++ } + + array_size = sizeof(*array); + if (percpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-Documentation-Add-PTI-description.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-Documentation-Add-PTI-description.patch new file mode 100644 index 00000000..ccc9e1ab --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-Documentation-Add-PTI-description.patch @@ -0,0 +1,267 @@ +From 302892e3e6f39a49526bee06934553f0738271fd Mon Sep 17 00:00:00 2001 +From: Dave Hansen <dave.hansen@linux.intel.com> +Date: Fri, 5 Jan 2018 09:44:36 -0800 +Subject: [PATCH 046/102] x86/Documentation: Add PTI description + +commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream. + +Add some details about how PTI works, what some of the downsides +are, and how to debug it when things go wrong. + +Also document the kernel parameter: 'pti/nopti'. 
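
For intuition, an on/off/auto boot switch such as pti= is typically wired
up through an early_param() handler so the decision is available before the
page tables are finalized. A minimal sketch, assuming an invented pti_mode
variable and invented handler names (the real 4.9 KAISER parser is not shown
in this patch and differs in detail):

    #include <linux/init.h>
    #include <linux/errno.h>
    #include <linux/string.h>

    /* Illustrative only: pti_mode and both handlers are invented here
     * to show the usual parsing idiom for an on/off/auto boot switch.
     */
    static int pti_mode __initdata = -1;    /* -1 = auto, 0 = off, 1 = on */

    static int __init pti_parse_cmdline(char *arg)
    {
            if (!arg)
                    return -EINVAL;
            if (!strcmp(arg, "on"))
                    pti_mode = 1;
            else if (!strcmp(arg, "off"))
                    pti_mode = 0;
            else if (!strcmp(arg, "auto"))
                    pti_mode = -1;
            return 0;
    }
    early_param("pti", pti_parse_cmdline);

    static int __init nopti_parse_cmdline(char *arg)
    {
            pti_mode = 0;                   /* nopti is equivalent to pti=off */
            return 0;
    }
    early_param("nopti", nopti_parse_cmdline);
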
+ +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Randy Dunlap <rdunlap@infradead.org> +Reviewed-by: Kees Cook <keescook@chromium.org> +Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at> +Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at> +Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at> +Cc: Richard Fellner <richard.fellner@student.tugraz.at> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Hugh Dickins <hughd@google.com> +Cc: Andi Lutomirsky <luto@kernel.org> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 21 ++-- + Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 200 insertions(+), 7 deletions(-) + create mode 100644 Documentation/x86/pti.txt + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 9f04c53..3d53778 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2753,8 +2753,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + + nojitter [IA-64] Disables jitter checking for ITC timers. + +- nopti [X86-64] Disable KAISER isolation of kernel from user. +- + no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver + + no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page +@@ -3317,11 +3315,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + pt. [PARIDE] + See Documentation/blockdev/paride.txt. + +- pti= [X86_64] +- Control KAISER user/kernel address space isolation: +- on - enable +- off - disable +- auto - default setting ++ pti= [X86_64] Control Page Table Isolation of user and ++ kernel address spaces. Disabling this feature ++ removes hardening, but improves performance of ++ system calls and interrupts. ++ ++ on - unconditionally enable ++ off - unconditionally disable ++ auto - kernel detects whether your CPU model is ++ vulnerable to issues that PTI mitigates ++ ++ Not specifying this option is equivalent to pti=auto. ++ ++ nopti [X86_64] ++ Equivalent to pti=off + + pty.legacy_count= + [KNL] Number of legacy pty's. Overwrites compiled-in +diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt +new file mode 100644 +index 0000000..d11eff6 +--- /dev/null ++++ b/Documentation/x86/pti.txt +@@ -0,0 +1,186 @@ ++Overview ++======== ++ ++Page Table Isolation (pti, previously known as KAISER[1]) is a ++countermeasure against attacks on the shared user/kernel address ++space such as the "Meltdown" approach[2]. ++ ++To mitigate this class of attacks, we create an independent set of ++page tables for use only when running userspace applications. When ++the kernel is entered via syscalls, interrupts or exceptions, the ++page tables are switched to the full "kernel" copy. When the system ++switches back to user mode, the user copy is used again. ++ ++The userspace page tables contain only a minimal amount of kernel ++data: only what is needed to enter/exit the kernel such as the ++entry/exit functions themselves and the interrupt descriptor table ++(IDT). There are a few strictly unnecessary things that get mapped ++such as the first C function when entering an interrupt (see ++comments in pti.c). 
++ ++This approach helps to ensure that side-channel attacks leveraging ++the paging structures do not function when PTI is enabled. It can be ++enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. ++Once enabled at compile-time, it can be disabled at boot with the ++'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). ++ ++Page Table Management ++===================== ++ ++When PTI is enabled, the kernel manages two sets of page tables. ++The first set is very similar to the single set which is present in ++kernels without PTI. This includes a complete mapping of userspace ++that the kernel can use for things like copy_to_user(). ++ ++Although _complete_, the user portion of the kernel page tables is ++crippled by setting the NX bit in the top level. This ensures ++that any missed kernel->user CR3 switch will immediately crash ++userspace upon executing its first instruction. ++ ++The userspace page tables map only the kernel data needed to enter ++and exit the kernel. This data is entirely contained in the 'struct ++cpu_entry_area' structure which is placed in the fixmap which gives ++each CPU's copy of the area a compile-time-fixed virtual address. ++ ++For new userspace mappings, the kernel makes the entries in its ++page tables like normal. The only difference is when the kernel ++makes entries in the top (PGD) level. In addition to setting the ++entry in the main kernel PGD, a copy of the entry is made in the ++userspace page tables' PGD. ++ ++This sharing at the PGD level also inherently shares all the lower ++layers of the page tables. This leaves a single, shared set of ++userspace page tables to manage. One PTE to lock, one set of ++accessed bits, dirty bits, etc... ++ ++Overhead ++======== ++ ++Protection against side-channel attacks is important. But, ++this protection comes at a cost: ++ ++1. Increased Memory Use ++ a. Each process now needs an order-1 PGD instead of order-0. ++ (Consumes an additional 4k per process). ++ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB ++ aligned so that it can be mapped by setting a single PMD ++ entry. This consumes nearly 2MB of RAM once the kernel ++ is decompressed, but no space in the kernel image itself. ++ ++2. Runtime Cost ++ a. CR3 manipulation to switch between the page table copies ++ must be done at interrupt, syscall, and exception entry ++ and exit (it can be skipped when the kernel is interrupted, ++ though.) Moves to CR3 are on the order of a hundred ++ cycles, and are required at every entry and exit. ++ b. A "trampoline" must be used for SYSCALL entry. This ++ trampoline depends on a smaller set of resources than the ++ non-PTI SYSCALL entry code, so requires mapping fewer ++ things into the userspace page tables. The downside is ++ that stacks must be switched at entry time. ++ d. Global pages are disabled for all kernel structures not ++ mapped into both kernel and userspace page tables. This ++ feature of the MMU allows different processes to share TLB ++ entries mapping the kernel. Losing the feature means more ++ TLB misses after a context switch. The actual loss of ++ performance is very small, however, never exceeding 1%. ++ d. Process Context IDentifiers (PCID) is a CPU feature that ++ allows us to skip flushing the entire TLB when switching page ++ tables by setting a special bit in CR3 when the page tables ++ are changed. This makes switching the page tables (at context ++ switch, or kernel entry/exit) cheaper. 
But, on systems with ++ PCID support, the context switch code must flush both the user ++ and kernel entries out of the TLB. The user PCID TLB flush is ++ deferred until the exit to userspace, minimizing the cost. ++ See intel.com/sdm for the gory PCID/INVPCID details. ++ e. The userspace page tables must be populated for each new ++ process. Even without PTI, the shared kernel mappings ++ are created by copying top-level (PGD) entries into each ++ new process. But, with PTI, there are now *two* kernel ++ mappings: one in the kernel page tables that maps everything ++ and one for the entry/exit structures. At fork(), we need to ++ copy both. ++ f. In addition to the fork()-time copying, there must also ++ be an update to the userspace PGD any time a set_pgd() is done ++ on a PGD used to map userspace. This ensures that the kernel ++ and userspace copies always map the same userspace ++ memory. ++ g. On systems without PCID support, each CR3 write flushes ++ the entire TLB. That means that each syscall, interrupt ++ or exception flushes the TLB. ++ h. INVPCID is a TLB-flushing instruction which allows flushing ++ of TLB entries for non-current PCIDs. Some systems support ++ PCIDs, but do not support INVPCID. On these systems, addresses ++ can only be flushed from the TLB for the current PCID. When ++ flushing a kernel address, we need to flush all PCIDs, so a ++ single kernel address flush will require a TLB-flushing CR3 ++ write upon the next use of every PCID. ++ ++Possible Future Work ++==================== ++1. We can be more careful about not actually writing to CR3 ++ unless its value is actually changed. ++2. Allow PTI to be enabled/disabled at runtime in addition to the ++ boot-time switching. ++ ++Testing ++======== ++ ++To test stability of PTI, the following test procedure is recommended, ++ideally doing all of these in parallel: ++ ++1. Set CONFIG_DEBUG_ENTRY=y ++2. Run several copies of all of the tools/testing/selftests/x86/ tests ++ (excluding MPX and protection_keys) in a loop on multiple CPUs for ++ several minutes. These tests frequently uncover corner cases in the ++ kernel entry code. In general, old kernels might cause these tests ++ themselves to crash, but they should never crash the kernel. ++3. Run the 'perf' tool in a mode (top or record) that generates many ++ frequent performance monitoring non-maskable interrupts (see "NMI" ++ in /proc/interrupts). This exercises the NMI entry/exit code which ++ is known to trigger bugs in code paths that did not expect to be ++ interrupted, including nested NMIs. Using "-c" boosts the rate of ++ NMIs, and using two -c with separate counters encourages nested NMIs ++ and less deterministic behavior. ++ ++ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done ++ ++4. Launch a KVM virtual machine. ++5. Run 32-bit binaries on systems supporting the SYSCALL instruction. ++ This has been a lightly-tested code path and needs extra scrutiny. ++ ++Debugging ++========= ++ ++Bugs in PTI cause a few different signatures of crashes ++that are worth noting here. ++ ++ * Failures of the selftests/x86 code. Usually a bug in one of the ++ more obscure corners of entry_64.S ++ * Crashes in early boot, especially around CPU bringup. Bugs ++ in the trampoline code or mappings cause these. ++ * Crashes at the first interrupt. Caused by bugs in entry_64.S, ++ like screwing up a page table switch. Also caused by ++ incorrectly mapping the IRQ handler entry code. ++ * Crashes at the first NMI. 
The NMI code is separate from main ++ interrupt handlers and can have bugs that do not affect ++ normal interrupts. Also caused by incorrectly mapping NMI ++ code. NMIs that interrupt the entry code must be very ++ careful and can be the cause of crashes that show up when ++ running perf. ++ * Kernel crashes at the first exit to userspace. entry_64.S ++ bugs, or failing to map some of the exit code. ++ * Crashes at first interrupt that interrupts userspace. The paths ++ in entry_64.S that return to userspace are sometimes separate ++ from the ones that return to the kernel. ++ * Double faults: overflowing the kernel stack because of page ++ faults upon page faults. Caused by touching non-pti-mapped ++ data in the entry code, or forgetting to switch to kernel ++ CR3 before calling into C functions which are not pti-mapped. ++ * Userspace segfaults early in boot, sometimes manifesting ++ as mount(8) failing to mount the rootfs. These have ++ tended to be TLB invalidation issues. Usually invalidating ++ the wrong PCID, or otherwise missing an invalidation. ++ ++1. https://gruss.cc/files/kaiser.pdf ++2. https://meltdownattack.com/meltdown.pdf +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch new file mode 100644 index 00000000..dddc2038 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch @@ -0,0 +1,81 @@ +From bf7f7fb7085a06cf290b2736a6d8f65caceec373 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Wed, 18 Jan 2017 11:15:38 -0800 +Subject: [PATCH 047/102] x86/cpu: Factor out application of forced CPU caps + +commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream. + +There are multiple call sites that apply forced CPU caps. Factor +them into a helper. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: Fenghua Yu <fenghua.yu@intel.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Matthew Whitehead <tedheadster@gmail.com> +Cc: Oleg Nesterov <oleg@redhat.com> +Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Yu-cheng Yu <yu-cheng.yu@intel.com> +Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 918e447..4c65225 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c) + } + } + ++static void apply_forced_caps(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ for (i = 0; i < NCAPINTS; i++) { ++ c->x86_capability[i] &= ~cpu_caps_cleared[i]; ++ c->x86_capability[i] |= cpu_caps_set[i]; ++ } ++} ++ + void get_cpu_cap(struct cpuinfo_x86 *c) + { + u32 eax, ebx, ecx, edx; +@@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) + this_cpu->c_identify(c); + + /* Clear/Set all flags overridden by options, after probe */ +- for (i = 0; i < NCAPINTS; i++) { +- c->x86_capability[i] &= ~cpu_caps_cleared[i]; +- c->x86_capability[i] |= cpu_caps_set[i]; +- } ++ apply_forced_caps(c); + + #ifdef CONFIG_X86_64 + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); +@@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) + * Clear/Set all flags overridden by options, need do it + * before following smp all cpus cap AND. + */ +- for (i = 0; i < NCAPINTS; i++) { +- c->x86_capability[i] &= ~cpu_caps_cleared[i]; +- c->x86_capability[i] |= cpu_caps_set[i]; +- } ++ apply_forced_caps(c); + + /* + * On SMP, boot_cpu_data holds the common feature set between +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpufeatures-Make-CPU-bugs-sticky.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpufeatures-Make-CPU-bugs-sticky.patch new file mode 100644 index 00000000..ac804370 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpufeatures-Make-CPU-bugs-sticky.patch @@ -0,0 +1,102 @@ +From 3d4dfd5b131c277aa7d44d00d1d6e7d084020b4c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon, 4 Dec 2017 15:07:32 +0100 +Subject: [PATCH 048/102] x86/cpufeatures: Make CPU bugs sticky + +commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream. + +There is currently no way to force CPU bug bits like CPU feature bits. That +makes it impossible to set a bug bit once at boot and have it stick for all +upcoming CPUs. + +Extend the force set/clear arrays to handle bug bits as well. 
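
The semantics are easy to model in isolation: the force arrays act as a
persistent overlay that is folded into every CPU's capability words. A
self-contained sketch with simplified names (a model, not kernel code):

    #include <stdint.h>

    #define NWORDS 4    /* stands in for NCAPINTS + NBUGINTS */

    static uint32_t caps_cleared[NWORDS];   /* bits forced off on every CPU */
    static uint32_t caps_set[NWORDS];       /* bits forced on on every CPU */

    /* Same fold as apply_forced_caps(): clear first, then set, so a bit
     * present in both arrays ends up set.
     */
    static void apply_forced(uint32_t *cpu_caps)
    {
            int i;

            for (i = 0; i < NWORDS; i++) {
                    cpu_caps[i] &= ~caps_cleared[i];
                    cpu_caps[i] |= caps_set[i];
            }
    }

Widening both arrays to NCAPINTS + NBUGINTS is what lets the
setup_force_cpu_bug() wrapper below reduce to setup_force_cpu_cap(): bug
bits live in the same x86_capability words as feature bits.
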
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Borislav Petkov <bpetkov@suse.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Laight <David.Laight@aculab.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: Eduardo Valentin <eduval@amazon.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Juergen Gross <jgross@suse.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Rik van Riel <riel@redhat.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeature.h | 2 ++ + arch/x86/include/asm/processor.h | 4 ++-- + arch/x86/kernel/cpu/common.c | 6 +++--- + 3 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index 1d2b69f..9ea67a0 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + set_bit(bit, (unsigned long *)cpu_caps_set); \ + } while (0) + ++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) ++ + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) + /* + * Static testing of CPU features. Used the same as boot_cpu_has(). 
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 8cb52ee..e40b19c 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 new_cpu_data; + + extern struct tss_struct doublefault_tss; +-extern __u32 cpu_caps_cleared[NCAPINTS]; +-extern __u32 cpu_caps_set[NCAPINTS]; ++extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; + + #ifdef CONFIG_SMP + DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4c65225..ba9b601 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) + return NULL; /* Not found */ + } + +-__u32 cpu_caps_cleared[NCAPINTS]; +-__u32 cpu_caps_set[NCAPINTS]; ++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; + + void load_percpu_segment(int cpu) + { +@@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) + { + int i; + +- for (i = 0; i < NCAPINTS; i++) { ++ for (i = 0; i < NCAPINTS + NBUGINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch new file mode 100644 index 00000000..98fc9866 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch @@ -0,0 +1,78 @@ +From 2d3a9eb98a32cc9405922494625e110db3123e77 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon, 4 Dec 2017 15:07:33 +0100 +Subject: [PATCH 049/102] x86/cpufeatures: Add X86_BUG_CPU_INSECURE + +commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream. + +Many x86 CPUs leak information to user space due to missing isolation of +user space and kernel space page tables. There are many well documented +ways to exploit that. + +The upcoming software migitation of isolating the user and kernel space +page tables needs a misfeature flag so code can be made runtime +conditional. + +Add the BUG bits which indicates that the CPU is affected and add a feature +bit which indicates that the software migitation is enabled. + +Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be +made later. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Laight <David.Laight@aculab.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: Eduardo Valentin <eduval@amazon.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Juergen Gross <jgross@suse.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Will Deacon <will.deacon@arm.com> +Cc: aliguori@amazon.com +Cc: daniel.gruss@iaik.tugraz.at +Cc: hughd@google.com +Cc: keescook@google.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/common.c | 4 ++++ + 2 files changed, 5 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 454a37a..57bd52c 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -316,5 +316,6 @@ + #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ + #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ + #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ ++#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index ba9b601..8c81adc 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -882,6 +882,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + } + + setup_force_cpu_cap(X86_FEATURE_ALWAYS); ++ ++ /* Assume for now that ALL x86 CPUs are insecure */ ++ setup_force_cpu_bug(X86_BUG_CPU_INSECURE); ++ + fpu__init_system(c); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch new file mode 100644 index 00000000..6bf13885 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch @@ -0,0 +1,61 @@ +From 6490dc4544ecf2d877b7f332371111942f7665e8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 5 Jan 2018 15:27:34 +0100 +Subject: [PATCH 050/102] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN + +commit de791821c295cc61419a06fe5562288417d1bc58 upstream. + +Use the name associated with the particular attack which needs page table +isolation for mitigation. 
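
Naming the bit after the attack also keeps runtime-conditional call sites
self-explanatory. A hedged sketch of that idiom (boot_cpu_has_bug() is the
real accessor; the surrounding function is invented for illustration):

    /* Illustrative only: enable_pti_if_needed() is not a real kernel
     * function; it shows how mitigation code keys off the bug bit.
     */
    static void enable_pti_if_needed(void)
    {
            if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                    return;         /* CPU not affected, skip the overhead */

            /* ... switch on kernel page-table isolation here ... */
    }
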
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> +Cc: Jiri Koshina <jikos@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Andi Lutomirski <luto@amacapital.net> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Paul Turner <pjt@google.com> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Greg KH <gregkh@linux-foundation.org> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos +Signed-off-by: Razvan Ghitulete <rga@amazon.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/kernel/cpu/common.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 57bd52c..985dfd7 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -316,6 +316,6 @@ + #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ + #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ + #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +-#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ ++#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 8c81adc..5ab4fd7 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -884,7 +884,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + /* Assume for now that ALL x86 CPUs are insecure */ +- setup_force_cpu_bug(X86_BUG_CPU_INSECURE); ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + fpu__init_system(c); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch new file mode 100644 index 00000000..f91b8a57 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch @@ -0,0 +1,62 @@ +From d01b76fe5c2bcac20ddf912ef5964f37a5ae2e55 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sat, 6 Jan 2018 11:49:23 +0000 +Subject: [PATCH 051/102] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] + +commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream. + +Add the bug bits for spectre v1/2 and force them unconditionally for all +cpus. 
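
Patches 0053 and 0054 below surface these bug bits to userspace under
/sys/devices/system/cpu/vulnerabilities. A small userspace check against
that interface might look like this (sketch, minimal error handling):

    #include <stdio.h>

    /* Print the status strings exposed by the sysfs interface added
     * later in this series.
     */
    int main(void)
    {
            static const char *files[] = {
                    "/sys/devices/system/cpu/vulnerabilities/meltdown",
                    "/sys/devices/system/cpu/vulnerabilities/spectre_v1",
                    "/sys/devices/system/cpu/vulnerabilities/spectre_v2",
            };
            char line[128];
            int i;

            for (i = 0; i < 3; i++) {
                    FILE *f = fopen(files[i], "r");

                    if (!f || !fgets(line, sizeof(line), f)) {
                            printf("%s: unavailable\n", files[i]);
                            if (f)
                                    fclose(f);
                            continue;
                    }
                    printf("%s: %s", files[i], line);  /* line keeps its '\n' */
                    fclose(f);
            }
            return 0;
    }
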
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Razvan Ghitulete <rga@amazon.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 ++ + arch/x86/kernel/cpu/common.c | 3 +++ + 2 files changed, 5 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 985dfd7..f364c891 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -317,5 +317,7 @@ + #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ + #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ ++#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ ++#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 5ab4fd7..8339b43 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -886,6 +886,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + /* Assume for now that ALL x86 CPUs are insecure */ + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ + fpu__init_system(c); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch new file mode 100644 index 00000000..9e7cb785 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch @@ -0,0 +1,141 @@ +From c0a3941a56392b9e536c4dbae9c1c85a9b2efa50 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Mon, 24 Oct 2016 19:38:43 +0200 +Subject: [PATCH 052/102] x86/cpu: Merge bugs.c and bugs_64.c + +commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream. + +Should be easier when following boot paths. It probably is a left over +from the x86 unification eons ago. + +No functionality change. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Razvan Ghitulete <rga@amazon.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/Makefile | 4 +--- + arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++---- + arch/x86/kernel/cpu/bugs_64.c | 33 --------------------------------- + 3 files changed, 23 insertions(+), 40 deletions(-) + delete mode 100644 arch/x86/kernel/cpu/bugs_64.c + +diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile +index 4a8697f..33b6367 100644 +--- a/arch/x86/kernel/cpu/Makefile ++++ b/arch/x86/kernel/cpu/Makefile +@@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o + obj-y += common.o + obj-y += rdrand.o + obj-y += match.o ++obj-y += bugs.o + + obj-$(CONFIG_PROC_FS) += proc.o + obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o + +-obj-$(CONFIG_X86_32) += bugs.o +-obj-$(CONFIG_X86_64) += bugs_64.o +- + obj-$(CONFIG_CPU_SUP_INTEL) += intel.o + obj-$(CONFIG_CPU_SUP_AMD) += amd.o + obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index bd17db1..a44ef52 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -16,15 +16,19 @@ + #include <asm/msr.h> + #include <asm/paravirt.h> + #include <asm/alternative.h> ++#include <asm/pgtable.h> ++#include <asm/cacheflush.h> + + void __init check_bugs(void) + { + identify_boot_cpu(); +-#ifndef CONFIG_SMP +- pr_info("CPU: "); +- print_cpu_info(&boot_cpu_data); +-#endif + ++ if (!IS_ENABLED(CONFIG_SMP)) { ++ pr_info("CPU: "); ++ print_cpu_info(&boot_cpu_data); ++ } ++ ++#ifdef CONFIG_X86_32 + /* + * Check whether we are able to run this kernel safely on SMP. + * +@@ -40,4 +44,18 @@ void __init check_bugs(void) + alternative_instructions(); + + fpu__init_check_bugs(); ++#else /* CONFIG_X86_64 */ ++ alternative_instructions(); ++ ++ /* ++ * Make sure the first 2MB area is not mapped by huge pages ++ * There are typically fixed size MTRRs in there and overlapping ++ * MTRRs into large pages causes slow downs. ++ * ++ * Right now we don't do that with gbpages because there seems ++ * very little benefit for that case. ++ */ ++ if (!direct_gbpages) ++ set_memory_4k((unsigned long)__va(0), 1); ++#endif + } +diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c +deleted file mode 100644 +index a972ac4..0000000 +--- a/arch/x86/kernel/cpu/bugs_64.c ++++ /dev/null +@@ -1,33 +0,0 @@ +-/* +- * Copyright (C) 1994 Linus Torvalds +- * Copyright (C) 2000 SuSE +- */ +- +-#include <linux/kernel.h> +-#include <linux/init.h> +-#include <asm/alternative.h> +-#include <asm/bugs.h> +-#include <asm/processor.h> +-#include <asm/mtrr.h> +-#include <asm/cacheflush.h> +- +-void __init check_bugs(void) +-{ +- identify_boot_cpu(); +-#if !defined(CONFIG_SMP) +- pr_info("CPU: "); +- print_cpu_info(&boot_cpu_data); +-#endif +- alternative_instructions(); +- +- /* +- * Make sure the first 2MB area is not mapped by huge pages +- * There are typically fixed size MTRRs in there and overlapping +- * MTRRs into large pages causes slow downs. +- * +- * Right now we don't do that with gbpages because there seems +- * very little benefit for that case. 
+- */ +- if (!direct_gbpages) +- set_memory_4k((unsigned long)__va(0), 1); +-} +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-sysfs-cpu-Add-vulnerability-folder.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-sysfs-cpu-Add-vulnerability-folder.patch new file mode 100644 index 00000000..117c98ae --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-sysfs-cpu-Add-vulnerability-folder.patch @@ -0,0 +1,157 @@ +From e288c57f82b662d0bc2d8fcf64e78e23c4c77919 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 7 Jan 2018 22:48:00 +0100 +Subject: [PATCH 053/102] sysfs/cpu: Add vulnerability folder + +commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream. + +As the meltdown/spectre problem affects several CPU architectures, it makes +sense to have common way to express whether a system is affected by a +particular vulnerability or not. If affected the way to express the +mitigation should be common as well. + +Create /sys/devices/system/cpu/vulnerabilities folder and files for +meltdown, spectre_v1 and spectre_v2. + +Allow architectures to override the show function. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linuxfoundation.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/ABI/testing/sysfs-devices-system-cpu | 16 ++++++++ + drivers/base/Kconfig | 3 ++ + drivers/base/cpu.c | 48 ++++++++++++++++++++++ + include/linux/cpu.h | 7 ++++ + 4 files changed, 74 insertions(+) + +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index 4987417..8b30a48 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org> + Description: AArch64 CPU registers + 'identification' directory exposes the CPU ID registers for + identifying model and revision of the CPU. ++ ++What: /sys/devices/system/cpu/vulnerabilities ++ /sys/devices/system/cpu/vulnerabilities/meltdown ++ /sys/devices/system/cpu/vulnerabilities/spectre_v1 ++ /sys/devices/system/cpu/vulnerabilities/spectre_v2 ++Date: Januar 2018 ++Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> ++Description: Information about CPU vulnerabilities ++ ++ The files are named after the code names of CPU ++ vulnerabilities. The output of those files reflects the ++ state of the CPUs in the system. 
Possible output values: ++ ++ "Not affected" CPU is not affected by the vulnerability ++ "Vulnerable" CPU is affected and no mitigation in effect ++ "Mitigation: $M" CPU is affetcted and mitigation $M is in effect +diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig +index d02e7c0..0651010 100644 +--- a/drivers/base/Kconfig ++++ b/drivers/base/Kconfig +@@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES + config GENERIC_CPU_AUTOPROBE + bool + ++config GENERIC_CPU_VULNERABILITIES ++ bool ++ + config SOC_BUS + bool + +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index 4c28e1a..56b6c85 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void) + #endif + } + ++#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES ++ ++ssize_t __weak cpu_show_meltdown(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ ++ssize_t __weak cpu_show_spectre_v1(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ ++ssize_t __weak cpu_show_spectre_v2(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ ++static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); ++static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); ++static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); ++ ++static struct attribute *cpu_root_vulnerabilities_attrs[] = { ++ &dev_attr_meltdown.attr, ++ &dev_attr_spectre_v1.attr, ++ &dev_attr_spectre_v2.attr, ++ NULL ++}; ++ ++static const struct attribute_group cpu_root_vulnerabilities_group = { ++ .name = "vulnerabilities", ++ .attrs = cpu_root_vulnerabilities_attrs, ++}; ++ ++static void __init cpu_register_vulnerabilities(void) ++{ ++ if (sysfs_create_group(&cpu_subsys.dev_root->kobj, ++ &cpu_root_vulnerabilities_group)) ++ pr_err("Unable to register CPU vulnerabilities\n"); ++} ++ ++#else ++static inline void cpu_register_vulnerabilities(void) { } ++#endif ++ + void __init cpu_dev_init(void) + { + if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) + panic("Failed to register CPU subsystem"); + + cpu_dev_register_generic(); ++ cpu_register_vulnerabilities(); + } +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index e571128..2f475ad 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); + extern int cpu_add_dev_attr_group(struct attribute_group *attrs); + extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); + ++extern ssize_t cpu_show_meltdown(struct device *dev, ++ struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_spectre_v1(struct device *dev, ++ struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_spectre_v2(struct device *dev, ++ struct device_attribute *attr, char *buf); ++ + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch new file mode 100644 index 00000000..a185b426 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch @@ -0,0 +1,86 @@ +From 
3e068e333715968e6b23151273aba17a960e2bae Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 7 Jan 2018 22:48:01 +0100 +Subject: [PATCH 054/102] x86/cpu: Implement CPU vulnerabilites sysfs functions + +commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream. + +Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and +spectre_v2. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linuxfoundation.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de +Signed-off-by: Razvan Ghitulete <rga@amazon.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/Kconfig | 1 + + arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 5572997..7877ff0 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -64,6 +64,7 @@ config X86 + select GENERIC_CLOCKEVENTS_MIN_ADJUST + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE ++ select GENERIC_CPU_VULNERABILITIES + select GENERIC_EARLY_IOREMAP + select GENERIC_FIND_FIRST_BIT + select GENERIC_IOMAP +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index a44ef52..cb6b4f9 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -9,6 +9,7 @@ + */ + #include <linux/init.h> + #include <linux/utsname.h> ++#include <linux/cpu.h> + #include <asm/bugs.h> + #include <asm/processor.h> + #include <asm/processor-flags.h> +@@ -59,3 +60,31 @@ void __init check_bugs(void) + set_memory_4k((unsigned long)__va(0), 1); + #endif + } ++ ++#ifdef CONFIG_SYSFS ++ssize_t cpu_show_meltdown(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ return sprintf(buf, "Not affected\n"); ++ if (boot_cpu_has(X86_FEATURE_KAISER)) ++ return sprintf(buf, "Mitigation: PTI\n"); ++ return sprintf(buf, "Vulnerable\n"); ++} ++ ++ssize_t cpu_show_spectre_v1(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) ++ return sprintf(buf, "Not affected\n"); ++ return sprintf(buf, "Vulnerable\n"); ++} ++ ++ssize_t cpu_show_spectre_v2(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) ++ return sprintf(buf, "Not affected\n"); ++ return sprintf(buf, "Vulnerable\n"); ++} ++#endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch new file mode 100644 index 00000000..94821fd2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch @@ -0,0 +1,70 @@ +From 0fba5a10f1c7bf91b1d2708d7fb83a10a15ca1a8 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Mon, 8 Jan 2018 16:09:21 -0600 +Subject: [PATCH 055/102] x86/cpu/AMD: Make LFENCE a serializing instruction + +commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream. 
+
+To aid in speculation control, make LFENCE a serializing instruction
+since it has less overhead than MFENCE. This is done by setting bit 1
+of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not
+have this MSR. For these families, the LFENCE instruction is already
+serializing.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 2 ++
+ arch/x86/kernel/cpu/amd.c | 10 ++++++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 78f3760..b1c0969 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -330,6 +330,8 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT 20
+ #define MSR_FAM10H_NODE_ID 0xc001100c
++#define MSR_F10H_DECFG 0xc0011029
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 2b4cf04..8b5b19d 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86 *c)
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
++ /*
++ * A serializing LFENCE has less overhead than MFENCE, so
++ * use it for execution serialization. On families which
++ * don't have that MSR, LFENCE is already serializing.
++ * msr_set_bit() uses the safe accessors, too, even if the MSR
++ * is not present.
++ */
++ msr_set_bit(MSR_F10H_DECFG,
++ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch
new file mode 100644
index 00000000..8c8ff74e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch
@@ -0,0 +1,86 @@
+From d8298febbbb76536a9434b690d5f00eb1e105581 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:32 -0600
+Subject: [PATCH 056/102] x86/cpu/AMD: Use LFENCE_RDTSC in preference to
+ MFENCE_RDTSC
+
+commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream.
+
+With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference
+to MFENCE_RDTSC. However, since the kernel could be running under a
+hypervisor that does not support writing that MSR, read the MSR back and
+verify that the bit has been set successfully. If the MSR can be read
+and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the
+MFENCE_RDTSC feature.
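[Illustrative sketch, not part of the patch: the same read-back check can
be made from userspace through the msr driver; assumes an AMD CPU, a
loaded msr module and root privileges.]

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_F10H_DECFG                  0xc0011029
    #define MSR_F10H_DECFG_LFENCE_SERIALIZE (1ULL << 1)

    int main(void)
    {
            uint64_t val;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);

            /* the offset passed to pread() selects the MSR number */
            if (fd < 0 ||
                pread(fd, &val, sizeof(val), MSR_F10H_DECFG) != sizeof(val)) {
                    perror("rdmsr");        /* MSR missing or access denied */
                    return 1;
            }
            printf("LFENCE serializing: %s\n",
                   (val & MSR_F10H_DECFG_LFENCE_SERIALIZE) ? "yes" : "no");
            return 0;
    }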
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index b1c0969..4eeaa36 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -332,6 +332,7 @@
+ #define MSR_FAM10H_NODE_ID 0xc001100c
+ #define MSR_F10H_DECFG 0xc0011029
+ #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 8b5b19d..1b89f0c 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86 *c)
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
++ unsigned long long val;
++ int ret;
++
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+@@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86 *c)
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+- /* MFENCE stops RDTSC speculation */
+- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++ /*
++ * Verify that the MSR write was successful (could be running
++ * under a hypervisor) and only then assume that LFENCE is
++ * serializing.
++ */
++ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
++ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
++ /* A serializing LFENCE stops RDTSC speculation */
++ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
++ } else {
++ /* MFENCE stops RDTSC speculation */
++ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++ }
+ }
+
+ /*
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch
new file mode 100644
index 00000000..054a149e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch
@@ -0,0 +1,37 @@
+From cd3a47074e2523c0c799121cc9e6b6eee6ebddd4 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 9 Jan 2018 15:02:51 +0000
+Subject: [PATCH 057/102] sysfs/cpu: Fix typos in vulnerability documentation
+
+commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream.
+
+Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index 8b30a48..dfd56ec 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -355,7 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+-Date: Januar 2018
++Date: January 2018
+ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description: Information about CPU vulnerabilities
+
+@@ -365,4 +365,4 @@ Description: Information about CPU vulnerabilities
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+- "Mitigation: $M" CPU is affetcted and mitigation $M is in effect
++ "Mitigation: $M" CPU is affected and mitigation $M is in effect
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-alternatives-Fix-optimize_nops-checking.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-alternatives-Fix-optimize_nops-checking.patch
new file mode 100644
index 00000000..7c47ab88
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-alternatives-Fix-optimize_nops-checking.patch
@@ -0,0 +1,56 @@
+From 6623c9f4b3cdc9c82643c594c2573b60415c8fbb Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 10 Jan 2018 12:28:16 +0100
+Subject: [PATCH 058/102] x86/alternatives: Fix optimize_nops() checking
+
+commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream.
+
+The alternatives code checks only the first byte whether it is a NOP, but
+with NOPs in front of the payload and having actual instructions after it
+breaks the "optimized" test.
+
+Make sure to scan all bytes before deciding to optimize the NOPs in there.
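[Illustrative sketch, not part of the patch: the corrected predicate in
isolation; the pad counts as NOP padding only if every byte is 0x90.]

    #include <linux/types.h>

    /* Return true only if all padlen bytes are NOPs (0x90). Checking
     * just instr[0], as before this fix, misclassifies a pad that has
     * real instructions after a leading NOP. */
    static bool all_nops(const u8 *instr, int padlen)
    {
            int i;

            for (i = 0; i < padlen; i++)
                    if (instr[i] != 0x90)
                            return false;
            return true;
    }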
+ +Reported-by: David Woodhouse <dwmw2@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Andi Kleen <andi@firstfloor.org> +Cc: Andrew Lutomirski <luto@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/alternative.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index 5cb272a..10d5a3d 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -340,9 +340,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) + static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) + { + unsigned long flags; ++ int i; + +- if (instr[0] != 0x90) +- return; ++ for (i = 0; i < a->padlen; i++) { ++ if (instr[i] != 0x90) ++ return; ++ } + + local_irq_save(flags); + add_nops(instr + (a->instrlen - a->padlen), a->padlen); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch new file mode 100644 index 00000000..20286993 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch @@ -0,0 +1,59 @@ +From 3e89fd3d1aabbf6b0ab8ab8400c506f6990e3850 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 4 Jan 2018 14:37:05 +0000 +Subject: [PATCH 059/102] x86/alternatives: Add missing '\n' at end of + ALTERNATIVE inline asm + +commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream. + +Where an ALTERNATIVE is used in the middle of an inline asm block, this +would otherwise lead to the following instruction being appended directly +to the trailing ".popsection", and a failed compile. 
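[Illustrative sketch, not part of the patch: an inline asm block of the
failing shape; the feature bit is chosen arbitrarily. C string
concatenation glues the next instruction onto ".popsection" unless the
ALTERNATIVE expansion ends with '\n'.]

    #include <asm/alternative.h>
    #include <asm/cpufeatures.h>

    static inline unsigned int zero_eax(void)
    {
            unsigned int ret;

            /* Without the trailing '\n' the assembler would see
             * ".popsectionxorl %eax, %eax" as one line and error out. */
            asm(ALTERNATIVE("nop", "lfence", X86_FEATURE_XMM2)
                "xorl %0, %0\n"
                : "=a" (ret));
            return ret;
    }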
+ +Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection") +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: ak@linux.intel.com +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Paul Turner <pjt@google.com> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/alternative.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +index 1b02038..10a4b2c 100644 +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end) + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ +- ".popsection" ++ ".popsection\n" + + #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ +@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end) + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ +- ".popsection" ++ ".popsection\n" + + /* + * Alternative instructions for different CPU types or capabilities. +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch new file mode 100644 index 00000000..d2a1f775 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch @@ -0,0 +1,48 @@ +From c496ed83a15de9cbad1ee25f6fc55fc7b9136f43 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Sun, 17 Sep 2017 09:03:50 -0700 +Subject: [PATCH 060/102] x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) + earlier + +commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream. + +Otherwise we might have the PCID feature bit set during cpu_init(). + +This is just for robustness. I haven't seen any actual bugs here. 
+ +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bpetkov@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") +Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 8339b43..7b9ae04 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -890,6 +890,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + fpu__init_system(c); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Regardless of whether PCID is enumerated, the SDM says ++ * that it can't be enabled in 32-bit mode. ++ */ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++#endif + } + + void __init early_cpu_init(void) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-objtool-modules-Discard-objtool-annotation-sections-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-objtool-modules-Discard-objtool-annotation-sections-.patch new file mode 100644 index 00000000..7f25eb0d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-objtool-modules-Discard-objtool-annotation-sections-.patch @@ -0,0 +1,94 @@ +From c871bf6ce937f7bada3b93ec7473f0673b0d92b5 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Wed, 1 Mar 2017 12:04:44 -0600 +Subject: [PATCH 061/102] objtool, modules: Discard objtool annotation sections + for modules + +commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream. + +The '__unreachable' and '__func_stack_frame_non_standard' sections are +only used at compile time. They're discarded for vmlinux but they +should also be discarded for modules. + +Since this is a recurring pattern, prefix the section names with +".discard.". It's a nice convention and vmlinux.lds.h already discards +such sections. + +Also remove the 'a' (allocatable) flag from the __unreachable section +since it doesn't make sense for a discarded section. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Jessica Yu <jeyu@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") +Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble +Signed-off-by: Ingo Molnar <mingo@kernel.org> +[dwmw2: Remove the unreachable part in backporting since it's not here yet] +Signed-off-by: David Woodhouse <dwmw@amazon.co.ku> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/frame.h | 2 +- + scripts/mod/modpost.c | 1 + + scripts/module-common.lds | 5 ++++- + tools/objtool/builtin-check.c | 2 +- + 4 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/include/linux/frame.h b/include/linux/frame.h +index e6baaba..d772c61 100644 +--- a/include/linux/frame.h ++++ b/include/linux/frame.h +@@ -11,7 +11,7 @@ + * For more information, see tools/objtool/Documentation/stack-validation.txt. 
+ */ + #define STACK_FRAME_NON_STANDARD(func) \ +- static void __used __section(__func_stack_frame_non_standard) \ ++ static void __used __section(.discard.func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + + #else /* !CONFIG_STACK_VALIDATION */ +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index cbb1553..325f1af 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -838,6 +838,7 @@ static const char *const section_white_list[] = + ".cmem*", /* EZchip */ + ".fmt_slot*", /* EZchip */ + ".gnu.lto*", ++ ".discard.*", + NULL + }; + +diff --git a/scripts/module-common.lds b/scripts/module-common.lds +index 73a2c7d..cf7e52e 100644 +--- a/scripts/module-common.lds ++++ b/scripts/module-common.lds +@@ -4,7 +4,10 @@ + * combine them automatically. + */ + SECTIONS { +- /DISCARD/ : { *(.discard) } ++ /DISCARD/ : { ++ *(.discard) ++ *(.discard.*) ++ } + + __ksymtab 0 : { *(SORT(___ksymtab+*)) } + __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index e8a1f69..7b1f7b5 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -1220,7 +1220,7 @@ int cmd_check(int argc, const char **argv) + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); +- file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); ++ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); + file.rodata = find_section_by_name(file.elf, ".rodata"); + file.ignore_unreachables = false; + file.c_file = find_section_by_name(file.elf, ".comment"); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-Detect-jumps-to-retpoline-thunks.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-Detect-jumps-to-retpoline-thunks.patch new file mode 100644 index 00000000..6dc05533 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-Detect-jumps-to-retpoline-thunks.patch @@ -0,0 +1,64 @@ +From df3a283c0e5efdef9f4146188608fe8658478ec7 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Thu, 11 Jan 2018 21:46:23 +0000 +Subject: [PATCH 062/102] objtool: Detect jumps to retpoline thunks + +commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream. + +A direct jump to a retpoline thunk is really an indirect jump in +disguise. Change the objtool instruction type accordingly. + +Objtool needs to know where indirect branches are so it can detect +switch statement jump tables. + +This fixes a bunch of warnings with CONFIG_RETPOLINE like: + + arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame + kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame + ... 
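[Illustrative sketch, not part of the patch: the classification rule in
isolation. A direct "jmp __x86_indirect_thunk_rax" behaves like
"jmp *%rax", so a relocation whose target symbol is a retpoline thunk
marks the jump as dynamic.]

    #include <stdbool.h>
    #include <string.h>

    /* Jumps to *_indirect_thunk_* symbols are treated as
     * INSN_JUMP_DYNAMIC rather than followed as ordinary jumps. */
    static bool is_retpoline_thunk(const char *sym_name)
    {
            return strstr(sym_name, "_indirect_thunk_") != NULL;
    }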
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk +[dwmw2: Applies to tools/objtool/builtin-check.c not check.c] +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/objtool/builtin-check.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index 7b1f7b5..36784b8 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -382,6 +382,13 @@ static int add_jump_destinations(struct objtool_file *file) + } else if (rela->sym->sec->idx) { + dest_sec = rela->sym->sec; + dest_off = rela->sym->sym.st_value + rela->addend + 4; ++ } else if (strstr(rela->sym->name, "_indirect_thunk_")) { ++ /* ++ * Retpoline jumps are really dynamic jumps in ++ * disguise, so convert them accordingly. ++ */ ++ insn->type = INSN_JUMP_DYNAMIC; ++ continue; + } else { + /* sibling call */ + insn->jump_dest = 0; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Allow-alternatives-to-be-ignored.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Allow-alternatives-to-be-ignored.patch new file mode 100644 index 00000000..81beb919 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Allow-alternatives-to-be-ignored.patch @@ -0,0 +1,166 @@ +From 6af5187229c3acb6956484634a80b69e149aa3d6 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Thu, 11 Jan 2018 21:46:24 +0000 +Subject: [PATCH 063/102] objtool: Allow alternatives to be ignored + +commit 258c76059cece01bebae098e81bacb1af2edad17 upstream. + +Getting objtool to understand retpolines is going to be a bit of a +challenge. For now, take advantage of the fact that retpolines are +patched in with alternatives. Just read the original (sane) +non-alternative instruction, and ignore the patched-in retpoline. + +This allows objtool to understand the control flow *around* the +retpoline, even if it can't yet follow what's inside. This means the +ORC unwinder will fail to unwind from inside a retpoline, but will work +fine otherwise. 
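[Illustrative sketch, not part of the patch: the producer side of this
scheme, added later in this series as ANNOTATE_NOSPEC_ALTERNATIVE. Each
use records a PC-relative entry that objtool finds again through the
.rela.discard.nospec relocations and turns into insn->ignore_alts.]

    /* Emit the address of this spot into a section that is thrown away
     * at link time; only the relocation matters to objtool. */
    #define ANNOTATE_NOSPEC_ALTERNATIVE             \
            "999:\n\t"                              \
            ".pushsection .discard.nospec\n\t"      \
            ".long 999b - .\n\t"                    \
            ".popsection\n\t"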
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk +[dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]] +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/objtool/builtin-check.c | 64 ++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 57 insertions(+), 7 deletions(-) + +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index 36784b8..ee71d4c 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -51,7 +51,7 @@ struct instruction { + unsigned int len, state; + unsigned char type; + unsigned long immediate; +- bool alt_group, visited; ++ bool alt_group, visited, ignore_alts; + struct symbol *call_dest; + struct instruction *jump_dest; + struct list_head alts; +@@ -353,6 +353,40 @@ static void add_ignores(struct objtool_file *file) + } + + /* ++ * FIXME: For now, just ignore any alternatives which add retpolines. This is ++ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. ++ * But it at least allows objtool to understand the control flow *around* the ++ * retpoline. ++ */ ++static int add_nospec_ignores(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct rela *rela; ++ struct instruction *insn; ++ ++ sec = find_section_by_name(file->elf, ".rela.discard.nospec"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in %s", sec->name); ++ return -1; ++ } ++ ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (!insn) { ++ WARN("bad .discard.nospec entry"); ++ return -1; ++ } ++ ++ insn->ignore_alts = true; ++ } ++ ++ return 0; ++} ++ ++/* + * Find the destination instructions for all jumps. + */ + static int add_jump_destinations(struct objtool_file *file) +@@ -435,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file) + dest_off = insn->offset + insn->len + insn->immediate; + insn->call_dest = find_symbol_by_offset(insn->sec, + dest_off); ++ /* ++ * FIXME: Thanks to retpolines, it's now considered ++ * normal for a function to call within itself. So ++ * disable this warning for now. 
++ */ ++#if 0 + if (!insn->call_dest) { + WARN_FUNC("can't find call dest symbol at offset 0x%lx", + insn->sec, insn->offset, dest_off); + return -1; + } ++#endif + } else if (rela->sym->type == STT_SECTION) { + insn->call_dest = find_symbol_by_offset(rela->sym->sec, + rela->addend+4); +@@ -601,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file) + return ret; + + list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { +- alt = malloc(sizeof(*alt)); +- if (!alt) { +- WARN("malloc failed"); +- ret = -1; +- goto out; +- } + + orig_insn = find_insn(file, special_alt->orig_sec, + special_alt->orig_off); +@@ -617,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file) + goto out; + } + ++ /* Ignore retpoline alternatives. */ ++ if (orig_insn->ignore_alts) ++ continue; ++ + new_insn = NULL; + if (!special_alt->group || special_alt->new_len) { + new_insn = find_insn(file, special_alt->new_sec, +@@ -642,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file) + goto out; + } + ++ alt = malloc(sizeof(*alt)); ++ if (!alt) { ++ WARN("malloc failed"); ++ ret = -1; ++ goto out; ++ } ++ + alt->insn = new_insn; + list_add_tail(&alt->list, &orig_insn->alts); + +@@ -852,6 +898,10 @@ static int decode_sections(struct objtool_file *file) + + add_ignores(file); + ++ ret = add_nospec_ignores(file); ++ if (ret) ++ return ret; ++ + ret = add_jump_destinations(file); + if (ret) + return ret; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch new file mode 100644 index 00000000..33dc1048 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch @@ -0,0 +1,150 @@ +From 950bbef589db560b5f5cc84ecc4d9a88958e1521 Mon Sep 17 00:00:00 2001 +From: Andrey Ryabinin <aryabinin@virtuozzo.com> +Date: Fri, 29 Sep 2017 17:15:36 +0300 +Subject: [PATCH 064/102] x86/asm: Use register variable to get stack pointer + value + +commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream. + +Currently we use current_stack_pointer() function to get the value +of the stack pointer register. Since commit: + + f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") + +... we have a stack register variable declared. It can be used instead of +current_stack_pointer() function which allows to optimize away some +excessive "mov %rsp, %<dst>" instructions: + + -mov %rsp,%rdx + -sub %rdx,%rax + -cmp $0x3fff,%rax + -ja ffffffff810722fd <ist_begin_non_atomic+0x2d> + + +sub %rsp,%rax + +cmp $0x3fff,%rax + +ja ffffffff810722fa <ist_begin_non_atomic+0x2a> + +Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer +and use it instead of the removed function. 
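[Illustrative sketch, not part of the patch: how the constraint is meant
to be used; asm_helper is a hypothetical assembly routine.]

    #include <asm/asm.h>

    static inline void call_asm_helper(void)
    {
            /* Listing the stack pointer as an in/out operand keeps the
             * compiler from emitting the call before the frame pointer
             * is set up. */
            asm volatile("call asm_helper"
                         : ASM_CALL_CONSTRAINT
                         : : "memory");
    }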
+ +Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +[dwmw2: We want ASM_CALL_CONSTRAINT for retpoline] +Signed-off-by: David Woodhouse <dwmw@amazon.co.ku> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/asm.h | 11 +++++++++++ + arch/x86/include/asm/thread_info.h | 11 ----------- + arch/x86/kernel/irq_32.c | 6 +++--- + arch/x86/kernel/traps.c | 2 +- + arch/x86/mm/tlb.c | 2 +- + 5 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h +index 7acb51c..0052352 100644 +--- a/arch/x86/include/asm/asm.h ++++ b/arch/x86/include/asm/asm.h +@@ -125,4 +125,15 @@ + /* For C file, we already have NOKPROBE_SYMBOL macro */ + #endif + ++#ifndef __ASSEMBLY__ ++/* ++ * This output constraint should be used for any inline asm which has a "call" ++ * instruction. Otherwise the asm may be inserted before the frame pointer ++ * gets set up by the containing function. If you forget to do this, objtool ++ * may print a "call without frame pointer save/setup" warning. ++ */ ++register unsigned long current_stack_pointer asm(_ASM_SP); ++#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) ++#endif ++ + #endif /* _ASM_X86_ASM_H */ +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index ad6f5eb0..bdf9c4c 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -152,17 +152,6 @@ struct thread_info { + */ + #ifndef __ASSEMBLY__ + +-static inline unsigned long current_stack_pointer(void) +-{ +- unsigned long sp; +-#ifdef CONFIG_X86_64 +- asm("mov %%rsp,%0" : "=g" (sp)); +-#else +- asm("mov %%esp,%0" : "=g" (sp)); +-#endif +- return sp; +-} +- + /* + * Walks up the stack frames to make sure that the specified object is + * entirely contained by a single stack frame. +diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c +index 1f38d9a..d4eb450 100644 +--- a/arch/x86/kernel/irq_32.c ++++ b/arch/x86/kernel/irq_32.c +@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack) + + static inline void *current_stack(void) + { +- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); ++ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); + } + + static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) +@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) + + /* Save the next esp at the bottom of the stack */ + prev_esp = (u32 *)irqstk; +- *prev_esp = current_stack_pointer(); ++ *prev_esp = current_stack_pointer; + + if (unlikely(overflow)) + call_on_stack(print_stack_overflow, isp); +@@ -139,7 +139,7 @@ void do_softirq_own_stack(void) + + /* Push the previous esp onto the stack */ + prev_esp = (u32 *)irqstk; +- *prev_esp = current_stack_pointer(); ++ *prev_esp = current_stack_pointer; + + call_on_stack(__do_softirq, isp); + } +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index bd4e3d4..322f433 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) + * from double_fault. 
+ */ + BUG_ON((unsigned long)(current_top_of_stack() - +- current_stack_pointer()) >= THREAD_SIZE); ++ current_stack_pointer) >= THREAD_SIZE); + + preempt_enable_no_resched(); + } +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index e81f8bb..0cf44ac 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -114,7 +114,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. + */ +- unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); ++ unsigned int stack_pgd_index = pgd_index(current_stack_pointer); + + pgd_t *pgd = next->pgd + stack_pgd_index; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-retpoline-Add-initial-retpoline-support.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-retpoline-Add-initial-retpoline-support.patch new file mode 100644 index 00000000..9930f8e3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-retpoline-Add-initial-retpoline-support.patch @@ -0,0 +1,378 @@ +From 057db5e235721702a02624a50661fde17423eb4c Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:25 +0000 +Subject: [PATCH 065/102] x86/retpoline: Add initial retpoline support + +commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream. + +Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide +the corresponding thunks. Provide assembler macros for invoking the thunks +in the same way that GCC does, from native and inline assembler. + +This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In +some circumstances, IBRS microcode features may be used instead, and the +retpoline can be disabled. + +On AMD CPUs if lfence is serialising, the retpoline can be dramatically +simplified to a simple "lfence; jmp *\reg". A future patch, after it has +been verified that lfence really is serialising in all circumstances, can +enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition +to X86_FEATURE_RETPOLINE. + +Do not align the retpoline in the altinstr section, because there is no +guarantee that it stays aligned when it's copied over the oldinstr during +alternative patching. 
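[Illustrative sketch, not part of the patch: the sequence behind
RETPOLINE_JMP, written out as inline asm for %rax. Architecturally it is
just "jmp *%rax"; a mispredicted "ret" is captured by the pause loop.]

    static void __attribute__((noreturn)) retpoline_jmp_rax(void *target)
    {
            asm volatile("call 1f\n\t"              /* push address of 2: */
                         "2:\tpause\n\t"            /* speculation lands here */
                         "jmp 2b\n\t"
                         "1:\tmov %0, (%%rsp)\n\t"  /* overwrite return address */
                         "ret"                      /* jump to target */
                         : : "a" (target));
            __builtin_unreachable();
    }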
+ +[ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] +[ tglx: Put actual function CALL/JMP in front of the macros, convert to + symbolic labels ] +[ dwmw2: Convert back to numeric labels, merge objtool fixes ] + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/Kconfig | 13 ++++ + arch/x86/Makefile | 10 +++ + arch/x86/include/asm/asm-prototypes.h | 25 +++++++ + arch/x86/include/asm/cpufeatures.h | 3 + + arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++++++++++ + arch/x86/kernel/cpu/common.c | 4 ++ + arch/x86/lib/Makefile | 1 + + arch/x86/lib/retpoline.S | 48 +++++++++++++ + 8 files changed, 232 insertions(+) + create mode 100644 arch/x86/include/asm/nospec-branch.h + create mode 100644 arch/x86/lib/retpoline.S + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 7877ff0..7132252 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -408,6 +408,19 @@ config GOLDFISH + def_bool y + depends on X86_GOLDFISH + ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ ---help--- ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++ Without compiler support, at least indirect branches in assembler ++ code are eliminated. Since this includes the syscall entry path, ++ it is not entirely pointless. ++ + if X86_32 + config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index 2d44933..1e1a733 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + KBUILD_CFLAGS += $(mflags-y) + KBUILD_AFLAGS += $(mflags-y) + ++# Avoid indirect branches in kernel to deal with Spectre ++ifdef CONFIG_RETPOLINE ++ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ++ ifneq ($(RETPOLINE_CFLAGS),) ++ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE ++ else ++ $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) 
++ endif ++endif ++ + archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index 44b8762..b15aa40 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -10,7 +10,32 @@ + #include <asm/pgtable.h> + #include <asm/special_insns.h> + #include <asm/preempt.h> ++#include <asm/asm.h> + + #ifndef CONFIG_X86_CMPXCHG64 + extern void cmpxchg8b_emu(void); + #endif ++ ++#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_X86_32 ++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); ++#else ++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); ++INDIRECT_THUNK(8) ++INDIRECT_THUNK(9) ++INDIRECT_THUNK(10) ++INDIRECT_THUNK(11) ++INDIRECT_THUNK(12) ++INDIRECT_THUNK(13) ++INDIRECT_THUNK(14) ++INDIRECT_THUNK(15) ++#endif ++INDIRECT_THUNK(ax) ++INDIRECT_THUNK(bx) ++INDIRECT_THUNK(cx) ++INDIRECT_THUNK(dx) ++INDIRECT_THUNK(si) ++INDIRECT_THUNK(di) ++INDIRECT_THUNK(bp) ++INDIRECT_THUNK(sp) ++#endif /* CONFIG_RETPOLINE */ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index f364c891..4467568 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -194,6 +194,9 @@ + #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + ++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ ++ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ + #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +new file mode 100644 +index 0000000..e20e92e +--- /dev/null ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -0,0 +1,128 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#ifndef __NOSPEC_BRANCH_H__ ++#define __NOSPEC_BRANCH_H__ ++ ++#include <asm/alternative.h> ++#include <asm/alternative-asm.h> ++#include <asm/cpufeatures.h> ++ ++#ifdef __ASSEMBLY__ ++ ++/* ++ * This should be used immediately before a retpoline alternative. It tells ++ * objtool where the retpolines are so that it can make sense of the control ++ * flow by just reading the original instruction(s) and ignoring the ++ * alternatives. ++ */ ++.macro ANNOTATE_NOSPEC_ALTERNATIVE ++ .Lannotate_\@: ++ .pushsection .discard.nospec ++ .long .Lannotate_\@ - . ++ .popsection ++.endm ++ ++/* ++ * These are the bare retpoline primitives for indirect jmp and call. ++ * Do not use these directly; they only exist to make the ALTERNATIVE ++ * invocation below less ugly. ++ */ ++.macro RETPOLINE_JMP reg:req ++ call .Ldo_rop_\@ ++.Lspec_trap_\@: ++ pause ++ jmp .Lspec_trap_\@ ++.Ldo_rop_\@: ++ mov \reg, (%_ASM_SP) ++ ret ++.endm ++ ++/* ++ * This is a wrapper around RETPOLINE_JMP so the called function in reg ++ * returns to the instruction after the macro. 
++ */ ++.macro RETPOLINE_CALL reg:req ++ jmp .Ldo_call_\@ ++.Ldo_retpoline_jmp_\@: ++ RETPOLINE_JMP \reg ++.Ldo_call_\@: ++ call .Ldo_retpoline_jmp_\@ ++.endm ++ ++/* ++ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple ++ * indirect jmp/call which may be susceptible to the Spectre variant 2 ++ * attack. ++ */ ++.macro JMP_NOSPEC reg:req ++#ifdef CONFIG_RETPOLINE ++ ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE_2 __stringify(jmp *\reg), \ ++ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ ++ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD ++#else ++ jmp *\reg ++#endif ++.endm ++ ++.macro CALL_NOSPEC reg:req ++#ifdef CONFIG_RETPOLINE ++ ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE_2 __stringify(call *\reg), \ ++ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ ++ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD ++#else ++ call *\reg ++#endif ++.endm ++ ++#else /* __ASSEMBLY__ */ ++ ++#define ANNOTATE_NOSPEC_ALTERNATIVE \ ++ "999:\n\t" \ ++ ".pushsection .discard.nospec\n\t" \ ++ ".long 999b - .\n\t" \ ++ ".popsection\n\t" ++ ++#if defined(CONFIG_X86_64) && defined(RETPOLINE) ++ ++/* ++ * Since the inline asm uses the %V modifier which is only in newer GCC, ++ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. ++ */ ++# define CALL_NOSPEC \ ++ ANNOTATE_NOSPEC_ALTERNATIVE \ ++ ALTERNATIVE( \ ++ "call *%[thunk_target]\n", \ ++ "call __x86_indirect_thunk_%V[thunk_target]\n", \ ++ X86_FEATURE_RETPOLINE) ++# define THUNK_TARGET(addr) [thunk_target] "r" (addr) ++ ++#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) ++/* ++ * For i386 we use the original ret-equivalent retpoline, because ++ * otherwise we'll run out of registers. We don't care about CET ++ * here, anyway. ++ */ ++# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ ++ " jmp 904f;\n" \ ++ " .align 16\n" \ ++ "901: call 903f;\n" \ ++ "902: pause;\n" \ ++ " jmp 902b;\n" \ ++ " .align 16\n" \ ++ "903: addl $4, %%esp;\n" \ ++ " pushl %[thunk_target];\n" \ ++ " ret;\n" \ ++ " .align 16\n" \ ++ "904: call 901b;\n", \ ++ X86_FEATURE_RETPOLINE) ++ ++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) ++#else /* No retpoline */ ++# define CALL_NOSPEC "call *%[thunk_target]\n" ++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) ++#endif ++ ++#endif /* __ASSEMBLY__ */ ++#endif /* __NOSPEC_BRANCH_H__ */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 7b9ae04..6e885cc 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -889,6 +889,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + ++#ifdef CONFIG_RETPOLINE ++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE); ++#endif ++ + fpu__init_system(c); + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index 34a7413..6bf1898 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o ++lib-$(CONFIG_RETPOLINE) += retpoline.o + + obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +new file mode 100644 +index 0000000..cb45c6c +--- /dev/null ++++ b/arch/x86/lib/retpoline.S +@@ -0,0 +1,48 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#include <linux/stringify.h> 
++#include <linux/linkage.h> ++#include <asm/dwarf2.h> ++#include <asm/cpufeatures.h> ++#include <asm/alternative-asm.h> ++#include <asm/export.h> ++#include <asm/nospec-branch.h> ++ ++.macro THUNK reg ++ .section .text.__x86.indirect_thunk.\reg ++ ++ENTRY(__x86_indirect_thunk_\reg) ++ CFI_STARTPROC ++ JMP_NOSPEC %\reg ++ CFI_ENDPROC ++ENDPROC(__x86_indirect_thunk_\reg) ++.endm ++ ++/* ++ * Despite being an assembler file we can't just use .irp here ++ * because __KSYM_DEPS__ only uses the C preprocessor and would ++ * only see one instance of "__x86_indirect_thunk_\reg" rather ++ * than one per register with the correct names. So we do it ++ * the simple and nasty way... ++ */ ++#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) ++#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) ++ ++GENERATE_THUNK(_ASM_AX) ++GENERATE_THUNK(_ASM_BX) ++GENERATE_THUNK(_ASM_CX) ++GENERATE_THUNK(_ASM_DX) ++GENERATE_THUNK(_ASM_SI) ++GENERATE_THUNK(_ASM_DI) ++GENERATE_THUNK(_ASM_BP) ++GENERATE_THUNK(_ASM_SP) ++#ifdef CONFIG_64BIT ++GENERATE_THUNK(r8) ++GENERATE_THUNK(r9) ++GENERATE_THUNK(r10) ++GENERATE_THUNK(r11) ++GENERATE_THUNK(r12) ++GENERATE_THUNK(r13) ++GENERATE_THUNK(r14) ++GENERATE_THUNK(r15) ++#endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch new file mode 100644 index 00000000..fd40391c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch @@ -0,0 +1,327 @@ +From 604f62647c10984c0d50957ca4c8e4864db1eae3 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:26 +0000 +Subject: [PATCH 066/102] x86/spectre: Add boot time option to select Spectre + v2 mitigation + +commit da285121560e769cc31797bba6422eea71d473e0 upstream. + +Add a spectre_v2= option to select the mitigation used for the indirect +branch speculation vulnerability. + +Currently, the only option available is retpoline, in its various forms. +This will be expanded to cover the new IBRS/IBPB microcode features. + +The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation +control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a +serializing instruction, which is indicated by the LFENCE_RDTSC feature. 
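[Illustrative example, not part of the patch: selecting the AMD thunk at
boot and checking the outcome through the sysfs file added earlier in
this series; assumes an AMD CPU and a retpoline-capable compiler.]

    spectre_v2=retpoline,amd

    $ cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
    Mitigation: Full AMD retpoline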
+ +[ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS + integration becomes simple ] + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 28 +++++++ + arch/x86/include/asm/nospec-branch.h | 10 +++ + arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++++++++++- + arch/x86/kernel/cpu/common.c | 4 - + 4 files changed, 195 insertions(+), 5 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 3d53778..4b438e4 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2681,6 +2681,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + nosmt [KNL,S390] Disable symmetric multithreading (SMT). + Equivalent to smt=1. + ++ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 ++ (indirect branch prediction) vulnerability. System may ++ allow data leaks with this option, which is equivalent ++ to spectre_v2=off. ++ + noxsave [BUGS=X86] Disables x86 extended register state save + and restore using xsave. The kernel will fallback to + enabling legacy floating-point and sse state. +@@ -3934,6 +3939,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + sonypi.*= [HW] Sony Programmable I/O Control Device driver + See Documentation/laptops/sonypi.txt + ++ spectre_v2= [X86] Control mitigation of Spectre variant 2 ++ (indirect branch speculation) vulnerability. ++ ++ on - unconditionally enable ++ off - unconditionally disable ++ auto - kernel detects whether your CPU model is ++ vulnerable ++ ++ Selecting 'on' will, and 'auto' may, choose a ++ mitigation method at run time according to the ++ CPU, the available microcode, the setting of the ++ CONFIG_RETPOLINE configuration option, and the ++ compiler with which the kernel was built. ++ ++ Specific mitigations can also be selected manually: ++ ++ retpoline - replace indirect branches ++ retpoline,generic - google's original retpoline ++ retpoline,amd - AMD-specific minimal thunk ++ ++ Not specifying this option is equivalent to ++ spectre_v2=auto. 
++ + spia_io_base= [HW,MTD] + spia_fio_base= + spia_pedr= +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index e20e92e..ea034fa 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -124,5 +124,15 @@ + # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) + #endif + ++/* The Spectre V2 mitigation variants */ ++enum spectre_v2_mitigation { ++ SPECTRE_V2_NONE, ++ SPECTRE_V2_RETPOLINE_MINIMAL, ++ SPECTRE_V2_RETPOLINE_MINIMAL_AMD, ++ SPECTRE_V2_RETPOLINE_GENERIC, ++ SPECTRE_V2_RETPOLINE_AMD, ++ SPECTRE_V2_IBRS, ++}; ++ + #endif /* __ASSEMBLY__ */ + #endif /* __NOSPEC_BRANCH_H__ */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index cb6b4f9..49d25dd 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -10,6 +10,9 @@ + #include <linux/init.h> + #include <linux/utsname.h> + #include <linux/cpu.h> ++ ++#include <asm/nospec-branch.h> ++#include <asm/cmdline.h> + #include <asm/bugs.h> + #include <asm/processor.h> + #include <asm/processor-flags.h> +@@ -20,6 +23,8 @@ + #include <asm/pgtable.h> + #include <asm/cacheflush.h> + ++static void __init spectre_v2_select_mitigation(void); ++ + void __init check_bugs(void) + { + identify_boot_cpu(); +@@ -29,6 +34,9 @@ void __init check_bugs(void) + print_cpu_info(&boot_cpu_data); + } + ++ /* Select the proper spectre mitigation before patching alternatives */ ++ spectre_v2_select_mitigation(); ++ + #ifdef CONFIG_X86_32 + /* + * Check whether we are able to run this kernel safely on SMP. +@@ -61,6 +69,153 @@ void __init check_bugs(void) + #endif + } + ++/* The kernel command line selection */ ++enum spectre_v2_mitigation_cmd { ++ SPECTRE_V2_CMD_NONE, ++ SPECTRE_V2_CMD_AUTO, ++ SPECTRE_V2_CMD_FORCE, ++ SPECTRE_V2_CMD_RETPOLINE, ++ SPECTRE_V2_CMD_RETPOLINE_GENERIC, ++ SPECTRE_V2_CMD_RETPOLINE_AMD, ++}; ++ ++static const char *spectre_v2_strings[] = { ++ [SPECTRE_V2_NONE] = "Vulnerable", ++ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", ++ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", ++ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", ++ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", ++}; ++ ++#undef pr_fmt ++#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt ++ ++static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; ++ ++static void __init spec2_print_if_insecure(const char *reason) ++{ ++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) ++ pr_info("%s\n", reason); ++} ++ ++static void __init spec2_print_if_secure(const char *reason) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) ++ pr_info("%s\n", reason); ++} ++ ++static inline bool retp_compiler(void) ++{ ++ return __is_defined(RETPOLINE); ++} ++ ++static inline bool match_option(const char *arg, int arglen, const char *opt) ++{ ++ int len = strlen(opt); ++ ++ return len == arglen && !strncmp(arg, opt, len); ++} ++ ++static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) ++{ ++ char arg[20]; ++ int ret; ++ ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, ++ sizeof(arg)); ++ if (ret > 0) { ++ if (match_option(arg, ret, "off")) { ++ goto disable; ++ } else if (match_option(arg, ret, "on")) { ++ spec2_print_if_secure("force enabled on command line."); ++ return SPECTRE_V2_CMD_FORCE; ++ } else if (match_option(arg, ret, "retpoline")) { ++ spec2_print_if_insecure("retpoline selected on command line."); ++ return 
SPECTRE_V2_CMD_RETPOLINE;
++		} else if (match_option(arg, ret, "retpoline,amd")) {
++			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
++				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
++				return SPECTRE_V2_CMD_AUTO;
++			}
++			spec2_print_if_insecure("AMD retpoline selected on command line.");
++			return SPECTRE_V2_CMD_RETPOLINE_AMD;
++		} else if (match_option(arg, ret, "retpoline,generic")) {
++			spec2_print_if_insecure("generic retpoline selected on command line.");
++			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
++		} else if (match_option(arg, ret, "auto")) {
++			return SPECTRE_V2_CMD_AUTO;
++		}
++	}
++
++	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++		return SPECTRE_V2_CMD_AUTO;
++disable:
++	spec2_print_if_insecure("disabled on command line.");
++	return SPECTRE_V2_CMD_NONE;
++}
++
++static void __init spectre_v2_select_mitigation(void)
++{
++	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
++	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
++
++	/*
++	 * If the CPU is not affected and the command line mode is NONE or AUTO
++	 * then nothing to do.
++	 */
++	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
++	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
++		return;
++
++	switch (cmd) {
++	case SPECTRE_V2_CMD_NONE:
++		return;
++
++	case SPECTRE_V2_CMD_FORCE:
++		/* FALLTHRU */
++	case SPECTRE_V2_CMD_AUTO:
++		goto retpoline_auto;
++
++	case SPECTRE_V2_CMD_RETPOLINE_AMD:
++		if (IS_ENABLED(CONFIG_RETPOLINE))
++			goto retpoline_amd;
++		break;
++	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
++		if (IS_ENABLED(CONFIG_RETPOLINE))
++			goto retpoline_generic;
++		break;
++	case SPECTRE_V2_CMD_RETPOLINE:
++		if (IS_ENABLED(CONFIG_RETPOLINE))
++			goto retpoline_auto;
++		break;
++	}
++	pr_err("kernel not compiled with retpoline; no mitigation available!");
++	return;
++
++retpoline_auto:
++	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++	retpoline_amd:
++		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
++			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
++			goto retpoline_generic;
++		}
++		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
++					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
++		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
++		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++	} else {
++	retpoline_generic:
++		mode = retp_compiler() ?
SPECTRE_V2_RETPOLINE_GENERIC : ++ SPECTRE_V2_RETPOLINE_MINIMAL; ++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE); ++ } ++ ++ spectre_v2_enabled = mode; ++ pr_info("%s\n", spectre_v2_strings[mode]); ++} ++ ++#undef pr_fmt ++ + #ifdef CONFIG_SYSFS + ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +@@ -85,6 +240,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, + { + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); +- return sprintf(buf, "Vulnerable\n"); ++ ++ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + } + #endif +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 6e885cc..7b9ae04 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -889,10 +889,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + +-#ifdef CONFIG_RETPOLINE +- setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +-#endif +- + fpu__init_system(c); + + #ifdef CONFIG_X86_32 +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch new file mode 100644 index 00000000..6d671ebd --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch @@ -0,0 +1,135 @@ +From 7a12da3aba08c5a7838315b010ead10ce3fc8b14 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:27 +0000 +Subject: [PATCH 067/102] x86/retpoline/crypto: Convert crypto assembler + indirect jumps + +commit 9697fa39efd3fc3692f2949d4045f393ec58450b upstream. + +Convert all indirect jumps in crypto assembler code to use non-speculative +sequences when CONFIG_RETPOLINE is enabled. 
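+
+[ editor's note: illustrative sketch, not part of the patch. With
+  CONFIG_RETPOLINE, CALL_NOSPEC replaces a bare 'call *%reg' with a
+  retpoline. A simplified C inline-asm rendering of the sequence (the
+  real macro in asm/nospec-branch.h is wrapped in ALTERNATIVE; label
+  numbers are illustrative, 64-bit shown):
+
+	static void call_nospec_sketch(void *target)
+	{
+		asm volatile("      jmp    904f\n"
+			     "901:  call   903f\n"
+			     "902:  pause\n"			/* speculation trap */
+			     "      jmp    902b\n"		/* spin until resteered */
+			     "903:  mov    %[thunk_target], (%%rsp)\n"
+			     "      ret\n"			/* 'returns' to the target */
+			     "904:  call   901b\n"		/* the actual call */
+			     : : [thunk_target] "r" (target) : "memory");
+	}
+
+  The RET is predicted from the return stack buffer, which points at
+  the 902 trap loop, so the indirect branch predictor can never steer
+  the call to an attacker-trained target. A later patch in this series
+  adds LFENCE to the trap loop for AMD. ]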
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/crypto/aesni-intel_asm.S | 5 +++-- + arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- + arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- + arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- + 4 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S +index 383a6f8..fa8801b 100644 +--- a/arch/x86/crypto/aesni-intel_asm.S ++++ b/arch/x86/crypto/aesni-intel_asm.S +@@ -32,6 +32,7 @@ + #include <linux/linkage.h> + #include <asm/inst.h> + #include <asm/frame.h> ++#include <asm/nospec-branch.h> + + /* + * The following macros are used to move an (un)aligned 16 byte value to/from +@@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8) + pxor INC, STATE4 + movdqu IV, 0x30(OUTP) + +- call *%r11 ++ CALL_NOSPEC %r11 + + movdqu 0x00(OUTP), INC + pxor INC, STATE1 +@@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8) + _aesni_gf128mul_x_ble() + movups IV, (IVP) + +- call *%r11 ++ CALL_NOSPEC %r11 + + movdqu 0x40(OUTP), INC + pxor INC, STATE1 +diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S +index aa9e8bd..77ff4de 100644 +--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S ++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S +@@ -17,6 +17,7 @@ + + #include <linux/linkage.h> + #include <asm/frame.h> ++#include <asm/nospec-branch.h> + + #define CAMELLIA_TABLE_BYTE_LEN 272 + +@@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way: + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + +- call *%r9; ++ CALL_NOSPEC %r9; + + addq $(16 * 16), %rsp; + +diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +index 16186c1..7384342 100644 +--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S ++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +@@ -12,6 +12,7 @@ + + #include <linux/linkage.h> + #include <asm/frame.h> ++#include <asm/nospec-branch.h> + + #define CAMELLIA_TABLE_BYTE_LEN 272 + +@@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way: + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + +- call *%r9; ++ CALL_NOSPEC %r9; + + addq $(16 * 32), %rsp; + +diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +index dc05f01..174fd41 100644 +--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S ++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +@@ -45,6 +45,7 @@ + + #include <asm/inst.h> + #include <linux/linkage.h> ++#include <asm/nospec-branch.h> + + ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 
+ +@@ -172,7 +173,7 @@ continue_block: + movzxw (bufp, %rax, 2), len + lea crc_array(%rip), bufp + lea (bufp, len, 1), bufp +- jmp *bufp ++ JMP_NOSPEC bufp + + ################################################################ + ## 2a) PROCESS FULL BLOCKS: +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-entry-Convert-entry-assembler-indirect.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-entry-Convert-entry-assembler-indirect.patch new file mode 100644 index 00000000..cd650af8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-entry-Convert-entry-assembler-indirect.patch @@ -0,0 +1,122 @@ +From 2553defac996cc0978f3346a1483cb314e59a4ea Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:28 +0000 +Subject: [PATCH 068/102] x86/retpoline/entry: Convert entry assembler indirect + jumps + +commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream. + +Convert indirect jumps in core 32/64bit entry assembler code to use +non-speculative sequences when CONFIG_RETPOLINE is enabled. + +Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return +address after the 'call' instruction must be *precisely* at the +.Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, +and the use of alternatives will mess that up unless we play horrid +games to prepend with NOPs and make the variants the same length. It's +not worth it; in the case where we ALTERNATIVE out the retpoline, the +first instruction at __x86.indirect_thunk.rax is going to be a bare +jmp *%rax anyway. + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Ingo Molnar <mingo@kernel.org> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_32.S | 5 +++-- + arch/x86/entry/entry_64.S | 10 ++++++++-- + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index edba860..7b95f35 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -45,6 +45,7 @@ + #include <asm/asm.h> + #include <asm/smap.h> + #include <asm/export.h> ++#include <asm/nospec-branch.h> + + .section .entry.text, "ax" + +@@ -260,7 +261,7 @@ ENTRY(ret_from_fork) + + /* kernel thread */ + 1: movl %edi, %eax +- call *%ebx ++ CALL_NOSPEC %ebx + /* + * A kernel thread is allowed to return here after successfully + * calling do_execve(). 
Exit to userspace to complete the execve() +@@ -1062,7 +1063,7 @@ error_code: + movl %ecx, %es + TRACE_IRQS_OFF + movl %esp, %eax # pt_regs pointer +- call *%edi ++ CALL_NOSPEC %edi + jmp ret_from_exception + END(page_fault) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 5bb9b02..f7ebaa1 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -37,6 +37,7 @@ + #include <asm/pgtable_types.h> + #include <asm/export.h> + #include <asm/kaiser.h> ++#include <asm/nospec-branch.h> + #include <linux/err.h> + + /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ +@@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath: + * It might end up jumping to the slow path. If it jumps, RAX + * and all argument registers are clobbered. + */ ++#ifdef CONFIG_RETPOLINE ++ movq sys_call_table(, %rax, 8), %rax ++ call __x86_indirect_thunk_rax ++#else + call *sys_call_table(, %rax, 8) ++#endif + .Lentry_SYSCALL_64_after_fastpath_call: + + movq %rax, RAX(%rsp) +@@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64) + jmp entry_SYSCALL64_slow_path + + 1: +- jmp *%rax /* Called from C */ ++ JMP_NOSPEC %rax /* Called from C */ + END(stub_ptregs_64) + + .macro ptregs_stub func +@@ -457,7 +463,7 @@ ENTRY(ret_from_fork) + 1: + /* kernel thread */ + movq %r12, %rdi +- call *%rbx ++ CALL_NOSPEC %rbx + /* + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch new file mode 100644 index 00000000..5a337252 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch @@ -0,0 +1,94 @@ +From fe29bd3596a1947d08b63b0ee4f6c8fb989e47c0 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:29 +0000 +Subject: [PATCH 069/102] x86/retpoline/ftrace: Convert ftrace assembler + indirect jumps + +commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream. + +Convert all indirect jumps in ftrace assembler code to use non-speculative +sequences when CONFIG_RETPOLINE is enabled. 
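+
+[ editor's note: illustrative, not part of the patch. The extra 'mov'
+  in this conversion exists because the old code called through a
+  memory operand ('call *ftrace_trace_function') while the retpoline
+  thunk needs its target in a register. Compiled C gets this for free;
+  the names below are hypothetical:
+
+	extern void (*trace_fn)(unsigned long ip, unsigned long pip);
+
+	static void call_tracer(unsigned long ip, unsigned long pip)
+	{
+		/*
+		 * With -mindirect-branch=thunk-extern this indirect
+		 * call compiles to 'mov trace_fn, %rax' followed by
+		 * 'call __x86_indirect_thunk_rax', never 'call *trace_fn'.
+		 */
+		trace_fn(ip, pip);
+	}
+
+  Hand-written assembler has to open-code the same load-then-thunk
+  pattern, which is what the hunks below do with %ecx and %r8. ]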
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_32.S | 5 +++-- + arch/x86/kernel/mcount_64.S | 7 ++++--- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index 7b95f35..bdc9aea 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -985,7 +985,8 @@ trace: + movl 0x4(%ebp), %edx + subl $MCOUNT_INSN_SIZE, %eax + +- call *ftrace_trace_function ++ movl ftrace_trace_function, %ecx ++ CALL_NOSPEC %ecx + + popl %edx + popl %ecx +@@ -1021,7 +1022,7 @@ return_to_handler: + movl %eax, %ecx + popl %edx + popl %eax +- jmp *%ecx ++ JMP_NOSPEC %ecx + #endif + + #ifdef CONFIG_TRACING +diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S +index 7b0d3da..287ec3b 100644 +--- a/arch/x86/kernel/mcount_64.S ++++ b/arch/x86/kernel/mcount_64.S +@@ -8,7 +8,7 @@ + #include <asm/ptrace.h> + #include <asm/ftrace.h> + #include <asm/export.h> +- ++#include <asm/nospec-branch.h> + + .code64 + .section .entry.text, "ax" +@@ -290,8 +290,9 @@ trace: + * ip and parent ip are used and the list function is called when + * function tracing is enabled. + */ +- call *ftrace_trace_function + ++ movq ftrace_trace_function, %r8 ++ CALL_NOSPEC %r8 + restore_mcount_regs + + jmp fgraph_trace +@@ -334,5 +335,5 @@ GLOBAL(return_to_handler) + movq 8(%rsp), %rdx + movq (%rsp), %rax + addq $24, %rsp +- jmp *%rdi ++ JMP_NOSPEC %rdi + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch new file mode 100644 index 00000000..1e141f3c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch @@ -0,0 +1,79 @@ +From 9d16619e38ccad7ba5ba531c2a4ac857c2846d86 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:30 +0000 +Subject: [PATCH 070/102] x86/retpoline/hyperv: Convert assembler indirect + jumps + +commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream. + +Convert all indirect jumps in hyperv inline asm code to use non-speculative +sequences when CONFIG_RETPOLINE is enabled. 
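+
+[ editor's note: illustrative, not part of the patch. In C inline asm
+  CALL_NOSPEC expands to a string that references an asm operand named
+  [thunk_target], so converting a 'call *%N' also means renaming the
+  input operand, most easily via the THUNK_TARGET() helper, which is
+  just '[thunk_target] "rm" (addr)'. Minimal sketch of the pattern
+  (hypothetical function):
+
+	static u64 do_hypercall(void *page, u64 control)
+	{
+		u64 status;
+
+		/* was: asm volatile("call *%2" : "=a" (status)
+		 *		     : "c" (control), "m" (page)); */
+		asm volatile(CALL_NOSPEC
+			     : "=a" (status)
+			     : "c" (control), THUNK_TARGET(page)
+			     : "memory");
+		return status;
+	}
+
+  The Xen and irq32 patches later in the series apply the same operand
+  renaming. ]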
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk +[ backport to 4.9, hopefully correct, not tested... - gregkh ] +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/hv/hv.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c +index e0a8216..13c32eb4 100644 +--- a/drivers/hv/hv.c ++++ b/drivers/hv/hv.c +@@ -31,6 +31,7 @@ + #include <linux/clockchips.h> + #include <asm/hyperv.h> + #include <asm/mshyperv.h> ++#include <asm/nospec-branch.h> + #include "hyperv_vmbus.h" + + /* The one and only */ +@@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) + return (u64)ULLONG_MAX; + + __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); +- __asm__ __volatile__("call *%3" : "=a" (hv_status) : ++ __asm__ __volatile__(CALL_NOSPEC : ++ "=a" (hv_status) : + "c" (control), "d" (input_address), +- "m" (hypercall_page)); ++ THUNK_TARGET(hypercall_page)); + + return hv_status; + +@@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) + if (!hypercall_page) + return (u64)ULLONG_MAX; + +- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), ++ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi), + "=a"(hv_status_lo) : "d" (control_hi), + "a" (control_lo), "b" (input_address_hi), + "c" (input_address_lo), "D"(output_address_hi), +- "S"(output_address_lo), "m" (hypercall_page)); ++ "S"(output_address_lo), ++ THUNK_TARGET(hypercall_page)); + + return hv_status_lo | ((u64)hv_status_hi << 32); + #endif /* !x86_64 */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch new file mode 100644 index 00000000..a51c24a0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch @@ -0,0 +1,64 @@ +From 14b52723b7a5c9ff4e3a3f95aaa8c859390b5951 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:31 +0000 +Subject: [PATCH 071/102] x86/retpoline/xen: Convert Xen hypercall indirect + jumps + +commit ea08816d5b185ab3d09e95e393f265af54560350 upstream. + +Convert indirect call in Xen hypercall to use non-speculative sequence, +when CONFIG_RETPOLINE is enabled. 
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Reviewed-by: Juergen Gross <jgross@suse.com> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/xen/hypercall.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h +index a12a047..8b1f91f 100644 +--- a/arch/x86/include/asm/xen/hypercall.h ++++ b/arch/x86/include/asm/xen/hypercall.h +@@ -43,6 +43,7 @@ + + #include <asm/page.h> + #include <asm/pgtable.h> ++#include <asm/nospec-branch.h> + + #include <xen/interface/xen.h> + #include <xen/interface/sched.h> +@@ -214,9 +215,9 @@ privcmd_call(unsigned call, + __HYPERCALL_DECLS; + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + +- asm volatile("call *%[call]" ++ asm volatile(CALL_NOSPEC + : __HYPERCALL_5PARAM +- : [call] "a" (&hypercall_page[call]) ++ : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_CLOBBER5); + + return (long)__res; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-checksum32-Convert-assembler-indirect-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-checksum32-Convert-assembler-indirect-.patch new file mode 100644 index 00000000..295a787d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-checksum32-Convert-assembler-indirect-.patch @@ -0,0 +1,70 @@ +From 757a4a6dc16cb312ec9012620f7c28548c5879f6 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Thu, 11 Jan 2018 21:46:32 +0000 +Subject: [PATCH 072/102] x86/retpoline/checksum32: Convert assembler indirect + jumps + +commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream. + +Convert all indirect jumps in 32bit checksum assembler code to use +non-speculative sequences when CONFIG_RETPOLINE is enabled. 
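+
+[ editor's note: illustrative, not part of the patch. The two sites
+  converted here are computed jumps into a table of code stubs, i.e.
+  'jmp *%ebx' with %ebx derived from the buffer length and alignment.
+  The C-level analogue is a computed goto, which a retpoline-enabled
+  compiler likewise routes through a thunk (GNU C sketch):
+
+	static int dispatch(unsigned int n)
+	{
+		static const void *jump_table[] = { &&even, &&odd };
+
+		/* Compiles to an indirect 'jmp *reg'; under retpoline it
+		 * becomes 'jmp __x86_indirect_thunk_<reg>' instead. */
+		goto *jump_table[n & 1];
+	even:
+		return 0;
+	odd:
+		return 1;
+	}
+
+  JMP_NOSPEC gives assembler code the same protection: the target is
+  reached through a trapped RET, and no return address is pushed. ]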
+ +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/lib/checksum_32.S | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S +index 4d34bb5..46e71a7 100644 +--- a/arch/x86/lib/checksum_32.S ++++ b/arch/x86/lib/checksum_32.S +@@ -29,7 +29,8 @@ + #include <asm/errno.h> + #include <asm/asm.h> + #include <asm/export.h> +- ++#include <asm/nospec-branch.h> ++ + /* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ +@@ -156,7 +157,7 @@ ENTRY(csum_partial) + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi +- jmp *%ebx ++ JMP_NOSPEC %ebx + + # Handle 2-byte-aligned regions + 20: addw (%esi), %ax +@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi +- jmp *%ebx ++ JMP_NOSPEC %ebx + 1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch new file mode 100644 index 00000000..634bf6dc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch @@ -0,0 +1,77 @@ +From 28e71e659b1404454de3bab9b662cc8ed75f6fd7 Mon Sep 17 00:00:00 2001 +From: Andi Kleen <ak@linux.intel.com> +Date: Thu, 11 Jan 2018 21:46:33 +0000 +Subject: [PATCH 073/102] x86/retpoline/irq32: Convert assembler indirect jumps + +commit 7614e913db1f40fff819b36216484dc3808995d4 upstream. + +Convert all indirect jumps in 32bit irq inline asm code to use non +speculative sequences. 
+ +Signed-off-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Acked-by: Ingo Molnar <mingo@kernel.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/irq_32.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c +index d4eb450..2763573 100644 +--- a/arch/x86/kernel/irq_32.c ++++ b/arch/x86/kernel/irq_32.c +@@ -19,6 +19,7 @@ + #include <linux/mm.h> + + #include <asm/apic.h> ++#include <asm/nospec-branch.h> + + #ifdef CONFIG_DEBUG_STACKOVERFLOW + +@@ -54,11 +55,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); + static void call_on_stack(void *func, void *stack) + { + asm volatile("xchgl %%ebx,%%esp \n" +- "call *%%edi \n" ++ CALL_NOSPEC + "movl %%ebx,%%esp \n" + : "=b" (stack) + : "0" (stack), +- "D"(func) ++ [thunk_target] "D"(func) + : "memory", "cc", "edx", "ecx", "eax"); + } + +@@ -94,11 +95,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) + call_on_stack(print_stack_overflow, isp); + + asm volatile("xchgl %%ebx,%%esp \n" +- "call *%%edi \n" ++ CALL_NOSPEC + "movl %%ebx,%%esp \n" + : "=a" (arg1), "=b" (isp) + : "0" (desc), "1" (isp), +- "D" (desc->handle_irq) ++ [thunk_target] "D" (desc->handle_irq) + : "memory", "cc", "ecx"); + return 1; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch new file mode 100644 index 00000000..baab673d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch @@ -0,0 +1,195 @@ +From 3902bfc9ca590ff16bcbe293c226c31703d31990 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Fri, 12 Jan 2018 11:11:27 +0000 +Subject: [PATCH 074/102] x86/retpoline: Fill return stack buffer on vmexit + +commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream. + +In accordance with the Intel and AMD documentation, we need to overwrite +all entries in the RSB on exiting a guest, to prevent malicious branch +target predictions from affecting the host kernel. This is needed both +for retpoline and for IBRS. 
+ +[ak: numbers again for the RSB stuffing labels] + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: gnomes@lxorguk.ukuu.org.uk +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Kees Cook <keescook@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Paul Turner <pjt@google.com> +Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++++++++++- + arch/x86/kvm/svm.c | 4 ++ + arch/x86/kvm/vmx.c | 4 ++ + 3 files changed, 85 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index ea034fa..402a11c 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -7,6 +7,48 @@ + #include <asm/alternative-asm.h> + #include <asm/cpufeatures.h> + ++/* ++ * Fill the CPU return stack buffer. ++ * ++ * Each entry in the RSB, if used for a speculative 'ret', contains an ++ * infinite 'pause; jmp' loop to capture speculative execution. ++ * ++ * This is required in various cases for retpoline and IBRS-based ++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to ++ * eliminate potentially bogus entries from the RSB, and sometimes ++ * purely to ensure that it doesn't get empty, which on some CPUs would ++ * allow predictions from other (unwanted!) sources to be used. ++ * ++ * We define a CPP macro such that it can be used from both .S files and ++ * inline assembly. It's possible to do a .macro and then include that ++ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. ++ */ ++ ++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ ++#define RSB_FILL_LOOPS 16 /* To avoid underflow */ ++ ++/* ++ * Google experimented with loop-unrolling and this turned out to be ++ * the optimal version — two calls, each with their own speculation ++ * trap should their return address end up getting used, in a loop. ++ */ ++#define __FILL_RETURN_BUFFER(reg, nr, sp) \ ++ mov $(nr/2), reg; \ ++771: \ ++ call 772f; \ ++773: /* speculation trap */ \ ++ pause; \ ++ jmp 773b; \ ++772: \ ++ call 774f; \ ++775: /* speculation trap */ \ ++ pause; \ ++ jmp 775b; \ ++774: \ ++ dec reg; \ ++ jnz 771b; \ ++ add $(BITS_PER_LONG/8) * nr, sp; ++ + #ifdef __ASSEMBLY__ + + /* +@@ -76,6 +118,20 @@ + #endif + .endm + ++ /* ++ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP ++ * monstrosity above, manually. 
++ */ ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ++#ifdef CONFIG_RETPOLINE ++ ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE "jmp .Lskip_rsb_\@", \ ++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ ++ \ftr ++.Lskip_rsb_\@: ++#endif ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_NOSPEC_ALTERNATIVE \ +@@ -119,7 +175,7 @@ + X86_FEATURE_RETPOLINE) + + # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +-#else /* No retpoline */ ++#else /* No retpoline for C / inline asm */ + # define CALL_NOSPEC "call *%[thunk_target]\n" + # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) + #endif +@@ -134,5 +190,25 @@ enum spectre_v2_mitigation { + SPECTRE_V2_IBRS, + }; + ++/* ++ * On VMEXIT we must ensure that no RSB predictions learned in the guest ++ * can be followed in the host, by overwriting the RSB completely. Both ++ * retpoline and IBRS mitigations for Spectre v2 need this; only on future ++ * CPUs with IBRS_ATT *might* it be avoided. ++ */ ++static inline void vmexit_fill_RSB(void) ++{ ++#ifdef CONFIG_RETPOLINE ++ unsigned long loops = RSB_CLEAR_LOOPS / 2; ++ ++ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ++ ALTERNATIVE("jmp 910f", ++ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), ++ X86_FEATURE_RETPOLINE) ++ "910:" ++ : "=&r" (loops), ASM_CALL_CONSTRAINT ++ : "r" (loops) : "memory" ); ++#endif ++} + #endif /* __ASSEMBLY__ */ + #endif /* __NOSPEC_BRANCH_H__ */ +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 8ca1eca..975ea99 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -44,6 +44,7 @@ + #include <asm/debugreg.h> + #include <asm/kvm_para.h> + #include <asm/irq_remapping.h> ++#include <asm/nospec-branch.h> + + #include <asm/virtext.h> + #include "trace.h" +@@ -4886,6 +4887,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + #endif + ); + ++ /* Eliminate branch target predictions from guest mode */ ++ vmexit_fill_RSB(); ++ + #ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); + #else +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 69b8f8a..4ead27f 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -48,6 +48,7 @@ + #include <asm/kexec.h> + #include <asm/apic.h> + #include <asm/irq_remapping.h> ++#include <asm/nospec-branch.h> + + #include "trace.h" + #include "pmu.h" +@@ -8989,6 +8990,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + #endif + ); + ++ /* Eliminate branch target predictions from guest mode */ ++ vmexit_fill_RSB(); ++ + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ + if (debugctlmsr) + update_debugctlmsr(debugctlmsr); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Remove-compile-time-warning.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Remove-compile-time-warning.patch new file mode 100644 index 00000000..9a9f0f3d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Remove-compile-time-warning.patch @@ -0,0 +1,62 @@ +From e30267167bed762e3a2bfd39982315d0b1cb4e73 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 14 Jan 2018 22:13:29 +0100 +Subject: [PATCH 075/102] x86/retpoline: Remove compile time warning + +commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream. + +Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler +does not have retpoline support. 
Linus' rationale for this is:
+
+  It's wrong because it will just make people turn off RETPOLINE, and the
+  asm updates - and return stack clearing - that are independent of the
+  compiler are likely the most important parts because they are likely the
+  ones easiest to target.
+
+  And it's annoying because most people won't be able to do anything about
+  it. The number of people building their own compiler? Very small. So if
+  their distro hasn't got a compiler yet (and pretty much nobody does), the
+  warning is just annoying crap.
+
+  It is already properly reported as part of the sysfs interface. The
+  compile-time warning only encourages bad things.
+
+Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support")
+Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Makefile | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/x86/Makefile b/arch/x86/Makefile
+index 1e1a733..cd22cb8 100644
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -187,8 +187,6 @@ ifdef CONFIG_RETPOLINE
+     RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+     ifneq ($(RETPOLINE_CFLAGS),)
+         KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+-    else
+-        $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
+     endif
+ endif
+ 
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch
new file mode 100644
index 00000000..27c73a1d
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch
@@ -0,0 +1,44 @@
+From 4e428c54800e729730d6278751a7426ddb41b051 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 15 Jan 2018 11:00:54 -0600
+Subject: [PATCH 076/102] objtool: Fix retpoline support for pre-ORC objtool
+
+Objtool 1.0 (pre-ORC) produces the following warning when it encounters
+a retpoline:
+
+  arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function
+
+That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC
+annotations, neither of which are applicable to alternatives. Silence
+the warning for alternative instructions, just like objtool 2.0 already
+does.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
+index ee71d4c..377bff0 100644
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -1221,6 +1221,14 @@ static int validate_uncallable_instructions(struct objtool_file *file)
+ 
+ 	for_each_insn(file, insn) {
+ 		if (!insn->visited && insn->type == INSN_RETURN) {
++
++			/*
++			 * Don't warn about call instructions in unvisited
++			 * retpoline alternatives.
++			 */
++			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
++				continue;
++
+ 			WARN_FUNC("return instruction outside of a callable function",
+ 				  insn->sec, insn->offset);
+ 			warnings++;
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch
new file mode 100644
index 00000000..cf1e93e1
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch
@@ -0,0 +1,79 @@
+From f444abdea27bd6a8063e37ba783e97c2b3ada092 Mon Sep 17 00:00:00 2001
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Mon, 15 Jan 2018 11:44:14 -0500
+Subject: [PATCH 077/102] x86/pti/efi: broken conversion from efi to kernel
+ page table
+
+The page table order must be increased for EFI table in order to avoid a
+bug where NMI tries to change the page table to kernel page table, while
+efi page table is active.
+
+For more discussion about this bug, see this thread:
+http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html
+
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgalloc.h | 11 +++++++++++
+ arch/x86/mm/pgtable.c | 7 -------
+ arch/x86/platform/efi/efi_64.c | 2 +-
+ 3 files changed, 12 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
+index b6d4259..1178a51 100644
+--- a/arch/x86/include/asm/pgalloc.h
++++ b/arch/x86/include/asm/pgalloc.h
+@@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
+  */
+ extern gfp_t __userpte_alloc_gfp;
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
++ * both 8k in size and 8k-aligned. That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
++#else
++#define PGD_ALLOCATION_ORDER 0
++#endif
++
+ /*
+  * Allocate and free page tables.
+  */
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 5aaec8e..209b946 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+ 
+-/*
+- * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+- * both 8k in size and 8k-aligned. That lets us just flip bit 12
+- * in a pointer to swap between the two 4k halves.
+- */ +-#define PGD_ALLOCATION_ORDER kaiser_enabled +- + static inline pgd_t *_pgd_alloc(void) + { + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c +index 2f25a36..dcb2d9d 100644 +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; +- efi_pgd = (pgd_t *)__get_free_page(gfp_mask); ++ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + if (!efi_pgd) + return -ENOMEM; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch new file mode 100644 index 00000000..4f2113dd --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch @@ -0,0 +1,179 @@ +From 8728e5638cd0a4650d4d9bfd4056905fe9797dea Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Fri, 12 Jan 2018 17:49:25 +0000 +Subject: [PATCH 078/102] x86/retpoline: Fill RSB on context switch for + affected CPUs + +commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. + +On context switch from a shallow call stack to a deeper one, as the CPU +does 'ret' up the deeper side it may encounter RSB entries (predictions for +where the 'ret' goes to) which were populated in userspace. + +This is problematic if neither SMEP nor KPTI (the latter of which marks +userspace pages as NX for the kernel) are active, as malicious code in +userspace may then be executed speculatively. + +Overwrite the CPU's return prediction stack with calls which are predicted +to return to an infinite loop, to "capture" speculation if this +happens. This is required both for retpoline, and also in conjunction with +IBRS for !SMEP && !KPTI. + +On Skylake+ the problem is slightly different, and an *underflow* of the +RSB may cause errant branch predictions to occur. So there it's not so much +overwrite, as *filling* the RSB to attempt to prevent it getting +empty. This is only a partial solution for Skylake+ since there are many +other conditions which may result in the RSB becoming empty. The full +solution on Skylake+ is to use IBRS, which will prevent the problem even +when the RSB becomes empty. With IBRS, the RSB-stuffing will not be +required on context switch. 
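+
+[ editor's note: illustrative, not part of the patch. The RSB is a
+  small hardware stack (16 or 32 entries): each CALL pushes a predicted
+  return address, each RET pops one. A hypothetical sketch of why call
+  depth matters:
+
+	static int depth(int n)
+	{
+		if (n <= 0)
+			return 0;
+		return depth(n - 1) + 1;	/* each return pops an RSB entry */
+	}
+
+  A task switched out deep inside depth(64) leaves the incoming task's
+  'ret' instructions consuming entries it never pushed: stale ones on
+  most CPUs, or none at all on Skylake+, where an empty RSB falls back
+  to the attacker-trainable BTB. Stuffing RSB_CLEAR_LOOPS benign trap
+  entries on context switch closes both holes. ]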
+
+[ tglx: Added missing vendor check and slightly massaged comments and
+  changelog ]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 11 +++++++++++
+ arch/x86/entry/entry_64.S | 11 +++++++++++
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++++
+ 4 files changed, 59 insertions(+)
+
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index bdc9aea..a76dc73 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -229,6 +229,17 @@ ENTRY(__switch_to_asm)
+ 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ #endif
+ 
++#ifdef CONFIG_RETPOLINE
++	/*
++	 * When switching from a shallower to a deeper call stack
++	 * the RSB may either underflow or use entries populated
++	 * with userspace addresses. On CPUs where those concerns
++	 * exist, overwrite the RSB with entries which capture
++	 * speculative execution to prevent attack.
++	 */
++	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++#endif
++
+ 	/* restore callee-saved registers */
+ 	popl	%esi
+ 	popl	%edi
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f7ebaa1..eff47f5 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -427,6 +427,17 @@ ENTRY(__switch_to_asm)
+ 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ #endif
+ 
++#ifdef CONFIG_RETPOLINE
++	/*
++	 * When switching from a shallower to a deeper call stack
++	 * the RSB may either underflow or use entries populated
++	 * with userspace addresses. On CPUs where those concerns
++	 * exist, overwrite the RSB with entries which capture
++	 * speculative execution to prevent attack.
++	 */
++	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++#endif
++
+ 	/* restore callee-saved registers */
+ 	popq	%r15
+ 	popq	%r14
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 4467568..2f60cb5 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -200,6 +200,7 @@
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+ 
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club...
*/
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 49d25dd..8cacf62 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -22,6 +22,7 @@
+ #include <asm/alternative.h>
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
++#include <asm/intel-family.h>
+ 
+ static void __init spectre_v2_select_mitigation(void);
+ 
+@@ -154,6 +155,23 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ 	return SPECTRE_V2_CMD_NONE;
+ }
+ 
++/* Check for Skylake-like CPUs (for RSB handling) */
++static bool __init is_skylake_era(void)
++{
++	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
++	    boot_cpu_data.x86 == 6) {
++		switch (boot_cpu_data.x86_model) {
++		case INTEL_FAM6_SKYLAKE_MOBILE:
++		case INTEL_FAM6_SKYLAKE_DESKTOP:
++		case INTEL_FAM6_SKYLAKE_X:
++		case INTEL_FAM6_KABYLAKE_MOBILE:
++		case INTEL_FAM6_KABYLAKE_DESKTOP:
++			return true;
++		}
++	}
++	return false;
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -212,6 +230,24 @@ static void __init spectre_v2_select_mitigation(void)
+ 
+ 	spectre_v2_enabled = mode;
+ 	pr_info("%s\n", spectre_v2_strings[mode]);
++
++	/*
++	 * If neither SMEP nor KPTI are available, there is a risk of
++	 * hitting userspace addresses in the RSB after a context switch
++	 * from a shallow call stack to a deeper one. To prevent this fill
++	 * the entire RSB, even when using IBRS.
++	 *
++	 * Skylake era CPUs have a separate issue with *underflow* of the
++	 * RSB, when they will predict 'ret' targets from the generic BTB.
++	 * The proper mitigation for this is IBRS. If IBRS is not supported
++	 * or deactivated in favour of retpolines the RSB fill on context
++	 * switch is required.
++	 */
++	if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
++	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
++		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
++		pr_info("Filling RSB on context switch\n");
++	}
+ }
+ 
+ #undef pr_fmt
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch
new file mode 100644
index 00000000..87f4783e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch
@@ -0,0 +1,94 @@
+From efb8168d47849c6ab8dcda4a96f6246645c23a5a Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Sat, 13 Jan 2018 17:27:30 -0600
+Subject: [PATCH 079/102] x86/retpoline: Add LFENCE to the retpoline/RSB
+ filling RSB macros
+
+commit 28d437d550e1e39f805d99f9f8ac399c778827b7 upstream.
+
+The PAUSE instruction is currently used in the retpoline and RSB filling
+macros as a speculation trap. The use of PAUSE was originally suggested
+because it showed a very, very small difference in the amount of
+cycles/time used to execute the retpoline as compared to LFENCE. On AMD,
+the PAUSE instruction is not a serializing instruction, so the pause/jmp
+loop will use excess power as it is speculated over waiting for return
+to mispredict to the correct target.
+ +The RSB filling macro is applicable to AMD, and, if software is unable to +verify that LFENCE is serializing on AMD (possible when running under a +hypervisor), the generic retpoline support will be used and, so, is also +applicable to AMD. Keep the current usage of PAUSE for Intel, but add an +LFENCE instruction to the speculation trap for AMD. + +The same sequence has been adopted by GCC for the GCC generated retpolines. + +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@alien8.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Acked-by: Arjan van de Ven <arjan@linux.intel.com> +Cc: Rik van Riel <riel@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Paul Turner <pjt@google.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Jiri Kosina <jikos@kernel.org> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Cc: Kees Cook <keescook@google.com> +Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 402a11c..7b45d84 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -11,7 +11,7 @@ + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an +- * infinite 'pause; jmp' loop to capture speculative execution. ++ * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to +@@ -38,11 +38,13 @@ + call 772f; \ + 773: /* speculation trap */ \ + pause; \ ++ lfence; \ + jmp 773b; \ + 772: \ + call 774f; \ + 775: /* speculation trap */ \ + pause; \ ++ lfence; \ + jmp 775b; \ + 774: \ + dec reg; \ +@@ -73,6 +75,7 @@ + call .Ldo_rop_\@ + .Lspec_trap_\@: + pause ++ lfence + jmp .Lspec_trap_\@ + .Ldo_rop_\@: + mov \reg, (%_ASM_SP) +@@ -165,6 +168,7 @@ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ ++ " lfence;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-objtool-Improve-error-message-for-bad-file-argument.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-objtool-Improve-error-message-for-bad-file-argument.patch new file mode 100644 index 00000000..44296532 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-objtool-Improve-error-message-for-bad-file-argument.patch @@ -0,0 +1,53 @@ +From 509a2d106a745c528ffcdd71af04e3886ea3732a Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Mon, 15 Jan 2018 08:17:08 -0600 +Subject: [PATCH 080/102] objtool: Improve error message for bad file argument + +commit 385d11b152c4eb638eeb769edcb3249533bb9a00 upstream. 
+ +If a nonexistent file is supplied to objtool, it complains with a +non-helpful error: + + open: No such file or directory + +Improve it to: + + objtool: Can't open 'foo': No such file or directory + +Reported-by: Markus <M4rkusXXL@web.de> +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/406a3d00a21225eee2819844048e17f68523ccf6.1516025651.git.jpoimboe@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/objtool/elf.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c +index 0d7983a..14a74d4 100644 +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -26,6 +26,7 @@ + #include <stdlib.h> + #include <string.h> + #include <unistd.h> ++#include <errno.h> + + #include "elf.h" + #include "warn.h" +@@ -358,7 +359,8 @@ struct elf *elf_open(const char *name) + + elf->fd = open(name, O_RDONLY); + if (elf->fd == -1) { +- perror("open"); ++ fprintf(stderr, "objtool: Can't open '%s': %s\n", ++ name, strerror(errno)); + goto err; + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch new file mode 100644 index 00000000..fb569cc7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch @@ -0,0 +1,73 @@ +From c864a508fb128fb4d064063d5c97fb42284e2aca Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Tue, 16 Jan 2018 16:42:25 +0100 +Subject: [PATCH 081/102] x86/cpufeature: Move processor tracing out of + scattered features +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream. + +Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX), +so do not duplicate it in the scattered features word. + +Besides being more tidy, this will be useful for KVM when it presents +processor tracing to the guests. KVM selects host features that are +supported by both the host kernel (depending on command line options, +CPU errata, or whatever) and KVM. Whenever a full feature word exists, +KVM's code is written in the expectation that the CPUID bit number +matches the X86_FEATURE_* bit number, but this is not the case for +X86_FEATURE_INTEL_PT. 
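+
+[ editor's note: illustrative, not part of the patch. Feature words in
+  cpufeatures.h are laid out so that X86_FEATURE_x = word*32 + bit, and
+  word 9 mirrors CPUID(EAX=7,ECX=0).EBX one-for-one. After the move the
+  kernel bit number therefore equals the hardware CPUID bit, which is
+  the invariant KVM relies on (sketch):
+
+	#define X86_FEATURE_INTEL_PT	(9*32 + 25)	/* word 9, bit 25 */
+
+	static bool cpu_has_intel_pt(void)
+	{
+		unsigned int eax = 7, ebx, ecx = 0, edx;
+
+		asm volatile("cpuid"
+			     : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
+		/* bit 25 of EBX == X86_FEATURE_INTEL_PT % 32: no remapping */
+		return ebx & (1u << (X86_FEATURE_INTEL_PT % 32));
+	}
+
+  A scattered feature instead needs an explicit translation entry, as
+  the deleted scattered.c line below shows. ]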
+ +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Borislav Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Luwei Kang <luwei.kang@intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: kvm@vger.kernel.org +Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/kernel/cpu/scattered.c | 1 - + 2 files changed, 1 insertion(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 2f60cb5..8537a21 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -197,7 +197,6 @@ + #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ + +-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ + #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +@@ -236,6 +235,7 @@ + #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ + #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ + #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ ++#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ + #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ + #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ + #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 1db8dc4..b0dd9ae 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { +- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-module-Add-retpoline-tag-to-VERMAGIC.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-module-Add-retpoline-tag-to-VERMAGIC.patch new file mode 100644 index 00000000..424f3c6b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-module-Add-retpoline-tag-to-VERMAGIC.patch @@ -0,0 +1,55 @@ +From 134164c659e681c1ed401ddd773b12852a1b9019 Mon Sep 17 00:00:00 2001 +From: Andi Kleen <ak@linux.intel.com> +Date: Tue, 16 Jan 2018 12:52:28 -0800 +Subject: [PATCH 082/102] module: Add retpoline tag to VERMAGIC + +commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream. + +Add a marker for retpoline to the module VERMAGIC. This catches the case +when a non RETPOLINE compiled module gets loaded into a retpoline kernel, +making it insecure. + +It doesn't handle the case when retpoline has been runtime disabled. Even +in this case the match of the retcompile status will be enforced. 
This +implies that even with retpoline run time disabled all modules loaded need +to be recompiled. + +Signed-off-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: rusty@rustcorp.com.au +Cc: arjan.van.de.ven@intel.com +Cc: jeyu@kernel.org +Cc: torvalds@linux-foundation.org +Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/vermagic.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h +index 6f8fbcf..a3d0493 100644 +--- a/include/linux/vermagic.h ++++ b/include/linux/vermagic.h +@@ -24,10 +24,16 @@ + #ifndef MODULE_ARCH_VERMAGIC + #define MODULE_ARCH_VERMAGIC "" + #endif ++#ifdef RETPOLINE ++#define MODULE_VERMAGIC_RETPOLINE "retpoline " ++#else ++#define MODULE_VERMAGIC_RETPOLINE "" ++#endif + + #define VERMAGIC_STRING \ + UTS_RELEASE " " \ + MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ +- MODULE_ARCH_VERMAGIC ++ MODULE_ARCH_VERMAGIC \ ++ MODULE_VERMAGIC_RETPOLINE + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch new file mode 100644 index 00000000..b66a63ed --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch @@ -0,0 +1,48 @@ +From 3934caaec25585f9562f8a2fc04e695c9fbd190d Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Tue, 26 Dec 2017 23:43:54 -0600 +Subject: [PATCH 083/102] x86/cpu, x86/pti: Do not enable PTI on AMD processors + +commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream. + +AMD processors are not subject to the types of attacks that the kernel +page table isolation feature protects against. The AMD microarchitecture +does not allow memory references, including speculative references, that +access higher privileged data when running in a lesser privileged mode +when that access would result in a page fault. + +Disable page table isolation by default on AMD processors by not setting +the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI +is set. 
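+
+For illustration only, not part of this patch: with the vendor check in
+place an AMD machine should no longer report the Meltdown bug. Assuming
+the kernel exposes set X86_BUG_* flags in the "bugs" field of
+/proc/cpuinfo, a quick user space spot check could look like:
+
+	#include <stdio.h>
+	#include <string.h>
+
+	int main(void)
+	{
+		char line[1024];
+		FILE *f = fopen("/proc/cpuinfo", "r");
+
+		if (!f)
+			return 1;
+		while (fgets(line, sizeof(line), f)) {
+			if (strncmp(line, "bugs", 4))
+				continue;
+			/* illustrative: e.g. "bugs : cpu_meltdown ..." */
+			puts(strstr(line, "cpu_meltdown") ?
+			     "cpu_meltdown listed" : "cpu_meltdown not listed");
+			break;
+		}
+		fclose(f);
+		return 0;
+	}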
+ +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net +Cc: Nick Lowe <nick.lowe@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 7b9ae04..d198ae0 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -883,8 +883,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + + setup_force_cpu_cap(X86_FEATURE_ALWAYS); + +- /* Assume for now that ALL x86 CPUs are insecure */ +- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); ++ if (c->x86_vendor != X86_VENDOR_AMD) ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-mce-Make-machine-check-speculation-protected.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-mce-Make-machine-check-speculation-protected.patch new file mode 100644 index 00000000..5586d316 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-mce-Make-machine-check-speculation-protected.patch @@ -0,0 +1,69 @@ +From 0cc6142ee5773328340c3fdfdbdb30debea7643a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 18 Jan 2018 16:28:26 +0100 +Subject: [PATCH 084/102] x86/mce: Make machine check speculation protected + +commit 6f41c34d69eb005e7848716bbcafc979b35037d5 upstream. + +The machine check idtentry uses an indirect branch directly from the low +level code. This evades the speculation protection. + +Replace it by a direct call into C code and issue the indirect call there +so the compiler can apply the proper speculation protection. 
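+
+For illustration only, not part of this patch: the pattern generalizes
+to any low level asm entry that dispatches through a function pointer.
+Give it a direct C entry point so the indirect call is emitted by the
+compiler, which can then apply the retpoline thunk. A sketch with
+hypothetical names (struct pt_regs assumed from the kernel headers):
+
+	/* the indirect branch now lives in C, where a retpoline
+	 * enabled compiler rewrites it as a safe thunk call
+	 */
+	void (*handler_vector)(struct pt_regs *, long);
+
+	dotraplinkage void do_handler(struct pt_regs *regs, long error_code)
+	{
+		handler_vector(regs, error_code);
+	}
+
+The asm side then names do_handler as a direct call target, exactly as
+the machine check hunk below does with do_mce.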
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@alien8.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Niced-by: Peter Zijlstra <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S        | 2 +-
+ arch/x86/include/asm/traps.h     | 1 +
+ arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index eff47f5..16146eb 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1064,7 +1064,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
+ #endif
+ 
+ #ifdef CONFIG_X86_MCE
+-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
++idtentry machine_check do_mce has_error_code=0 paranoid=1
+ #endif
+ 
+ /*
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index 01fd0a7..688315b 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
+ #ifdef CONFIG_X86_32
+ dotraplinkage void do_iret_error(struct pt_regs *, long);
+ #endif
++dotraplinkage void do_mce(struct pt_regs *, long);
+ 
+ static inline int get_si_code(unsigned long condition)
+ {
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
+index 4671229..72bcd08 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -1773,6 +1773,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+ void (*machine_check_vector)(struct pt_regs *, long error_code) =
+ 						unexpected_machine_check;
+ 
++dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
++{
++	machine_check_vector(regs, error_code);
++}
++
+ /*
+  * Called for each booted CPU to set up machine checks.
+  * Must be called with preempt off:
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-retpoline-Introduce-start-end-markers-of-indirect-th.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-retpoline-Introduce-start-end-markers-of-indirect-th.patch
new file mode 100644
index 00000000..8c2e998f
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-retpoline-Introduce-start-end-markers-of-indirect-th.patch
@@ -0,0 +1,78 @@
+From 4e78cb7647511b318443ad83c07b29a35a3bdf98 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:14:21 +0900
+Subject: [PATCH 085/102] retpoline: Introduce start/end markers of indirect
+ thunk
+
+commit 736e80a4213e9bbce40a7c050337047128b472ac upstream.
+
+Introduce start/end markers of __x86_indirect_thunk_* functions.
+To make it easy, consolidate .text.__x86.indirect_thunk.* sections
+to one .text.__x86.indirect_thunk section and put it in the
+end of kernel text section and adds __indirect_thunk_start/end
+so that other subsystem (e.g. kprobes) can identify it.
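+
+For illustration only, not part of this patch: a consumer of the new
+markers can classify an address with a plain range check. A sketch of a
+hypothetical helper:
+
+	/* true if @addr points into a compiler generated
+	 * __x86_indirect_thunk_* body
+	 */
+	static bool addr_in_indirect_thunk(unsigned long addr)
+	{
+		return addr >= (unsigned long)__indirect_thunk_start &&
+		       addr <  (unsigned long)__indirect_thunk_end;
+	}
+
+This is the kind of test the kprobes patches that follow build on.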
+ +Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 3 +++ + arch/x86/kernel/vmlinux.lds.S | 7 +++++++ + arch/x86/lib/retpoline.S | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 7b45d84..19ba5ad 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -194,6 +194,9 @@ enum spectre_v2_mitigation { + SPECTRE_V2_IBRS, + }; + ++extern char __indirect_thunk_start[]; ++extern char __indirect_thunk_end[]; ++ + /* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index dbf67f6..c7194e9 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -105,6 +105,13 @@ SECTIONS + SOFTIRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) ++ ++#ifdef CONFIG_RETPOLINE ++ __indirect_thunk_start = .; ++ *(.text.__x86.indirect_thunk) ++ __indirect_thunk_end = .; ++#endif ++ + /* End of text section */ + _etext = .; + } :text = 0x9090 +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index cb45c6c..d3415dc 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -9,7 +9,7 @@ + #include <asm/nospec-branch.h> + + .macro THUNK reg +- .section .text.__x86.indirect_thunk.\reg ++ .section .text.__x86.indirect_thunk + + ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch new file mode 100644 index 00000000..23e51a92 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch @@ -0,0 +1,43 @@ +From a95461ad5d09956144c7f29354dc3c16c43a3067 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu <mhiramat@kernel.org> +Date: Fri, 19 Jan 2018 01:14:51 +0900 +Subject: [PATCH 086/102] kprobes/x86: Blacklist indirect thunk functions for + kprobes + +commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream. + +Mark __x86_indirect_thunk_* functions as blacklist for kprobes +because those functions can be called from anywhere in the kernel +including blacklist functions of kprobes. 
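+
+For illustration only, not part of this patch: _ASM_NOKPROBE() is the
+assembly side counterpart of NOKPROBE_SYMBOL() in C. Both record the
+symbol's address in the _kprobe_blacklist section, which the kprobes
+core consults before arming a probe. A sketch with a hypothetical
+function:
+
+	#include <linux/kprobes.h>
+
+	/* must never be probed: can run in blacklisted context */
+	static void fragile_helper(void)
+	{
+	}
+	NOKPROBE_SYMBOL(fragile_helper);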
+ +Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/lib/retpoline.S | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index d3415dc..dfb2ba9 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) ++#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) ++#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + + GENERATE_THUNK(_ASM_AX) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch new file mode 100644 index 00000000..491a6eca --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch @@ -0,0 +1,83 @@ +From 43265fd2e2bf8ea746717f420ca21b730904dae8 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu <mhiramat@kernel.org> +Date: Fri, 19 Jan 2018 01:15:20 +0900 +Subject: [PATCH 087/102] kprobes/x86: Disable optimizing on the function jumps + to indirect thunk + +commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream. + +Since indirect jump instructions will be replaced by jump +to __x86_indirect_thunk_*, those jmp instruction must be +treated as an indirect jump. Since optprobe prohibits to +optimize probes in the function which uses an indirect jump, +it also needs to find out the function which jump to +__x86_indirect_thunk_* and disable optimization. + +Add a check that the jump target address is between the +__indirect_thunk_start/end when optimizing kprobe. 
+ +Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> +Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c +index 3bb4c5f..90f8cd6 100644 +--- a/arch/x86/kernel/kprobes/opt.c ++++ b/arch/x86/kernel/kprobes/opt.c +@@ -37,6 +37,7 @@ + #include <asm/alternative.h> + #include <asm/insn.h> + #include <asm/debugreg.h> ++#include <asm/nospec-branch.h> + + #include "common.h" + +@@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) + } + + /* Check whether insn is indirect jump */ +-static int insn_is_indirect_jump(struct insn *insn) ++static int __insn_is_indirect_jump(struct insn *insn) + { + return ((insn->opcode.bytes[0] == 0xff && + (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ +@@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) + return (start <= target && target <= start + len); + } + ++static int insn_is_indirect_jump(struct insn *insn) ++{ ++ int ret = __insn_is_indirect_jump(insn); ++ ++#ifdef CONFIG_RETPOLINE ++ /* ++ * Jump to x86_indirect_thunk_* is treated as an indirect jump. ++ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with ++ * older gcc may use indirect jump. So we add this check instead of ++ * replace indirect-jump check. ++ */ ++ if (!ret) ++ ret = insn_jump_into_range(insn, ++ (unsigned long)__indirect_thunk_start, ++ (unsigned long)__indirect_thunk_end - ++ (unsigned long)__indirect_thunk_start); ++#endif ++ return ret; ++} ++ + /* Decode whole function to ensure any instructions don't jump into target */ + static int can_optimize(unsigned long paddr) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-x86-pti-Document-fix-wrong-index.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-x86-pti-Document-fix-wrong-index.patch new file mode 100644 index 00000000..4ce27937 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-x86-pti-Document-fix-wrong-index.patch @@ -0,0 +1,34 @@ +From dd2bd68f2eeb07af7dda1e5db82645d3c877d427 Mon Sep 17 00:00:00 2001 +From: "zhenwei.pi" <zhenwei.pi@youruncloud.com> +Date: Thu, 18 Jan 2018 09:04:52 +0800 +Subject: [PATCH 088/102] x86/pti: Document fix wrong index + +commit 98f0fceec7f84d80bc053e49e596088573086421 upstream. + +In section <2. Runtime Cost>, fix wrong index. 
+ +Signed-off-by: zhenwei.pi <zhenwei.pi@youruncloud.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: dave.hansen@linux.intel.com +Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/x86/pti.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt +index d11eff6..5cd5843 100644 +--- a/Documentation/x86/pti.txt ++++ b/Documentation/x86/pti.txt +@@ -78,7 +78,7 @@ this protection comes at a cost: + non-PTI SYSCALL entry code, so requires mapping fewer + things into the userspace page tables. The downside is + that stacks must be switched at entry time. +- d. Global pages are disabled for all kernel structures not ++ c. Global pages are disabled for all kernel structures not + mapped into both kernel and userspace page tables. This + feature of the MMU allows different processes to share TLB + entries mapping the kernel. Losing the feature means more +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch new file mode 100644 index 00000000..89fc7ffa --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch @@ -0,0 +1,61 @@ +From 73c4860073f5f9bce1aed5cb00de610d0a279951 Mon Sep 17 00:00:00 2001 +From: Andi Kleen <ak@linux.intel.com> +Date: Wed, 17 Jan 2018 14:53:28 -0800 +Subject: [PATCH 089/102] x86/retpoline: Optimize inline assembler for + vmexit_fill_RSB + +commit 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 upstream. + +The generated assembler for the C fill RSB inline asm operations has +several issues: + +- The C code sets up the loop register, which is then immediately + overwritten in __FILL_RETURN_BUFFER with the same value again. + +- The C code also passes in the iteration count in another register, which + is not used at all. + +Remove these two unnecessary operations. Just rely on the single constant +passed to the macro for the iterations. 
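+
+For illustration only, not part of this patch: the general rule is that
+a value the asm body computes for itself should be an output-only
+operand rather than a matched input. A generic sketch with a made-up
+asm body (not the real RSB fill macro):
+
+	unsigned long scratch;
+
+	/* before: "=&r" output plus "r" input makes the compiler
+	 * materialize scratch in a register first, only for the asm
+	 * body to overwrite it:
+	 *
+	 *	asm volatile("..." : "=&r" (scratch) : "r" (scratch)
+	 *		     : "memory");
+	 *
+	 * after: output-only, the register belongs to the asm body
+	 * from the start:
+	 */
+	asm volatile("mov $32, %0\n\t"
+		     "1: dec %0\n\t"
+		     "jnz 1b"
+		     : "=r" (scratch) : : "memory");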
+ +Signed-off-by: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: dave.hansen@intel.com +Cc: gregkh@linuxfoundation.org +Cc: torvalds@linux-foundation.org +Cc: arjan@linux.intel.com +Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 19ba5ad..4ad4108 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -206,16 +206,17 @@ extern char __indirect_thunk_end[]; + static inline void vmexit_fill_RSB(void) + { + #ifdef CONFIG_RETPOLINE +- unsigned long loops = RSB_CLEAR_LOOPS / 2; ++ unsigned long loops; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" +- : "=&r" (loops), ASM_CALL_CONSTRAINT +- : "r" (loops) : "memory" ); ++ : "=r" (loops), ASM_CALL_CONSTRAINT ++ : : "memory" ); + #endif + } ++ + #endif /* __ASSEMBLY__ */ + #endif /* __NOSPEC_BRANCH_H__ */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch new file mode 100644 index 00000000..eb877649 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch @@ -0,0 +1,53 @@ +From 81db9dee58bee8b742d365071433ab1c4c185777 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Date: Wed, 24 Jan 2018 15:28:17 +0100 +Subject: [PATCH 090/102] Revert "module: Add retpoline tag to VERMAGIC" + +commit 5132ede0fe8092b043dae09a7cc32b8ae7272baa upstream. + +This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. + +Turns out distros do not want to make retpoline as part of their "ABI", +so this patch should not have been merged. Sorry Andi, this was my +fault, I suggested it when your original patch was the "correct" way of +doing this instead. 
+ +Reported-by: Jiri Kosina <jikos@kernel.org> +Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") +Acked-by: Andi Kleen <ak@linux.intel.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: rusty@rustcorp.com.au +Cc: arjan.van.de.ven@intel.com +Cc: jeyu@kernel.org +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/vermagic.h | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h +index a3d0493..6f8fbcf 100644 +--- a/include/linux/vermagic.h ++++ b/include/linux/vermagic.h +@@ -24,16 +24,10 @@ + #ifndef MODULE_ARCH_VERMAGIC + #define MODULE_ARCH_VERMAGIC "" + #endif +-#ifdef RETPOLINE +-#define MODULE_VERMAGIC_RETPOLINE "retpoline " +-#else +-#define MODULE_VERMAGIC_RETPOLINE "" +-#endif + + #define VERMAGIC_STRING \ + UTS_RELEASE " " \ + MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ +- MODULE_ARCH_VERMAGIC \ +- MODULE_VERMAGIC_RETPOLINE ++ MODULE_ARCH_VERMAGIC + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Map-the-vsyscall-page-with-_PAGE_USER.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Map-the-vsyscall-page-with-_PAGE_USER.patch new file mode 100644 index 00000000..9d37565b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Map-the-vsyscall-page-with-_PAGE_USER.patch @@ -0,0 +1,151 @@ +From 078997d9f5a841221c4090e58fcdb825a77baf9c Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Thu, 4 Jan 2018 17:42:45 +0100 +Subject: [PATCH 091/102] Map the vsyscall page with _PAGE_USER + +This needs to happen early in kaiser_pagetable_walk(), before the +hierarchy is established so that _PAGE_USER permission can be really +set. + +A proper fix would be to teach kaiser_pagetable_walk() to update those +permissions but the vsyscall page is the only exception here so ... + +Signed-off-by: Borislav Petkov <bp@suse.de> +Acked-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/vsyscall/vsyscall_64.c | 5 +++++ + arch/x86/include/asm/vsyscall.h | 2 ++ + arch/x86/mm/kaiser.c | 34 ++++++++++++++++++++++++++++++---- + 3 files changed, 37 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c +index 636c4b3..6bb7e92 100644 +--- a/arch/x86/entry/vsyscall/vsyscall_64.c ++++ b/arch/x86/entry/vsyscall/vsyscall_64.c +@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *str) + } + early_param("vsyscall", vsyscall_setup); + ++bool vsyscall_enabled(void) ++{ ++ return vsyscall_mode != NONE; ++} ++ + static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, + const char *message) + { +diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h +index 6ba66ee..4865e10 100644 +--- a/arch/x86/include/asm/vsyscall.h ++++ b/arch/x86/include/asm/vsyscall.h +@@ -12,12 +12,14 @@ extern void map_vsyscall(void); + * Returns true if handled. 
+ */ + extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); ++extern bool vsyscall_enabled(void); + #else + static inline void map_vsyscall(void) {} + static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) + { + return false; + } ++static inline bool vsyscall_enabled(void) { return false; } + #endif + + #endif /* _ASM_X86_VSYSCALL_H */ +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index 42a5307..a8ade08 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -19,6 +19,7 @@ + #include <asm/pgalloc.h> + #include <asm/desc.h> + #include <asm/cmdline.h> ++#include <asm/vsyscall.h> + + int kaiser_enabled __read_mostly = 1; + EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ +@@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +-static pte_t *kaiser_pagetable_walk(unsigned long address) ++static pte_t *kaiser_pagetable_walk(unsigned long address, bool user) + { + pmd_t *pmd; + pud_t *pud; + pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); ++ unsigned long prot = _KERNPG_TABLE; + + if (pgd_none(*pgd)) { + WARN_ONCE(1, "All shadow pgds should have been populated"); +@@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + ++ if (user) { ++ /* ++ * The vsyscall page is the only page that will have ++ * _PAGE_USER set. Catch everything else. ++ */ ++ BUG_ON(address != VSYSCALL_ADDR); ++ ++ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); ++ prot = _PAGE_TABLE; ++ } ++ + pud = pud_offset(pgd, address); + /* The shadow page tables do not use large mappings: */ + if (pud_large(*pud)) { +@@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pud_none(*pud)) { +- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); ++ set_pud(pud, __pud(prot | __pa(new_pmd_page))); + __inc_zone_page_state(virt_to_page((void *) + new_pmd_page), NR_KAISERTABLE); + } else +@@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) + return NULL; + spin_lock(&shadow_table_allocation_lock); + if (pmd_none(*pmd)) { +- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); ++ set_pmd(pmd, __pmd(prot | __pa(new_pte_page))); + __inc_zone_page_state(virt_to_page((void *) + new_pte_page), NR_KAISERTABLE); + } else +@@ -193,7 +206,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + ret = -EIO; + break; + } +- pte = kaiser_pagetable_walk(address); ++ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER); + if (!pte) { + ret = -ENOMEM; + break; +@@ -320,6 +333,19 @@ void __init kaiser_init(void) + + kaiser_init_all_pgds(); + ++ /* ++ * Note that this sets _PAGE_USER and it needs to happen when the ++ * pagetable hierarchy gets created, i.e., early. Otherwise ++ * kaiser_pagetable_walk() will encounter initialized PTEs in the ++ * hierarchy and not set the proper permissions, leading to the ++ * pagefaults with page-protection violations when trying to read the ++ * vsyscall page. For example. 
++ */ ++ if (vsyscall_enabled()) ++ kaiser_add_user_map_early((void *)VSYSCALL_ADDR, ++ PAGE_SIZE, ++ __PAGE_KERNEL_VSYSCALL); ++ + for_each_possible_cpu(cpu) { + void *percpu_vaddr = __per_cpu_user_mapped_start + + per_cpu_offset(cpu); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch new file mode 100644 index 00000000..f26d4b42 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch @@ -0,0 +1,75 @@ +From bce3b705d80c6ce8b6f87a29d28cc1ec99665442 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben.hutchings@codethink.co.uk> +Date: Fri, 26 Jan 2018 16:23:02 +0000 +Subject: [PATCH 092/102] vsyscall: Fix permissions for emulate mode with + KAISER/PTI + +The backport of KAISER to 4.4 turned vsyscall emulate mode into native +mode. Add a vsyscall_pgprot variable to hold the correct page +protections, like Borislav and Hugh did for 3.2 and 3.18. + +Cc: Borislav Petkov <bp@suse.de> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/vsyscall/vsyscall_64.c | 7 ++++--- + arch/x86/include/asm/vsyscall.h | 1 + + arch/x86/mm/kaiser.c | 2 +- + 3 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c +index 6bb7e92..0174290 100644 +--- a/arch/x86/entry/vsyscall/vsyscall_64.c ++++ b/arch/x86/entry/vsyscall/vsyscall_64.c +@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = + #else + EMULATE; + #endif ++unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL; + + static int __init vsyscall_setup(char *str) + { +@@ -336,11 +337,11 @@ void __init map_vsyscall(void) + extern char __vsyscall_page; + unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); + ++ if (vsyscall_mode != NATIVE) ++ vsyscall_pgprot = __PAGE_KERNEL_VVAR; + if (vsyscall_mode != NONE) + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, +- vsyscall_mode == NATIVE +- ? 
PAGE_KERNEL_VSYSCALL +- : PAGE_KERNEL_VVAR); ++ __pgprot(vsyscall_pgprot)); + + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != + (unsigned long)VSYSCALL_ADDR); +diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h +index 4865e10..9ee8506 100644 +--- a/arch/x86/include/asm/vsyscall.h ++++ b/arch/x86/include/asm/vsyscall.h +@@ -13,6 +13,7 @@ extern void map_vsyscall(void); + */ + extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); + extern bool vsyscall_enabled(void); ++extern unsigned long vsyscall_pgprot; + #else + static inline void map_vsyscall(void) {} + static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index a8ade08..ec678aa 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -344,7 +344,7 @@ void __init kaiser_init(void) + if (vsyscall_enabled()) + kaiser_add_user_map_early((void *)VSYSCALL_ADDR, + PAGE_SIZE, +- __PAGE_KERNEL_VSYSCALL); ++ vsyscall_pgprot); + + for_each_possible_cpu(cpu) { + void *percpu_vaddr = __per_cpu_user_mapped_start + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch new file mode 100644 index 00000000..8dce43f7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch @@ -0,0 +1,463 @@ +From ae18a063a2a05514cf0821c68eecf75831c6200f Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Fri, 21 Jul 2017 00:00:21 +0200 +Subject: [PATCH 093/102] bpf: fix mixed signed/unsigned derived min/max value + bounds + +[ Upstream commit 4cabc5b186b5427b9ee5a7495172542af105f02b ] + +Edward reported that there's an issue in min/max value bounds +tracking when signed and unsigned compares both provide hints +on limits when having unknown variables. E.g. a program such +as the following should have been rejected: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff8a94cda93400 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+7 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = -1 + 10: (2d) if r1 > r2 goto pc+3 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 11: (65) if r1 s> 0x1 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 12: (0f) r0 += r1 + 13: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 14: (b7) r0 = 0 + 15: (95) exit + +What happens is that in the first part ... + + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = -1 + 10: (2d) if r1 > r2 goto pc+3 + +... r1 carries an unsigned value, and is compared as unsigned +against a register carrying an immediate. Verifier deduces in +reg_set_min_max() that since the compare is unsigned and operation +is greater than (>), that in the fall-through/false case, r1's +minimum bound must be 0 and maximum bound must be r2. 
Latter is +larger than the bound and thus max value is reset back to being +'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now +'R1=inv,min_value=0'. The subsequent test ... + + 11: (65) if r1 s> 0x1 goto pc+2 + +... is a signed compare of r1 with immediate value 1. Here, +verifier deduces in reg_set_min_max() that since the compare +is signed this time and operation is greater than (>), that +in the fall-through/false case, we can deduce that r1's maximum +bound must be 1, meaning with prior test, we result in r1 having +the following state: R1=inv,min_value=0,max_value=1. Given that +the actual value this holds is -8, the bounds are wrongly deduced. +When this is being added to r0 which holds the map_value(_adj) +type, then subsequent store access in above case will go through +check_mem_access() which invokes check_map_access_adj(), that +will then probe whether the map memory is in bounds based +on the min_value and max_value as well as access size since +the actual unknown value is min_value <= x <= max_value; commit +fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, +_adj} register types") provides some more explanation on the +semantics. + +It's worth to note in this context that in the current code, +min_value and max_value tracking are used for two things, i) +dynamic map value access via check_map_access_adj() and since +commit 06c1c049721a ("bpf: allow helpers access to variable memory") +ii) also enforced at check_helper_mem_access() when passing a +memory address (pointer to packet, map value, stack) and length +pair to a helper and the length in this case is an unknown value +defining an access range through min_value/max_value in that +case. The min_value/max_value tracking is /not/ used in the +direct packet access case to track ranges. However, the issue +also affects case ii), for example, the following crafted program +based on the same principle must be rejected as well: + + 0: (b7) r2 = 0 + 1: (bf) r3 = r10 + 2: (07) r3 += -512 + 3: (7a) *(u64 *)(r10 -16) = -8 + 4: (79) r4 = *(u64 *)(r10 -16) + 5: (b7) r6 = -1 + 6: (2d) if r4 > r6 goto pc+5 + R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 + R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 7: (65) if r4 s> 0x1 goto pc+4 + R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 + R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 + R10=fp + 8: (07) r4 += 1 + 9: (b7) r5 = 0 + 10: (6a) *(u16 *)(r10 -512) = 0 + 11: (85) call bpf_skb_load_bytes#26 + 12: (b7) r0 = 0 + 13: (95) exit + +Meaning, while we initialize the max_value stack slot that the +verifier thinks we access in the [1,2] range, in reality we +pass -7 as length which is interpreted as u32 in the helper. +Thus, this issue is relevant also for the case of helper ranges. 
+Resetting both bounds in check_reg_overflow() in case only one +of them exceeds limits is also not enough as similar test can be +created that uses values which are within range, thus also here +learned min value in r1 is incorrect when mixed with later signed +test to create a range: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff880ad081fa00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+7 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = 2 + 10: (3d) if r2 >= r1 goto pc+3 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 11: (65) if r1 s> 0x4 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 + R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 12: (0f) r0 += r1 + 13: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 + R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 14: (b7) r0 = 0 + 15: (95) exit + +This leaves us with two options for fixing this: i) to invalidate +all prior learned information once we switch signed context, ii) +to track min/max signed and unsigned boundaries separately as +done in [0]. (Given latter introduces major changes throughout +the whole verifier, it's rather net-next material, thus this +patch follows option i), meaning we can derive bounds either +from only signed tests or only unsigned tests.) There is still the +case of adjust_reg_min_max_vals(), where we adjust bounds on ALU +operations, meaning programs like the following where boundaries +on the reg get mixed in context later on when bounds are merged +on the dst reg must get rejected, too: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff89b2bf87ce00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+6 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = 2 + 10: (3d) if r2 >= r1 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 11: (b7) r7 = 1 + 12: (65) if r7 s> 0x0 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp + 13: (b7) r0 = 0 + 14: (95) exit + + from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 + R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp + 15: (0f) r7 += r1 + 16: (65) if r7 s> 0x4 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp + 17: (0f) r0 += r7 + 18: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp + 19: (b7) r0 = 0 + 20: (95) exit + +Meaning, in adjust_reg_min_max_vals() we must also reset range +values on the dst when src/dst registers have mixed signed/ +unsigned derived min/max value bounds with one unbounded value +as otherwise they can be added together deducing false boundaries. 
+Once both boundaries are established from either ALU ops or +compare operations w/o mixing signed/unsigned insns, then they +can safely be added to other regs also having both boundaries +established. Adding regs with one unbounded side to a map value +where the bounded side has been learned w/o mixing ops is +possible, but the resulting map value won't recover from that, +meaning such op is considered invalid on the time of actual +access. Invalid bounds are set on the dst reg in case i) src reg, +or ii) in case dst reg already had them. The only way to recover +would be to perform i) ALU ops but only 'add' is allowed on map +value types or ii) comparisons, but these are disallowed on +pointers in case they span a range. This is fine as only BPF_JEQ +and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers +which potentially turn them into PTR_TO_MAP_VALUE type depending +on the branch, so only here min/max value cannot be invalidated +for them. + +In terms of state pruning, value_from_signed is considered +as well in states_equal() when dealing with adjusted map values. +With regards to breaking existing programs, there is a small +risk, but use-cases are rather quite narrow where this could +occur and mixing compares probably unlikely. + +Joint work with Josef and Edward. + + [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html + +Fixes: 484611357c19 ("bpf: allow access into map value arrays") +Reported-by: Edward Cree <ecree@solarflare.com> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Edward Cree <ecree@solarflare.com> +Signed-off-by: Josef Bacik <jbacik@fb.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf_verifier.h | 1 + + kernel/bpf/verifier.c | 110 +++++++++++++++++++++++++++++++++++++------ + 2 files changed, 97 insertions(+), 14 deletions(-) + +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index 2edf8de..070fc49 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -40,6 +40,7 @@ struct bpf_reg_state { + */ + s64 min_value; + u64 max_value; ++ bool value_from_signed; + }; + + enum bpf_stack_slot_type { +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 56a867f..5f274c6 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -671,12 +671,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, + return -EACCES; + } + +-static bool is_pointer_value(struct bpf_verifier_env *env, int regno) ++static bool __is_pointer_value(bool allow_ptr_leaks, ++ const struct bpf_reg_state *reg) + { +- if (env->allow_ptr_leaks) ++ if (allow_ptr_leaks) + return false; + +- switch (env->cur_state.regs[regno].type) { ++ switch (reg->type) { + case UNKNOWN_VALUE: + case CONST_IMM: + return false; +@@ -685,6 +686,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) + } + } + ++static bool is_pointer_value(struct bpf_verifier_env *env, int regno) ++{ ++ return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); ++} ++ + static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) + { +@@ -1521,10 +1527,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, + } + + /* We don't know anything about what was done to this register, mark it +- * as unknown. ++ * as unknown. 
Also, if both derived bounds came from signed/unsigned ++ * mixed compares and one side is unbounded, we cannot really do anything ++ * with them as boundaries cannot be trusted. Thus, arithmetic of two ++ * regs of such kind will get invalidated bounds on the dst side. + */ +- if (min_val == BPF_REGISTER_MIN_RANGE && +- max_val == BPF_REGISTER_MAX_RANGE) { ++ if ((min_val == BPF_REGISTER_MIN_RANGE && ++ max_val == BPF_REGISTER_MAX_RANGE) || ++ (BPF_SRC(insn->code) == BPF_X && ++ ((min_val != BPF_REGISTER_MIN_RANGE && ++ max_val == BPF_REGISTER_MAX_RANGE) || ++ (min_val == BPF_REGISTER_MIN_RANGE && ++ max_val != BPF_REGISTER_MAX_RANGE) || ++ (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && ++ dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || ++ (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && ++ dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && ++ regs[insn->dst_reg].value_from_signed != ++ regs[insn->src_reg].value_from_signed)) { + reset_reg_range_values(regs, insn->dst_reg); + return; + } +@@ -1855,38 +1875,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) + { ++ bool value_from_signed = true; ++ bool is_range = true; ++ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; ++ is_range = false; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; ++ is_range = false; + break; + case BPF_JGT: +- /* Unsigned comparison, the minimum value is 0. */ +- false_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGT: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGT) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ false_reg->min_value = 0; ++ } + /* If this is false then we know the maximum val is val, + * otherwise we know the min val is val+1. + */ + false_reg->max_value = val; ++ false_reg->value_from_signed = value_from_signed; + true_reg->min_value = val + 1; ++ true_reg->value_from_signed = value_from_signed; + break; + case BPF_JGE: +- /* Unsigned comparison, the minimum value is 0. */ +- false_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGE: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGE) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ false_reg->min_value = 0; ++ } + /* If this is false then we know the maximum value is val - 1, + * otherwise we know the mimimum value is val. 
+ */ + false_reg->max_value = val - 1; ++ false_reg->value_from_signed = value_from_signed; + true_reg->min_value = val; ++ true_reg->value_from_signed = value_from_signed; + break; + default: + break; +@@ -1894,6 +1939,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); ++ if (is_range) { ++ if (__is_pointer_value(false, false_reg)) ++ reset_reg_range_values(false_reg, 0); ++ if (__is_pointer_value(false, true_reg)) ++ reset_reg_range_values(true_reg, 0); ++ } + } + + /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg +@@ -1903,39 +1954,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) + { ++ bool value_from_signed = true; ++ bool is_range = true; ++ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; ++ is_range = false; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; ++ is_range = false; + break; + case BPF_JGT: +- /* Unsigned comparison, the minimum value is 0. */ +- true_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGT: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGT) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ true_reg->min_value = 0; ++ } + /* + * If this is false, then the val is <= the register, if it is + * true the register <= to the val. + */ + false_reg->min_value = val; ++ false_reg->value_from_signed = value_from_signed; + true_reg->max_value = val - 1; ++ true_reg->value_from_signed = value_from_signed; + break; + case BPF_JGE: +- /* Unsigned comparison, the minimum value is 0. */ +- true_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGE: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGE) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ true_reg->min_value = 0; ++ } + /* If this is false then constant < register, if it is true then + * the register < constant. 
+ */ + false_reg->min_value = val + 1; ++ false_reg->value_from_signed = value_from_signed; + true_reg->max_value = val; ++ true_reg->value_from_signed = value_from_signed; + break; + default: + break; +@@ -1943,6 +2019,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); ++ if (is_range) { ++ if (__is_pointer_value(false, false_reg)) ++ reset_reg_range_values(false_reg, 0); ++ if (__is_pointer_value(false, true_reg)) ++ reset_reg_range_values(true_reg, 0); ++ } + } + + static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch new file mode 100644 index 00000000..c34e2877 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch @@ -0,0 +1,83 @@ +From dfbc65098ac779acb9839392b72d0010149b081d Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Thu, 29 Jun 2017 03:04:59 +0200 +Subject: [PATCH 094/102] bpf: prevent leaking pointer via xadd on + unpriviledged + +commit 6bdf6abc56b53103324dfd270a86580306e1a232 upstream. + +Leaking kernel addresses on unpriviledged is generally disallowed, +for example, verifier rejects the following: + + 0: (b7) r0 = 0 + 1: (18) r2 = 0xffff897e82304400 + 3: (7b) *(u64 *)(r1 +48) = r2 + R2 leaks addr into ctx + +Doing pointer arithmetic on them is also forbidden, so that they +don't turn into unknown value and then get leaked out. However, +there's xadd as a special case, where we don't check the src reg +for being a pointer register, e.g. the following will pass: + + 0: (b7) r0 = 0 + 1: (7b) *(u64 *)(r1 +48) = r0 + 2: (18) r2 = 0xffff897e82304400 ; map + 4: (db) lock *(u64 *)(r1 +48) += r2 + 5: (95) exit + +We could store the pointer into skb->cb, loose the type context, +and then read it out from there again to leak it eventually out +of a map value. Or more easily in a different variant, too: + + 0: (bf) r6 = r1 + 1: (7a) *(u64 *)(r10 -8) = 0 + 2: (bf) r2 = r10 + 3: (07) r2 += -8 + 4: (18) r1 = 0x0 + 6: (85) call bpf_map_lookup_elem#1 + 7: (15) if r0 == 0x0 goto pc+3 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R6=ctx R10=fp + 8: (b7) r3 = 0 + 9: (7b) *(u64 *)(r0 +0) = r3 + 10: (db) lock *(u64 *)(r0 +0) += r6 + 11: (b7) r0 = 0 + 12: (95) exit + + from 7 to 11: R0=inv,min_value=0,max_value=0 R6=ctx R10=fp + 11: (b7) r0 = 0 + 12: (95) exit + +Prevent this by checking xadd src reg for pointer types. Also +add a couple of test cases related to this. + +Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") +Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Acked-by: Alexei Starovoitov <ast@kernel.org> +Acked-by: Martin KaFai Lau <kafai@fb.com> +Acked-by: Edward Cree <ecree@solarflare.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/verifier.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 5f274c6..4a336c4 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -880,6 +880,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) + if (err) + return err; + ++ if (is_pointer_value(env, insn->src_reg)) { ++ verbose("R%d leaks addr into mem\n", insn->src_reg); ++ return -EACCES; ++ } ++ + /* check whether atomic_add can read the memory */ + err = check_mem_access(env, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, -1); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch new file mode 100644 index 00000000..58ad8391 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch @@ -0,0 +1,71 @@ +From 1c494e6d3404c7f00f4f717f232d61aeccf9efdd Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Mon, 29 Jan 2018 02:48:54 +0100 +Subject: [PATCH 095/102] x86: bpf_jit: small optimization in + emit_bpf_tail_call() + +[ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ] + +Saves 4 bytes replacing following instructions : + +lea rax, [rsi + rdx * 8 + offsetof(...)] +mov rax, qword ptr [rax] +cmp rax, 0 + +by : + +mov rax, [rsi + rdx * 8 + offsetof(...)] +test rax, rax + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Cc: Alexei Starovoitov <ast@kernel.org> +Cc: Daniel Borkmann <daniel@iogearbox.net> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Acked-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/net/bpf_jit_comp.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 15f7436..ece29e2 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -281,7 +281,7 @@ static void emit_bpf_tail_call(u8 **pprog) + EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + offsetof(struct bpf_array, map.max_entries)); + EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ +-#define OFFSET1 47 /* number of bytes to jump */ ++#define OFFSET1 43 /* number of bytes to jump */ + EMIT2(X86_JBE, OFFSET1); /* jbe out */ + label1 = cnt; + +@@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog) + */ + EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ +-#define OFFSET2 36 ++#define OFFSET2 32 + EMIT2(X86_JA, OFFSET2); /* ja out */ + label2 = cnt; + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ + + /* prog = array->ptrs[index]; */ +- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ ++ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ + offsetof(struct bpf_array, ptrs)); +- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ + + /* if (prog == NULL) + * goto out; + */ +- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ ++ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ + #define OFFSET3 10 + EMIT2(X86_JE, OFFSET3); /* je out */ + label3 = cnt; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-bpf-fix-bpf_tail_call-x64-JIT.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-bpf-fix-bpf_tail_call-x64-JIT.patch new file mode 100644 index 00000000..fffdeb98 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-bpf-fix-bpf_tail_call-x64-JIT.patch @@ -0,0 +1,62 @@ +From d3e6070e05dc3cdafac5120e35cdf0a33ce45270 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@fb.com> +Date: Mon, 29 Jan 2018 02:48:55 +0100 +Subject: [PATCH 096/102] bpf: fix bpf_tail_call() x64 JIT + +[ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ] + +- bpf prog_array just like all other types of bpf array accepts 32-bit index. + Clarify that in the comment. +- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes +- tighten corresponding check in the interpreter to stay consistent + +The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag +in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and +though JIT code is wrong it will check bounds correctly. +Hence two fixes tags. All other JITs don't have this problem. + +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") +Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Acked-by: Martin KaFai Lau <kafai@fb.com> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/net/bpf_jit_comp.c | 4 ++-- + kernel/bpf/core.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index ece29e2..7840331 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -278,9 +278,9 @@ static void emit_bpf_tail_call(u8 **pprog) + /* if (index >= array->map.max_entries) + * goto out; + */ +- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ ++ EMIT2(0x89, 0xD2); /* mov edx, edx */ ++ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ + offsetof(struct bpf_array, map.max_entries)); +- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ + #define OFFSET1 43 /* number of bytes to jump */ + EMIT2(X86_JBE, OFFSET1); /* jbe out */ + label1 = cnt; +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index aa6d981..ab9576b 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -715,7 +715,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; +- u64 index = BPF_R3; ++ u32 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch new file mode 100644 index 00000000..2189d298 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch @@ -0,0 +1,222 @@ +From 26f11e73e6dcfb8c5ec2dc8afb22b5f0af3015f7 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@kernel.org> +Date: Mon, 29 Jan 2018 02:48:56 +0100 +Subject: [PATCH 097/102] bpf: introduce BPF_JIT_ALWAYS_ON config + +[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ] + +The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715. + +A quote from goolge project zero blog: +"At this point, it would normally be necessary to locate gadgets in +the host kernel code that can be used to actually leak data by reading +from an attacker-controlled location, shifting and masking the result +appropriately and then using the result of that as offset to an +attacker-controlled address for a load. But piecing gadgets together +and figuring out which ones work in a speculation context seems annoying. +So instead, we decided to use the eBPF interpreter, which is built into +the host kernel - while there is no legitimate way to invoke it from inside +a VM, the presence of the code in the host kernel's text section is sufficient +to make it usable for the attack, just like with ordinary ROP gadgets." + +To make attacker job harder introduce BPF_JIT_ALWAYS_ON config +option that removes interpreter from the kernel in favor of JIT-only mode. +So far eBPF JIT is supported by: +x64, arm64, arm32, sparc64, s390, powerpc64, mips64 + +The start of JITed program is randomized and code page is marked as read-only. 
+In addition "constant blinding" can be turned on with net.core.bpf_jit_harden + +v2->v3: +- move __bpf_prog_ret0 under ifdef (Daniel) + +v1->v2: +- fix init order, test_bpf and cBPF (Daniel's feedback) +- fix offloaded bpf (Jakub's feedback) +- add 'return 0' dummy in case something can invoke prog->bpf_func +- retarget bpf tree. For bpf-next the patch would need one extra hunk. + It will be sent when the trees are merged back to net-next + +Considered doing: + int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT; +but it seems better to land the patch as-is and in bpf-next remove +bpf_jit_enable global variable from all JITs, consolidate in one place +and remove this jit_init() function. + +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + init/Kconfig | 7 +++++++ + kernel/bpf/core.c | 18 ++++++++++++++++++ + lib/test_bpf.c | 11 +++++++---- + net/core/filter.c | 6 ++---- + net/core/sysctl_net_core.c | 6 ++++++ + net/socket.c | 9 +++++++++ + 6 files changed, 49 insertions(+), 8 deletions(-) + +diff --git a/init/Kconfig b/init/Kconfig +index 7f69e2e..e9b989c 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1634,6 +1634,13 @@ config BPF_SYSCALL + Enable the bpf() system call that allows to manipulate eBPF + programs and maps via file descriptors. + ++config BPF_JIT_ALWAYS_ON ++ bool "Permanently enable BPF JIT and remove BPF interpreter" ++ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT ++ help ++ Enables BPF JIT and removes BPF interpreter to avoid ++ speculative execution of BPF instructions by the interpreter ++ + config SHMEM + bool "Use full shmem filesystem" if EXPERT + default y +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index ab9576b..64c4b13 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) + } + EXPORT_SYMBOL_GPL(__bpf_call_base); + ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON + /** + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on +@@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) + } + STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ + ++#else ++static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) ++{ ++ return 0; ++} ++#endif ++ + bool bpf_prog_array_compatible(struct bpf_array *array, + const struct bpf_prog *fp) + { +@@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) + */ + struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + { ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON + fp->bpf_func = (void *) __bpf_prog_run; ++#else ++ fp->bpf_func = (void *) __bpf_prog_ret0; ++#endif + + /* eBPF JITs can rewrite the program in case constant + * blinding is active. However, in case of error during +@@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + * be JITed, but falls back to the interpreter. 
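+ * (Editorial note, not part of the upstream hunk: under
+ * CONFIG_BPF_JIT_ALWAYS_ON there is no interpreter left to fall back
+ * to, so the ifdef added below instead fails the program load with
+ * -ENOTSUPP whenever bpf_int_jit_compile() could not produce a JITed
+ * image.)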
+ */ + fp = bpf_int_jit_compile(fp); ++#ifdef CONFIG_BPF_JIT_ALWAYS_ON ++ if (!fp->jited) { ++ *err = -ENOTSUPP; ++ return fp; ++ } ++#endif + bpf_prog_lock_ro(fp); + + /* The tail call compatibility check can only be done at +diff --git a/lib/test_bpf.c b/lib/test_bpf.c +index 0362da0..158b4a3 100644 +--- a/lib/test_bpf.c ++++ b/lib/test_bpf.c +@@ -5601,9 +5601,8 @@ static struct bpf_prog *generate_filter(int which, int *err) + return NULL; + } + } +- /* We don't expect to fail. */ + if (*err) { +- pr_cont("FAIL to attach err=%d len=%d\n", ++ pr_cont("FAIL to prog_create err=%d len=%d\n", + *err, fprog.len); + return NULL; + } +@@ -5626,6 +5625,10 @@ static struct bpf_prog *generate_filter(int which, int *err) + * checks. + */ + fp = bpf_prog_select_runtime(fp, err); ++ if (*err) { ++ pr_cont("FAIL to select_runtime err=%d\n", *err); ++ return NULL; ++ } + break; + } + +@@ -5811,8 +5814,8 @@ static __init int test_bpf(void) + pass_cnt++; + continue; + } +- +- return err; ++ err_cnt++; ++ continue; + } + + pr_cont("jited:%u ", fp->jited); +diff --git a/net/core/filter.c b/net/core/filter.c +index b391209..c066b00 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1005,11 +1005,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) + */ + goto out_err_free; + +- /* We are guaranteed to never error here with cBPF to eBPF +- * transitions, since there's no issue with type compatibility +- * checks on program arrays. +- */ + fp = bpf_prog_select_runtime(fp, &err); ++ if (err) ++ goto out_err_free; + + kfree(old_prog); + return fp; +diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c +index 0df2aa6..9955d6d 100644 +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = { + .data = &bpf_jit_enable, + .maxlen = sizeof(int), + .mode = 0644, ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON + .proc_handler = proc_dointvec ++#else ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &one, ++ .extra2 = &one, ++#endif + }, + # ifdef CONFIG_HAVE_EBPF_JIT + { +diff --git a/net/socket.c b/net/socket.c +index 6bbccf0..8a9624e 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -2547,6 +2547,15 @@ static int __init sock_init(void) + + core_initcall(sock_init); /* early initcall */ + ++static int __init jit_init(void) ++{ ++#ifdef CONFIG_BPF_JIT_ALWAYS_ON ++ bpf_jit_enable = 1; ++#endif ++ return 0; ++} ++pure_initcall(jit_init); ++ + #ifdef CONFIG_PROC_FS + void socket_seq_show(struct seq_file *seq) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch new file mode 100644 index 00000000..130b2b0f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch @@ -0,0 +1,50 @@ +From 1c28f3d820ab4bd8f8381584acd5219e083425f5 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Mon, 29 Jan 2018 02:48:57 +0100 +Subject: [PATCH 098/102] bpf: arsh is not supported in 32 bit alu thus reject + it + +[ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ] + +The following snippet was throwing an 'unknown opcode cc' warning +in BPF interpreter: + + 0: (18) r0 = 0x0 + 2: (7b) *(u64 *)(r10 -16) = r0 + 3: (cc) (u32) r0 s>>= (u32) r0 + 4: (95) exit + +Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X} 
+generation, not all of them do and interpreter does neither. We can +leave existing ones and implement it later in bpf-next for the +remaining ones, but reject this properly in verifier for the time +being. + +Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") +Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/verifier.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 4a336c4..4c95207 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1753,6 +1753,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + return -EINVAL; + } + ++ if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { ++ verbose("BPF_ARSH not supported for 32 bit ALU\n"); ++ return -EINVAL; ++ } ++ + if ((opcode == BPF_LSH || opcode == BPF_RSH || + opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch new file mode 100644 index 00000000..f05ccaac --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch @@ -0,0 +1,128 @@ +From b8674025265196f22dc6f0d1e6a815ac59d64d62 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Mon, 29 Jan 2018 02:48:58 +0100 +Subject: [PATCH 099/102] bpf: avoid false sharing of map refcount with + max_entries + +[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ] + +In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds +speculation") also change the layout of struct bpf_map such that +false sharing of fast-path members like max_entries is avoided +when the maps reference counter is altered. Therefore enforce +them to be placed into separate cachelines. + +pahole dump after change: + + struct bpf_map { + const struct bpf_map_ops * ops; /* 0 8 */ + struct bpf_map * inner_map_meta; /* 8 8 */ + void * security; /* 16 8 */ + enum bpf_map_type map_type; /* 24 4 */ + u32 key_size; /* 28 4 */ + u32 value_size; /* 32 4 */ + u32 max_entries; /* 36 4 */ + u32 map_flags; /* 40 4 */ + u32 pages; /* 44 4 */ + u32 id; /* 48 4 */ + int numa_node; /* 52 4 */ + bool unpriv_array; /* 56 1 */ + + /* XXX 7 bytes hole, try to pack */ + + /* --- cacheline 1 boundary (64 bytes) --- */ + struct user_struct * user; /* 64 8 */ + atomic_t refcnt; /* 72 4 */ + atomic_t usercnt; /* 76 4 */ + struct work_struct work; /* 80 32 */ + char name[16]; /* 112 16 */ + /* --- cacheline 2 boundary (128 bytes) --- */ + + /* size: 128, cachelines: 2, members: 17 */ + /* sum members: 121, holes: 1, sum holes: 7 */ + }; + +Now all entries in the first cacheline are read only throughout +the life time of the map, set up once during map creation. Overall +struct size and number of cachelines doesn't change from the +reordering. struct bpf_map is usually first member and embedded +in map structs in specific map implementations, so also avoid those +members to sit at the end where it could potentially share the +cacheline with first map values e.g. 
in the array since remote +CPUs could trigger map updates just as well for those (easily +dirtying members like max_entries intentionally as well) while +having subsequent values in cache. + +Quoting from Google's Project Zero blog [1]: + + Additionally, at least on the Intel machine on which this was + tested, bouncing modified cache lines between cores is slow, + apparently because the MESI protocol is used for cache coherence + [8]. Changing the reference counter of an eBPF array on one + physical CPU core causes the cache line containing the reference + counter to be bounced over to that CPU core, making reads of the + reference counter on all other CPU cores slow until the changed + reference counter has been written back to memory. Because the + length and the reference counter of an eBPF array are stored in + the same cache line, this also means that changing the reference + counter on one physical CPU core causes reads of the eBPF array's + length to be slow on other physical CPU cores (intentional false + sharing). + +While this doesn't 'control' the out-of-bounds speculation through +masking the index as in commit b2157399cc98, triggering a manipulation +of the map's reference counter is really trivial, so lets not allow +to easily affect max_entries from it. + +Splitting to separate cachelines also generally makes sense from +a performance perspective anyway in that fast-path won't have a +cache miss if the map gets pinned, reused in other progs, etc out +of control path, thus also avoids unintentional false sharing. + + [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html + +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 0dbb21b..80064c8 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -36,7 +36,10 @@ struct bpf_map_ops { + }; + + struct bpf_map { +- atomic_t refcnt; ++ /* 1st cacheline with read-mostly members of which some ++ * are also accessed in fast-path (e.g. ops, max_entries). ++ */ ++ const struct bpf_map_ops *ops ____cacheline_aligned; + enum bpf_map_type map_type; + u32 key_size; + u32 value_size; +@@ -44,10 +47,15 @@ struct bpf_map { + u32 map_flags; + u32 pages; + bool unpriv_array; +- struct user_struct *user; +- const struct bpf_map_ops *ops; +- struct work_struct work; ++ /* 7 bytes hole */ ++ ++ /* 2nd cacheline with misc members to avoid false sharing ++ * particularly with refcounting. ++ */ ++ struct user_struct *user ____cacheline_aligned; ++ atomic_t refcnt; + atomic_t usercnt; ++ struct work_struct work; + }; + + struct bpf_map_type_list { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-fix-divides-by-zero.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-fix-divides-by-zero.patch new file mode 100644 index 00000000..250d2bdb --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-fix-divides-by-zero.patch @@ -0,0 +1,46 @@ +From 9a13af743afd41c5fb1e0698b7638e76eaab9dcc Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Mon, 29 Jan 2018 02:48:59 +0100 +Subject: [PATCH 100/102] bpf: fix divides by zero + +[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ] + +Divides by zero are not nice, lets avoid them if possible. 
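+
+(Illustrative aside, not part of the upstream commit message: a hedged
+user-space model of the fixed check. alu32_div() is an invented name;
+the point is that for a 32-bit divide only the low 32 bits of SRC are
+the divisor, so a value such as 1ULL << 32 passes a 64-bit zero check
+yet still divides by zero.)
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* Models the interpreter's ALU_DIV_X path, not the kernel code
+   * itself. BPF's zero-divisor semantics (terminate the program with
+   * return value 0) are collapsed into simply returning 0 here. */
+  static uint64_t alu32_div(uint64_t dst, uint64_t src)
+  {
+          if ((uint32_t)src == 0)  /* the old 'src == 0' misses 1ULL << 32 */
+                  return 0;
+          return (uint32_t)dst / (uint32_t)src;
+  }
+
+  int main(void)
+  {
+          /* The divisor's low 32 bits are zero: with the pre-fix 64-bit
+           * check this would reach the hardware divide with 0. */
+          printf("%llu\n", (unsigned long long)alu32_div(10, 1ULL << 32));
+          return 0;
+  }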
+ +Also do_div() seems not needed when dealing with 32bit operands, +but this seems a minor detail. + +Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reported-by: syzbot <syzkaller@googlegroups.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 64c4b13..879ca84 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -642,7 +642,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) + DST = tmp; + CONT; + ALU_MOD_X: +- if (unlikely(SRC == 0)) ++ if (unlikely((u32)SRC == 0)) + return 0; + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); +@@ -661,7 +661,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) + DST = div64_u64(DST, SRC); + CONT; + ALU_DIV_X: +- if (unlikely(SRC == 0)) ++ if (unlikely((u32)SRC == 0)) + return 0; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-32-bit-divide-by-zero.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-32-bit-divide-by-zero.patch new file mode 100644 index 00000000..800c2f56 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-32-bit-divide-by-zero.patch @@ -0,0 +1,69 @@ +From 6468120f7928c51c0760c1368aa7ad7099f0b854 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov <ast@kernel.org> +Date: Mon, 29 Jan 2018 02:49:00 +0100 +Subject: [PATCH 101/102] bpf: fix 32-bit divide by zero + +[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ] + +due to some JITs doing if (src_reg == 0) check in 64-bit mode +for div/mod operations mask upper 32-bits of src register +before doing the check + +Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") +Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") +Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/verifier.c | 18 ++++++++++++++++++ + net/core/filter.c | 4 ++++ + 2 files changed, 22 insertions(+) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 4c95207..4e9ad02 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3201,6 +3201,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) + + + for (i = 0; i < insn_cnt; i++, insn++) { ++ if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || ++ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { ++ /* due to JIT bugs clear upper 32-bits of src register ++ * before div/mod operation ++ */ ++ insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); ++ insn_buf[1] = *insn; ++ cnt = 2; ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + +diff --git a/net/core/filter.c b/net/core/filter.c +index c066b00..615033b 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -441,6 +441,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, + convert_bpf_extensions(fp, &insn)) + break; + ++ if (fp->code == 
(BPF_ALU | BPF_DIV | BPF_X) || ++ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) ++ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); ++ + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); + break; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-reject-stores-into-ctx-via-st-and-xadd.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-reject-stores-into-ctx-via-st-and-xadd.patch new file mode 100644 index 00000000..0d68c397 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-reject-stores-into-ctx-via-st-and-xadd.patch @@ -0,0 +1,72 @@ +From d78127ea6fb5f14cf2e3c1fa97ce60f2851703d7 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Mon, 29 Jan 2018 02:49:01 +0100 +Subject: [PATCH 102/102] bpf: reject stores into ctx via st and xadd + +[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ] + +Alexei found that verifier does not reject stores into context +via BPF_ST instead of BPF_STX. And while looking at it, we +also should not allow XADD variant of BPF_STX. + +The context rewriter is only assuming either BPF_LDX_MEM- or +BPF_STX_MEM-type operations, thus reject anything other than +that so that assumptions in the rewriter properly hold. Add +test cases as well for BPF selftests. + +Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") +Reported-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/verifier.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 4e9ad02..9f9bad8 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -691,6 +691,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) + return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); + } + ++static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) ++{ ++ const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; ++ ++ return reg->type == PTR_TO_CTX; ++} ++ + static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) + { +@@ -885,6 +892,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) + return -EACCES; + } + ++ if (is_ctx_reg(env, insn->dst_reg)) { ++ verbose("BPF_XADD stores into R%d context is not allowed\n", ++ insn->dst_reg); ++ return -EACCES; ++ } ++ + /* check whether atomic_add can read the memory */ + err = check_mem_access(env, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, -1); +@@ -2879,6 +2892,12 @@ static int do_check(struct bpf_verifier_env *env) + if (err) + return err; + ++ if (is_ctx_reg(env, insn->dst_reg)) { ++ verbose("BPF_ST stores into R%d context is not allowed\n", ++ insn->dst_reg); ++ return -EACCES; ++ } ++ + /* check that memory (dst_reg + off) is writeable */ + err = check_mem_access(env, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc new file mode 100644 index 00000000..3138ec93 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc @@ -0,0 +1,169 @@ +patch 0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch +patch 
0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch +patch 0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch +patch 0004-KAISER-Kernel-Address-Isolation.patch +patch 0005-kaiser-merged-update.patch +patch 0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch +patch 0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch +patch 0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch +patch 0009-kaiser-KAISER-depends-on-SMP.patch +patch 0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch +patch 0011-kaiser-fix-perf-crashes.patch +patch 0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch +patch 0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch +patch 0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch +patch 0015-kaiser-align-addition-to-x86-mm-Makefile.patch +patch 0016-kaiser-cleanups-while-trying-for-gold-link.patch +patch 0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch +patch 0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch +patch 0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch +patch 0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch +patch 0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch +patch 0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch +patch 0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch +patch 0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch +patch 0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch +patch 0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch +patch 0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch +patch 0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch +patch 0029-x86-kaiser-Check-boottime-cmdline-params.patch +patch 0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch +patch 0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch +patch 0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch +patch 0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch +patch 0034-x86-paravirt-Dont-patch-flush_tlb_single.patch +patch 0035-x86-kaiser-Reenable-PARAVIRT.patch +patch 0036-kaiser-disabled-on-Xen-PV.patch +patch 0037-x86-kaiser-Move-feature-detection-up.patch +patch 0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch +patch 0039-KPTI-Report-when-enabled.patch +patch 0040-kaiser-Set-_PAGE_NX-only-if-supported.patch +patch 0041-kaiser-Set-_PAGE_NX-only-if-supported.patch +patch 0042-bpf-move-fixup_bpf_calls-function.patch +patch 0043-bpf-refactor-fixup_bpf_calls.patch +patch 0044-bpf-prevent-out-of-bounds-speculation.patch +patch 0045-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch +patch 0046-x86-Documentation-Add-PTI-description.patch +patch 0047-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch +patch 0048-x86-cpufeatures-Make-CPU-bugs-sticky.patch +patch 0049-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch +patch 0050-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch +patch 0051-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch +patch 0052-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch +patch 0053-sysfs-cpu-Add-vulnerability-folder.patch +patch 0054-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch +patch 0055-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch +patch 0056-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch +patch 0057-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch +patch 0058-x86-alternatives-Fix-optimize_nops-checking.patch +patch 0059-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch +patch 
0060-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch +patch 0061-objtool-modules-Discard-objtool-annotation-sections-.patch +patch 0062-objtool-Detect-jumps-to-retpoline-thunks.patch +patch 0063-objtool-Allow-alternatives-to-be-ignored.patch +patch 0064-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch +patch 0065-x86-retpoline-Add-initial-retpoline-support.patch +patch 0066-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch +patch 0067-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch +patch 0068-x86-retpoline-entry-Convert-entry-assembler-indirect.patch +patch 0069-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch +patch 0070-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch +patch 0071-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch +patch 0072-x86-retpoline-checksum32-Convert-assembler-indirect-.patch +patch 0073-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch +patch 0074-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch +patch 0075-x86-retpoline-Remove-compile-time-warning.patch +patch 0076-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch +patch 0077-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch +patch 0078-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch +patch 0079-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch +patch 0080-objtool-Improve-error-message-for-bad-file-argument.patch +patch 0081-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch +patch 0082-module-Add-retpoline-tag-to-VERMAGIC.patch +patch 0083-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch +patch 0084-x86-mce-Make-machine-check-speculation-protected.patch +patch 0085-retpoline-Introduce-start-end-markers-of-indirect-th.patch +patch 0086-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch +patch 0087-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch +patch 0088-x86-pti-Document-fix-wrong-index.patch +patch 0089-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch +patch 0090-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch +patch 0091-Map-the-vsyscall-page-with-_PAGE_USER.patch +patch 0092-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch +patch 0093-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch +patch 0094-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch +patch 0095-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch +patch 0096-bpf-fix-bpf_tail_call-x64-JIT.patch +patch 0097-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch +patch 0098-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch +patch 0099-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch +patch 0100-bpf-fix-divides-by-zero.patch +patch 0101-bpf-fix-32-bit-divide-by-zero.patch +patch 0102-bpf-reject-stores-into-ctx-via-st-and-xadd.patch +patch 0001-kaiser-fix-intel_bts-perf-crashes.patch +patch 0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch +patch 0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch +patch 0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch +patch 0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch +patch 0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch +patch 0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch +patch 0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch +patch 0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch +patch 0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch +patch 
0011-x86-msr-Add-definitions-for-new-speculation-control-.patch +patch 0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch +patch 0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch +patch 0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch +patch 0015-x86-nospec-Fix-header-guards-names.patch +patch 0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch +patch 0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch +patch 0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch +patch 0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch +patch 0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch +patch 0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch +patch 0022-x86-entry-64-Push-extra-regs-right-away.patch +patch 0023-x86-asm-Move-status-from-thread_struct-to-thread_inf.patch +patch 0024-Documentation-Document-array_index_nospec.patch +patch 0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch +patch 0026-x86-Implement-array_index_mask_nospec.patch +patch 0027-x86-Introduce-barrier_nospec.patch +patch 0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch +patch 0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch +patch 0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch +patch 0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch +patch 0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch +patch 0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch +patch 0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch +patch 0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch +patch 0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch +patch 0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch +patch 0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch +patch 0039-x86-pti-Mark-constant-arrays-as-__initconst.patch +patch 0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch +patch 0041-x86-microcode-Do-the-family-check-first.patch +patch 0001-kaiser-fix-compile-error-without-vsyscall.patch +patch 0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch +patch 0003-x86-speculation-Update-Speculation-Control-microcode.patch +patch 0004-x86-speculation-Correct-Speculation-Control-microcod.patch +patch 0005-x86-speculation-Clean-up-various-Spectre-related-det.patch +patch 0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch +patch 0007-x86-speculation-Add-asm-msr-index.h-dependency.patch +patch 0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch +patch 0009-x86-spectre-Fix-an-error-message.patch +patch 0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch +patch 0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch +patch 0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch +patch 0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch +patch 0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch +patch 0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch +patch 0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch +patch 0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch +patch 0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch +patch 0007-bpf-x64-implement-retpoline-for-tail-call.patch +patch 0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch +patch 0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch +patch 0010-x86-speculation-Use-IBRS-if-available-before-calling.patch 
+patch 0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch +patch 0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch +patch 0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch +patch 0014-x86-speculation-Move-firmware_restrict_branch_specul.patch diff --git a/common/recipes-kernel/linux/linux-yocto_4.9.bbappend b/common/recipes-kernel/linux/linux-yocto_4.9.bbappend new file mode 100644 index 00000000..783a4aba --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto_4.9.bbappend @@ -0,0 +1,14 @@ +FILESEXTRAPATHS_prepend := "${THISDIR}/${PN}-${LINUX_VERSION}:" + +PR := "${INC_PR}.1" + +KMACHINE_amdx86 ?= "common-pc-64" +KBRANCH_amdx86 ?= "standard/base" + +SRCREV_machine_amdx86 ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" +SRCREV_meta_amdx86 ?= "803b8d600e45afa0375459bf599fe365571a3866" +LINUX_VERSION_amdx86 ?= "4.9.21" + +SRC_URI_append_amdx86 = " file://upstream-backports.scc" + +KERNEL_FEATURES_append_amdx86 = " cfg/smp.scc" |