313 files changed, 0 insertions, 49933 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch deleted file mode 100644 index 9772c5f8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch +++ /dev/null @@ -1,165 +0,0 @@ -From af0e9ccc133f03f5150a7afba349a9f50897f793 Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Thu, 14 Dec 2017 17:40:50 -0800 -Subject: [PATCH 01/33] KVM: Fix stack-out-of-bounds read in write_mmio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit e39d200fa5bf5b94a0948db0dae44c1b73b84a56 upstream. - -Reported by syzkaller: - - BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm] - Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298 - - CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18 - Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 - Call Trace: - dump_stack+0xab/0xe1 - print_address_description+0x6b/0x290 - kasan_report+0x28a/0x370 - write_mmio+0x11e/0x270 [kvm] - emulator_read_write_onepage+0x311/0x600 [kvm] - emulator_read_write+0xef/0x240 [kvm] - emulator_fix_hypercall+0x105/0x150 [kvm] - em_hypercall+0x2b/0x80 [kvm] - x86_emulate_insn+0x2b1/0x1640 [kvm] - x86_emulate_instruction+0x39a/0xb90 [kvm] - handle_exception+0x1b4/0x4d0 [kvm_intel] - vcpu_enter_guest+0x15a0/0x2640 [kvm] - kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm] - kvm_vcpu_ioctl+0x479/0x880 [kvm] - do_vfs_ioctl+0x142/0x9a0 - SyS_ioctl+0x74/0x80 - entry_SYSCALL_64_fastpath+0x23/0x9a - -The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall) -to the guest memory, however, write_mmio tracepoint always prints 8 bytes -through *(u64 *)val since kvm splits the mmio access into 8 bytes. This -leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes -it by just accessing the bytes which we operate on. - -Before patch: - -syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f - -After patch: - -syz-executor-13416 [002] .... 
51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f - -Reported-by: Dmitry Vyukov <dvyukov@google.com> -Reviewed-by: Darren Kenny <darren.kenny@oracle.com> -Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> -Tested-by: Marc Zyngier <marc.zyngier@arm.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Marc Zyngier <marc.zyngier@arm.com> -Cc: Christoffer Dall <christoffer.dall@linaro.org> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/arm/kvm/mmio.c | 6 +++--- - arch/x86/kvm/x86.c | 8 ++++---- - include/trace/events/kvm.h | 7 +++++-- - 3 files changed, 12 insertions(+), 9 deletions(-) - -diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c -index b6e715f..dac7ceb 100644 ---- a/arch/arm/kvm/mmio.c -+++ b/arch/arm/kvm/mmio.c -@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) - } - - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, -- data); -+ &data); - data = vcpu_data_host_to_guest(vcpu, data, len); - vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); - } -@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), - len); - -- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); -+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); - kvm_mmio_write_buf(data_buf, len, data); - - ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, - data_buf); - } else { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, -- fault_ipa, 0); -+ fault_ipa, NULL); - - ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, - data_buf); -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 51a700a..9cc9117 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -4242,7 +4242,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) - addr, n, v)) - && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) - break; -- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); -+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); - handled += n; - addr += n; - len -= n; -@@ -4495,7 +4495,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) - { - if (vcpu->mmio_read_completed) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, -- vcpu->mmio_fragments[0].gpa, *(u64 *)val); -+ vcpu->mmio_fragments[0].gpa, val); - vcpu->mmio_read_completed = 0; - return 1; - } -@@ -4517,14 +4517,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, - - static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) - { -- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); -+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); - return vcpu_mmio_write(vcpu, gpa, bytes, val); - } - - static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, - void *val, int bytes) - { -- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); -+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); - return X86EMUL_IO_NEEDED; - } - -diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h -index 8ade3eb..90fce4d 100644 ---- a/include/trace/events/kvm.h -+++ b/include/trace/events/kvm.h -@@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq, - { KVM_TRACE_MMIO_WRITE, "write" } - - TRACE_EVENT(kvm_mmio, -- TP_PROTO(int type, int len, u64 gpa, 
u64 val), -+ TP_PROTO(int type, int len, u64 gpa, void *val), - TP_ARGS(type, len, gpa, val), - - TP_STRUCT__entry( -@@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio, - __entry->type = type; - __entry->len = len; - __entry->gpa = gpa; -- __entry->val = val; -+ __entry->val = 0; -+ if (val) -+ memcpy(&__entry->val, val, -+ min_t(u32, sizeof(__entry->val), len)); - ), - - TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch deleted file mode 100644 index 64e0004b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 122fd9dfb506c08b0a3093d6da080983cdf91e32 Mon Sep 17 00:00:00 2001 -From: Ben Hutchings <ben@decadent.org.uk> -Date: Tue, 12 Jun 2018 01:14:34 +0100 -Subject: [PATCH 01/10] KVM: VMX: Expose SSBD properly to guests, 4.9 - supplement - -Fix an additional misuse of X86_FEATURE_SSBD in -guest_cpuid_has_spec_ctrl(). This function was introduced in the -backport of SSBD support to 4.9 and is not present upstream, so it was -not fixed by commit 43462d908821 "KVM: VMX: Expose SSBD properly to -guests." - -Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration") -Signed-off-by: Ben Hutchings <ben@decadent.org.uk> -Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: kvm@vger.kernel.org -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index d22695c..cf503df 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) - if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) - return true; - best = kvm_find_cpuid_entry(vcpu, 7, 0); -- return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); -+ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD))); - } - - static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch deleted file mode 100644 index bb09930a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-compile-error-without-vsyscall.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0f1e01960c3e082feac098be5b754ad3e06c820a Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Tue, 13 Feb 2018 16:45:20 +0100 -Subject: [PATCH 01/12] kaiser: fix compile error without vsyscall -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80: -arch/x86/mm/kaiser.c: In function ‘kaiser_init’: -arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared - (first use in this function) - -It seems like his combination of kernel options doesn't work for KAISER. -X86_VSYSCALL_EMULATION is not set on his system, while LEGACY_VSYSCALL -is set to NONE (LEGACY_VSYSCALL_NONE=y). 
He managed to get things -compiling again, by moving the 'extern unsigned long vsyscall_pgprot' -outside of the preprocessor statement. This works because the optimizer -removes that code (vsyscall_enabled() is always false) - and that's how -it was done in some older backports. - -Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/vsyscall.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h -index 9ee8506..62210da 100644 ---- a/arch/x86/include/asm/vsyscall.h -+++ b/arch/x86/include/asm/vsyscall.h -@@ -13,7 +13,6 @@ extern void map_vsyscall(void); - */ - extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); - extern bool vsyscall_enabled(void); --extern unsigned long vsyscall_pgprot; - #else - static inline void map_vsyscall(void) {} - static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) -@@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) - } - static inline bool vsyscall_enabled(void) { return false; } - #endif -+extern unsigned long vsyscall_pgprot; - - #endif /* _ASM_X86_VSYSCALL_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch deleted file mode 100644 index 3e53e978..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-kaiser-fix-intel_bts-perf-crashes.patch +++ /dev/null @@ -1,135 +0,0 @@ -From f07b0b948b09b02e7386560ad509d1afdbd6ef0b Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Mon, 29 Jan 2018 18:16:55 -0800 -Subject: [PATCH 01/42] kaiser: fix intel_bts perf crashes -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI; -Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7: -honggfuzz -f /tmp/somedirectorywithatleastonefile \ - --linux_perf_bts_edge -s -- /bin/true -(honggfuzz from https://github.com/google/honggfuzz) crashed with -BUG: unable to handle kernel paging request at ffff9d3215100000 -(then narrowed it down to -perf record --per-thread -e intel_bts//u -- /bin/ls). - -The intel_bts driver does not use the 'normal' BTS buffer which is -exposed through kaiser_add_mapping(), but instead uses the memory -allocated for the perf AUX buffer. - -This obviously comes apart when using PTI, because then the kernel -mapping, which includes that AUX buffer memory, disappears while -switched to user page tables. - -Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping() -to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit -99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now. - -Slightly reorganized surrounding code in bts_buffer_setup_aux(), -so it can better match bts_buffer_free_aux(): free_aux with an #ifdef -to avoid the loop when PTI is off, but setup_aux needs to loop anyway -(and kaiser_add_mapping() is cheap when PTI config is off or "pti=off"). 
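In outline, the fix pairs every perf AUX page with a KAISER shadow mapping and unwinds cleanly on failure. A condensed editorial sketch of the pattern the hunks below apply (surrounding driver code elided; see the patch body for the real loop):

	if (kaiser_add_mapping((unsigned long)kaddr, page_size,
			       __PAGE_KERNEL) < 0) {
		buf->nr_bufs = nbuf;		/* unwind only what was mapped */
		bts_buffer_free_aux(buf);	/* kaiser_remove_mapping() per page */
		return NULL;
	}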
- -Reported-by: Vince Weaver <vincent.weaver@maine.edu> -Reported-by: Robert Święcki <robert@swiecki.net> -Analyzed-by: Peter Zijlstra <peterz@infradead.org> -Analyzed-by: Stephane Eranian <eranian@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Ingo Molnar <mingo@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Vince Weaver <vince@deater.net> -Cc: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/intel/bts.c | 44 +++++++++++++++++++++++++++++++++----------- - 1 file changed, 33 insertions(+), 11 deletions(-) - -diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c -index 982c9e3..21298c1 100644 ---- a/arch/x86/events/intel/bts.c -+++ b/arch/x86/events/intel/bts.c -@@ -22,6 +22,7 @@ - #include <linux/debugfs.h> - #include <linux/device.h> - #include <linux/coredump.h> -+#include <linux/kaiser.h> - - #include <asm-generic/sizes.h> - #include <asm/perf_event.h> -@@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) - return 1 << (PAGE_SHIFT + page_private(page)); - } - -+static void bts_buffer_free_aux(void *data) -+{ -+#ifdef CONFIG_PAGE_TABLE_ISOLATION -+ struct bts_buffer *buf = data; -+ int nbuf; -+ -+ for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { -+ struct page *page = buf->buf[nbuf].page; -+ void *kaddr = page_address(page); -+ size_t page_size = buf_size(page); -+ -+ kaiser_remove_mapping((unsigned long)kaddr, page_size); -+ } -+#endif -+ kfree(data); -+} -+ - static void * - bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) - { -@@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) - buf->real_size = size - size % BTS_RECORD_SIZE; - - for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { -- unsigned int __nr_pages; -+ void *kaddr = pages[pg]; -+ size_t page_size; -+ -+ page = virt_to_page(kaddr); -+ page_size = buf_size(page); -+ -+ if (kaiser_add_mapping((unsigned long)kaddr, -+ page_size, __PAGE_KERNEL) < 0) { -+ buf->nr_bufs = nbuf; -+ bts_buffer_free_aux(buf); -+ return NULL; -+ } - -- page = virt_to_page(pages[pg]); -- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; - buf->buf[nbuf].page = page; - buf->buf[nbuf].offset = offset; - buf->buf[nbuf].displacement = (pad ? 
BTS_RECORD_SIZE - pad : 0); -- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; -+ buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; - pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; - buf->buf[nbuf].size -= pad; - -- pg += __nr_pages; -- offset += __nr_pages << PAGE_SHIFT; -+ pg += page_size >> PAGE_SHIFT; -+ offset += page_size; - } - - return buf; - } - --static void bts_buffer_free_aux(void *data) --{ -- kfree(data); --} -- - static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) - { - return buf->buf[idx].offset + buf->buf[idx].displacement; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch deleted file mode 100644 index 50c1ddb6..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 97be262ca58e09fd46568b01a7643a244903ae21 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Mon, 17 Jul 2017 16:10:33 -0500 -Subject: [PATCH 001/103] x86/boot: Add early cmdline parsing for options with - arguments -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit e505371dd83963caae1a37ead9524e8d997341be upstream. - -Add a cmdline_find_option() function to look for cmdline options that -take arguments. The argument is returned in a supplied buffer and the -argument length (regardless of whether it fits in the supplied buffer) -is returned, with -1 indicating not found. - -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Alexander Potapenko <glider@google.com> -Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arnd Bergmann <arnd@arndb.de> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brijesh Singh <brijesh.singh@amd.com> -Cc: Dave Young <dyoung@redhat.com> -Cc: Dmitry Vyukov <dvyukov@google.com> -Cc: Jonathan Corbet <corbet@lwn.net> -Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Larry Woodman <lwoodman@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Matt Fleming <matt@codeblueprint.co.uk> -Cc: Michael S. 
Tsirkin <mst@redhat.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Rik van Riel <riel@redhat.com> -Cc: Toshimitsu Kani <toshi.kani@hpe.com> -Cc: kasan-dev@googlegroups.com -Cc: kvm@vger.kernel.org -Cc: linux-arch@vger.kernel.org -Cc: linux-doc@vger.kernel.org -Cc: linux-efi@vger.kernel.org -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cmdline.h | 2 + - arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 107 insertions(+) - -diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h -index e01f7f7..84ae170 100644 ---- a/arch/x86/include/asm/cmdline.h -+++ b/arch/x86/include/asm/cmdline.h -@@ -2,5 +2,7 @@ - #define _ASM_X86_CMDLINE_H - - int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); -+int cmdline_find_option(const char *cmdline_ptr, const char *option, -+ char *buffer, int bufsize); - - #endif /* _ASM_X86_CMDLINE_H */ -diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c -index 5cc78bf..3261abb 100644 ---- a/arch/x86/lib/cmdline.c -+++ b/arch/x86/lib/cmdline.c -@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, - return 0; /* Buffer overrun */ - } - -+/* -+ * Find a non-boolean option (i.e. option=argument). In accordance with -+ * standard Linux practice, if this option is repeated, this returns the -+ * last instance on the command line. -+ * -+ * @cmdline: the cmdline string -+ * @max_cmdline_size: the maximum size of cmdline -+ * @option: option string to look for -+ * @buffer: memory buffer to return the option argument -+ * @bufsize: size of the supplied memory buffer -+ * -+ * Returns the length of the argument (regardless of if it was -+ * truncated to fit in the buffer), or -1 on not found. -+ */ -+static int -+__cmdline_find_option(const char *cmdline, int max_cmdline_size, -+ const char *option, char *buffer, int bufsize) -+{ -+ char c; -+ int pos = 0, len = -1; -+ const char *opptr = NULL; -+ char *bufptr = buffer; -+ enum { -+ st_wordstart = 0, /* Start of word/after whitespace */ -+ st_wordcmp, /* Comparing this word */ -+ st_wordskip, /* Miscompare, skip */ -+ st_bufcpy, /* Copying this to buffer */ -+ } state = st_wordstart; -+ -+ if (!cmdline) -+ return -1; /* No command line */ -+ -+ /* -+ * This 'pos' check ensures we do not overrun -+ * a non-NULL-terminated 'cmdline' -+ */ -+ while (pos++ < max_cmdline_size) { -+ c = *(char *)cmdline++; -+ if (!c) -+ break; -+ -+ switch (state) { -+ case st_wordstart: -+ if (myisspace(c)) -+ break; -+ -+ state = st_wordcmp; -+ opptr = option; -+ /* fall through */ -+ -+ case st_wordcmp: -+ if ((c == '=') && !*opptr) { -+ /* -+ * We matched all the way to the end of the -+ * option we were looking for, prepare to -+ * copy the argument. -+ */ -+ len = 0; -+ bufptr = buffer; -+ state = st_bufcpy; -+ break; -+ } else if (c == *opptr++) { -+ /* -+ * We are currently matching, so continue -+ * to the next character on the cmdline. 
-+ */ -+ break; -+ } -+ state = st_wordskip; -+ /* fall through */ -+ -+ case st_wordskip: -+ if (myisspace(c)) -+ state = st_wordstart; -+ break; -+ -+ case st_bufcpy: -+ if (myisspace(c)) { -+ state = st_wordstart; -+ } else { -+ /* -+ * Increment len, but don't overrun the -+ * supplied buffer and leave room for the -+ * NULL terminator. -+ */ -+ if (++len < bufsize) -+ *bufptr++ = c; -+ } -+ break; -+ } -+ } -+ -+ if (bufsize) -+ *bufptr = '\0'; -+ -+ return len; -+} -+ - int cmdline_find_option_bool(const char *cmdline, const char *option) - { - return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); - } -+ -+int cmdline_find_option(const char *cmdline, const char *option, char *buffer, -+ int bufsize) -+{ -+ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, -+ buffer, bufsize); -+} --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch deleted file mode 100644 index db1a2245..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 0b113edb84e5133f4844eeec2889faced402a41c Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sat, 22 Apr 2017 00:01:20 -0700 -Subject: [PATCH 01/14] x86/mm: Remove flush_tlb() and flush_tlb_current_task() - -commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream. - -I was trying to figure out what how flush_tlb_current_task() would -possibly work correctly if current->mm != current->active_mm, but I -realized I could spare myself the effort: it has no callers except -the unused flush_tlb() macro. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: H. 
Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Michal Hocko <mhocko@suse.com> -Cc: Nadav Amit <namit@vmware.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 9 --------- - arch/x86/mm/tlb.c | 17 ----------------- - 2 files changed, 26 deletions(-) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 183af59..db8952a 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -261,7 +261,6 @@ static inline void __flush_tlb_one(unsigned long addr) - /* - * TLB flushing: - * -- * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page -@@ -293,11 +292,6 @@ static inline void flush_tlb_all(void) - __flush_tlb_all(); - } - --static inline void flush_tlb(void) --{ -- __flush_tlb_up(); --} -- - static inline void local_flush_tlb(void) - { - __flush_tlb_up(); -@@ -359,14 +353,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, - flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) - - extern void flush_tlb_all(void); --extern void flush_tlb_current_task(void); - extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long vmflag); - extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); - --#define flush_tlb() flush_tlb_current_task() -- - void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, unsigned long end); -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 0cf44ac..c045051 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -320,23 +320,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, - smp_call_function_many(cpumask, flush_tlb_func, &info, 1); - } - --void flush_tlb_current_task(void) --{ -- struct mm_struct *mm = current->mm; -- -- preempt_disable(); -- -- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); -- -- /* This is an implicit full barrier that synchronizes with switch_mm. */ -- local_flush_tlb(); -- -- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); -- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) -- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); -- preempt_enable(); --} -- - /* - * See Documentation/x86/tlb.txt for details. 
We choose 33 - * because it is large enough to cover the vast majority (at --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch deleted file mode 100644 index fddb3346..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 8b18def6a2da1b716f49fad6744a41c94d31a2c5 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Wed, 17 Jan 2018 16:58:11 +0100 -Subject: [PATCH 01/93] x86/paravirt, objtool: Annotate indirect calls - -commit 3010a0663fd949d122eca0561b06b0a9453f7866 upstream. - -Paravirt emits indirect calls which get flagged by objtool retpoline -checks, annotate it away because all these indirect calls will be -patched out before we start userspace. - -This patching happens through alternative_instructions() -> -apply_paravirt() -> pv_init_ops.patch() which will eventually end up -in paravirt_patch_default(). This function _will_ write direct -alternatives. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/paravirt.h | 16 ++++++++++++---- - arch/x86/include/asm/paravirt_types.h | 5 ++++- - 2 files changed, 16 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h -index ce93281..24af8b1 100644 ---- a/arch/x86/include/asm/paravirt.h -+++ b/arch/x86/include/asm/paravirt.h -@@ -6,6 +6,7 @@ - #ifdef CONFIG_PARAVIRT - #include <asm/pgtable_types.h> - #include <asm/asm.h> -+#include <asm/nospec-branch.h> - - #include <asm/paravirt_types.h> - -@@ -869,23 +870,27 @@ extern void default_banner(void); - - #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ -- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) -+ ANNOTATE_RETPOLINE_SAFE; \ -+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) - - #define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ -+ ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) - - #define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ -+ ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) - - #ifdef CONFIG_X86_32 - #define GET_CR0_INTO_EAX \ - push %ecx; push %edx; \ -+ ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ - pop %edx; pop %ecx - #else /* !CONFIG_X86_32 */ -@@ -907,11 +912,13 @@ extern void default_banner(void); - */ 
- #define SWAPGS \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ -- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ -+ ANNOTATE_RETPOLINE_SAFE; \ -+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ - ) - - #define GET_CR2_INTO_RAX \ -- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) -+ ANNOTATE_RETPOLINE_SAFE; \ -+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); - - #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ -@@ -921,7 +928,8 @@ extern void default_banner(void); - #define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ - CLBR_NONE, \ -- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) -+ ANNOTATE_RETPOLINE_SAFE; \ -+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) - #endif /* CONFIG_X86_32 */ - - #endif /* __ASSEMBLY__ */ -diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h -index 0f400c0..04b7971 100644 ---- a/arch/x86/include/asm/paravirt_types.h -+++ b/arch/x86/include/asm/paravirt_types.h -@@ -42,6 +42,7 @@ - #include <asm/desc_defs.h> - #include <asm/kmap_types.h> - #include <asm/pgtable_types.h> -+#include <asm/nospec-branch.h> - - struct page; - struct thread_struct; -@@ -391,7 +392,9 @@ int paravirt_disable_iospace(void); - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. - */ --#define PARAVIRT_CALL "call *%c[paravirt_opptr];" -+#define PARAVIRT_CALL \ -+ ANNOTATE_RETPOLINE_SAFE \ -+ "call *%c[paravirt_opptr];" - - /* - * These macros are intended to wrap calls through one of the paravirt --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch deleted file mode 100644 index dbde0c07..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 22510b00481d95adc62292797fe98fbfe215a649 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philip=20M=C3=BCller?= <philm@manjaro.org> -Date: Sat, 9 Jun 2018 13:42:05 +0200 -Subject: [PATCH 02/10] complete e390f9a port for v4.9.106 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -objtool ports introduced in v4.9.106 were not totally complete. Therefore -they resulted in issues like: - - module: overflow in relocation type 10 val XXXXXXXXXXX - ‘usbcore’ likely not compiled with -mcmodel=kernel - module: overflow in relocation type 10 val XXXXXXXXXXX - ‘scsi_mod’ likely not compiled with -mcmodel=kernel - -Missing part was the complete backport of commit e390f9a. - -Original notes by Josh Poimboeuf: - -The '__unreachable' and '__func_stack_frame_non_standard' sections are -only used at compile time. They're discarded for vmlinux but they -should also be discarded for modules. - -Since this is a recurring pattern, prefix the section names with -".discard.". It's a nice convention and vmlinux.lds.h already discards -such sections. - -Also remove the 'a' (allocatable) flag from the __unreachable section -since it doesn't make sense for a discarded section. 
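Condensed, the mechanism works like this (an editorial sketch assembled from the hunks below, not extra patch content): the annotation emits its marker into a ".discard."-prefixed section, and the generic DISCARDS rule in vmlinux.lds.h already throws away every ".discard.*" section for vmlinux and modules alike, so no relocations against the marker survive into the final object:

	#ifdef CONFIG_STACK_VALIDATION
	#define annotate_unreachable() ({				\
		asm("1:\t\n"						\
		    ".pushsection .discard.unreachable\t\n"		\
		    ".long 1b\t\n"	/* record address of label 1 */	\
		    ".popsection\t\n");					\
	})
	#endif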
- -Signed-off-by: Philip Müller <philm@manjaro.org> -Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") -Link: https://gitlab.manjaro.org/packages/core/linux49/issues/2 -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/vmlinux.lds.S | 2 -- - include/linux/compiler-gcc.h | 2 +- - 2 files changed, 1 insertion(+), 3 deletions(-) - -diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S -index 4ef267f..e783a5d 100644 ---- a/arch/x86/kernel/vmlinux.lds.S -+++ b/arch/x86/kernel/vmlinux.lds.S -@@ -352,8 +352,6 @@ SECTIONS - DISCARDS - /DISCARD/ : { - *(.eh_frame) -- *(__func_stack_frame_non_standard) -- *(__unreachable) - } - } - -diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h -index b69d102..b62cfb9 100644 ---- a/include/linux/compiler-gcc.h -+++ b/include/linux/compiler-gcc.h -@@ -202,7 +202,7 @@ - #ifdef CONFIG_STACK_VALIDATION - #define annotate_unreachable() ({ \ - asm("1:\t\n" \ -- ".pushsection __unreachable, \"a\"\t\n" \ -+ ".pushsection .discard.unreachable\t\n" \ - ".long 1b\t\n" \ - ".popsection\t\n"); \ - }) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch deleted file mode 100644 index 406a79d3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 1cd771013c357075c745f99419bdaf31503c5a51 Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Wed, 3 Jan 2018 14:31:38 -0800 -Subject: [PATCH 02/33] kvm: vmx: Scrub hardware GPRs at VM-exit - -commit 0cb5b30698fdc8f6b4646012e3acb4ddce430788 upstream. - -Guest GPR values are live in the hardware GPRs at VM-exit. Do not -leave any guest values in hardware GPRs after the guest GPR values are -saved to the vcpu_vmx structure. - -This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753. -Specifically, it defeats the Project Zero PoC for CVE 2017-5715. - -Suggested-by: Eric Northup <digitaleric@google.com> -Signed-off-by: Jim Mattson <jmattson@google.com> -Reviewed-by: Eric Northup <digitaleric@google.com> -Reviewed-by: Benjamin Serebrin <serebrin@google.com> -Reviewed-by: Andrew Honig <ahonig@google.com> -[Paolo: Add AMD bits, Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>] -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 19 +++++++++++++++++++ - arch/x86/kvm/vmx.c | 14 +++++++++++++- - 2 files changed, 32 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 975ea99..491f077 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -4858,6 +4858,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - "mov %%r14, %c[r14](%[svm]) \n\t" - "mov %%r15, %c[r15](%[svm]) \n\t" - #endif -+ /* -+ * Clear host registers marked as clobbered to prevent -+ * speculative use. 
-+ */ -+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" -+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" -+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" -+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" -+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" -+#ifdef CONFIG_X86_64 -+ "xor %%r8, %%r8 \n\t" -+ "xor %%r9, %%r9 \n\t" -+ "xor %%r10, %%r10 \n\t" -+ "xor %%r11, %%r11 \n\t" -+ "xor %%r12, %%r12 \n\t" -+ "xor %%r13, %%r13 \n\t" -+ "xor %%r14, %%r14 \n\t" -+ "xor %%r15, %%r15 \n\t" -+#endif - "pop %%" _ASM_BP - : - : [svm]"a"(svm), -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 4ead27f..91ae4e2 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -8932,6 +8932,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - /* Save guest registers, load host registers, keep flags */ - "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" - "pop %0 \n\t" -+ "setbe %c[fail](%0)\n\t" - "mov %%" _ASM_AX ", %c[rax](%0) \n\t" - "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" - __ASM_SIZE(pop) " %c[rcx](%0) \n\t" -@@ -8948,12 +8949,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - "mov %%r13, %c[r13](%0) \n\t" - "mov %%r14, %c[r14](%0) \n\t" - "mov %%r15, %c[r15](%0) \n\t" -+ "xor %%r8d, %%r8d \n\t" -+ "xor %%r9d, %%r9d \n\t" -+ "xor %%r10d, %%r10d \n\t" -+ "xor %%r11d, %%r11d \n\t" -+ "xor %%r12d, %%r12d \n\t" -+ "xor %%r13d, %%r13d \n\t" -+ "xor %%r14d, %%r14d \n\t" -+ "xor %%r15d, %%r15d \n\t" - #endif - "mov %%cr2, %%" _ASM_AX " \n\t" - "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" - -+ "xor %%eax, %%eax \n\t" -+ "xor %%ebx, %%ebx \n\t" -+ "xor %%esi, %%esi \n\t" -+ "xor %%edi, %%edi \n\t" - "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" -- "setbe %c[fail](%0) \n\t" - ".pushsection .rodata \n\t" - ".global vmx_return \n\t" - "vmx_return: " _ASM_PTR " 2b \n\t" --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch deleted file mode 100644 index 1006a947..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 5b4a083e3f13f1bbea53075da6dc33b1e9dc3b62 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 5 Feb 2018 17:18:17 -0800 -Subject: [PATCH 02/12] x86/entry/64/compat: Clear registers for compat - syscalls, to reduce speculation attack surface - -commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream. - -At entry userspace may have populated registers with values that could -otherwise be useful in a speculative execution attack. Clear them to -minimize the kernel's attack surface. - -Originally-From: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Cc: <stable@vger.kernel.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com -[ Made small improvements to the changelog. 
] -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++ - 1 file changed, 30 insertions(+) - -diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S -index d76a976..92c5573 100644 ---- a/arch/x86/entry/entry_64_compat.S -+++ b/arch/x86/entry/entry_64_compat.S -@@ -83,15 +83,25 @@ ENTRY(entry_SYSENTER_compat) - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ -+ xorq %r8, %r8 /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ -+ xorq %r9, %r9 /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ -+ xorq %r10, %r10 /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ -+ xorq %r11, %r11 /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ -+ xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ -+ xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ -+ xorq %r12, %r12 /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ -+ xorq %r13, %r13 /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ -+ xorq %r14, %r14 /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ -+ xorq %r15, %r15 /* nospec r15 */ - cld - - /* -@@ -209,15 +219,25 @@ ENTRY(entry_SYSCALL_compat) - pushq %rbp /* pt_regs->cx (stashed in bp) */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ -+ xorq %r8, %r8 /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ -+ xorq %r9, %r9 /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ -+ xorq %r10, %r10 /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ -+ xorq %r11, %r11 /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ -+ xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ -+ xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ -+ xorq %r12, %r12 /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ -+ xorq %r13, %r13 /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ -+ xorq %r14, %r14 /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ -+ xorq %r15, %r15 /* nospec r15 */ - - /* - * User mode is traced as though IRQs are on, and SYSENTER -@@ -320,15 +340,25 @@ ENTRY(entry_INT80_compat) - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ -+ xorq %r8, %r8 /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ -+ xorq %r9, %r9 /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ -+ xorq %r10, %r10 /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ -+ xorq %r11, %r11 /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ -+ xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp */ -+ xorl %ebp, %ebp /* nospec rbp */ - pushq %r12 /* pt_regs->r12 */ -+ xorq %r12, %r12 /* nospec r12 */ - pushq %r13 /* pt_regs->r13 */ -+ xorq %r13, %r13 /* nospec r13 */ - pushq %r14 /* pt_regs->r14 */ -+ xorq %r14, %r14 /* nospec r14 */ - pushq %r15 /* pt_regs->r15 */ -+ xorq %r15, %r15 /* nospec r15 */ - cld - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch deleted file mode 100644 index 545ec3ea..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ec0d53f307bb0f6155e68ff262e9eb773dc99975 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Thu, 29 Jun 2017 
08:53:20 -0700 -Subject: [PATCH 002/103] x86/mm: Add the 'nopcid' boot option to turn off PCID - -commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream. - -The parameter is only present on x86_64 systems to save a few bytes, -as PCID is always disabled on x86_32. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Reviewed-by: Nadav Amit <nadav.amit@gmail.com> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 2 ++ - arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++ - 2 files changed, 20 insertions(+) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 65b05ba..a303387 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2785,6 +2785,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - nopat [X86] Disable PAT (page attribute table extension of - pagetables) support. - -+ nopcid [X86-64] Disable the PCID cpu feature. -+ - norandmaps Don't use address space randomization. Equivalent to - echo 0 > /proc/sys/kernel/randomize_va_space - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 4eece91..81c8a53 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s) - } - __setup("nompx", x86_mpx_setup); - -+#ifdef CONFIG_X86_64 -+static int __init x86_pcid_setup(char *s) -+{ -+ /* require an exact match without trailing characters */ -+ if (strlen(s)) -+ return 0; -+ -+ /* do not emit a message if the feature is not present */ -+ if (!boot_cpu_has(X86_FEATURE_PCID)) -+ return 1; -+ -+ setup_clear_cpu_cap(X86_FEATURE_PCID); -+ pr_info("nopcid: PCID feature disabled\n"); -+ return 1; -+} -+__setup("nopcid", x86_pcid_setup); -+#endif -+ - static int __init x86_noinvpcid_setup(char *s) - { - /* noinvpcid doesn't accept parameters */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch deleted file mode 100644 index 125c9159..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch +++ /dev/null @@ -1,83 +0,0 @@ -From d7185b4bc1a4bb697f514e447697bd535979dac3 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sat, 22 Apr 2017 00:01:21 -0700 -Subject: [PATCH 02/14] x86/mm: Make flush_tlb_mm_range() more predictable - -commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream. - -I'm about to rewrite the function almost completely, but first I -want to get a functional change out of the way. Currently, if -flush_tlb_mm_range() does not flush the local TLB at all, it will -never do individual page flushes on remote CPUs. 
This seems to be -an accident, and preserving it will be awkward. Let's change it -first so that any regressions in the rewrite will be easier to -bisect and so that the rewrite can attempt to change no visible -behavior at all. - -The fix is simple: we can simply avoid short-circuiting the -calculation of base_pages_to_flush. - -As a side effect, this also eliminates a potential corner case: if -tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range() -could have ended up flushing the entire address space one page at a -time. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Acked-by: Dave Hansen <dave.hansen@intel.com> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Michal Hocko <mhocko@suse.com> -Cc: Nadav Amit <namit@vmware.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/tlb.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index c045051..2f9d41f 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -340,6 +340,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long base_pages_to_flush = TLB_FLUSH_ALL; - - preempt_disable(); -+ -+ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) -+ base_pages_to_flush = (end - start) >> PAGE_SHIFT; -+ if (base_pages_to_flush > tlb_single_page_flush_ceiling) -+ base_pages_to_flush = TLB_FLUSH_ALL; -+ - if (current->active_mm != mm) { - /* Synchronize with switch_mm. */ - smp_mb(); -@@ -356,15 +362,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - goto out; - } - -- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) -- base_pages_to_flush = (end - start) >> PAGE_SHIFT; -- - /* - * Both branches below are implicit full barriers (MOV to CR or - * INVLPG) that synchronize with switch_mm. - */ -- if (base_pages_to_flush > tlb_single_page_flush_ceiling) { -- base_pages_to_flush = TLB_FLUSH_ALL; -+ if (base_pages_to_flush == TLB_FLUSH_ALL) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - local_flush_tlb(); - } else { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch deleted file mode 100644 index 3035344f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 23f4b6492ade30e2f7fc21acfb162e46851cf0f0 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Fri, 3 Nov 2017 07:58:54 -0500 -Subject: [PATCH 02/93] x86/module: Detect and skip invalid relocations - -commit eda9cec4c9a12208a6f69fbe68f72a6311d50032 upstream. - -There have been some cases where external tooling (e.g., kpatch-build) -creates a corrupt relocation which targets the wrong address. 
This is a -silent failure which can corrupt memory in unexpected places. - -On x86, the bytes of data being overwritten by relocations are always -initialized to zero beforehand. Use that knowledge to add sanity checks -to detect such cases before they corrupt memory. - -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: jeyu@kernel.org -Cc: live-patching@vger.kernel.org -Link: http://lkml.kernel.org/r/37450d6c6225e54db107fba447ce9e56e5f758e9.1509713553.git.jpoimboe@redhat.com -[ Restructured the messages, as it's unclear whether the relocation or the target is corrupted. ] -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Matthias Kaehlcke <mka@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/module.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c -index 477ae80..87f30a8 100644 ---- a/arch/x86/kernel/module.c -+++ b/arch/x86/kernel/module.c -@@ -171,19 +171,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, - case R_X86_64_NONE: - break; - case R_X86_64_64: -+ if (*(u64 *)loc != 0) -+ goto invalid_relocation; - *(u64 *)loc = val; - break; - case R_X86_64_32: -+ if (*(u32 *)loc != 0) -+ goto invalid_relocation; - *(u32 *)loc = val; - if (val != *(u32 *)loc) - goto overflow; - break; - case R_X86_64_32S: -+ if (*(s32 *)loc != 0) -+ goto invalid_relocation; - *(s32 *)loc = val; - if ((s64)val != *(s32 *)loc) - goto overflow; - break; - case R_X86_64_PC32: -+ if (*(u32 *)loc != 0) -+ goto invalid_relocation; - val -= (u64)loc; - *(u32 *)loc = val; - #if 0 -@@ -199,6 +207,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, - } - return 0; - -+invalid_relocation: -+ pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", -+ (int)ELF64_R_TYPE(rel[i].r_info), loc, val); -+ return -ENOEXEC; -+ - overflow: - pr_err("overflow in relocation type %d val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), val); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch deleted file mode 100644 index 730dc7cc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3474ee0a656102dc872ccffc8a80eeb87a9ce502 Mon Sep 17 00:00:00 2001 -From: Dave Hansen <dave.hansen@linux.intel.com> -Date: Mon, 29 Jan 2018 18:17:26 -0800 -Subject: [PATCH 02/42] x86/pti: Make unpoison of pgd for trusted boot work for - real - -commit 445b69e3b75e42362a5bdc13c8b8f61599e2228a upstream - -The inital fix for trusted boot and PTI potentially misses the pgd clearing -if pud_alloc() sets a PGD. It probably works in *practice* because for two -adjacent calls to map_tboot_page() that share a PGD entry, the first will -clear NX, *then* allocate and set the PGD (without NX clear). The second -call will *not* allocate but will clear the NX bit. - -Defer the NX clearing to a point after it is known that all top-level -allocations have occurred. Add a comment to clarify why. 
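The ordering subtlety is worth spelling out: pud_alloc() can itself populate the PGD entry, so an NX clear done before the allocation could be undone by it. A condensed editorial sketch of the hunk below, which clears NX only once all top-level allocations for the address have happened:

	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
	pte_unmap(pte);
	/*
	 * PTI poisons low kernel addresses with NX, but tboot must
	 * execute code at such an address.  Clear NX here, after
	 * pud_alloc() may have (re)set the pgd entry above.
	 */
	pgd->pgd &= ~_PAGE_NX;
	return 0;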
- -[ tglx: Massaged changelog ] - -[ hughd notes: I have not tested tboot, but this looks to me as necessary -and as safe in old-Kaiser backports as it is upstream; I'm not submitting -the commit-to-be-fixed 262b6b30087, since it was undone by 445b69e3b75e, -and makes conflict trouble because of 5-level's p4d versus 4-level's pgd.] - -Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> -Cc: Jon Masters <jcm@redhat.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: peterz@infradead.org -Cc: ning.sun@intel.com -Cc: tboot-devel@lists.sourceforge.net -Cc: andi@firstfloor.org -Cc: luto@kernel.org -Cc: law@redhat.com -Cc: pbonzini@redhat.com -Cc: torvalds@linux-foundation.org -Cc: gregkh@linux-foundation.org -Cc: dwmw@amazon.co.uk -Cc: nickc@redhat.com -Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com -Cc: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/tboot.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c -index 8402907..21454e2 100644 ---- a/arch/x86/kernel/tboot.c -+++ b/arch/x86/kernel/tboot.c -@@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, - return -1; - set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); - pte_unmap(pte); -+ -+ /* -+ * PTI poisons low addresses in the kernel page tables in the -+ * name of making them unusable for userspace. To execute -+ * code at such a low address, the poison must be cleared. -+ * -+ * Note: 'pgd' actually gets set in pud_alloc(). -+ */ -+ pgd->pgd &= ~_PAGE_NX; -+ - return 0; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch deleted file mode 100644 index b53db2f4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch +++ /dev/null @@ -1,45 +0,0 @@ -From ab442dfc820b6ebdbb1c135e6fad66130d44e5a8 Mon Sep 17 00:00:00 2001 -From: Andrew Honig <ahonig@google.com> -Date: Wed, 10 Jan 2018 10:12:03 -0800 -Subject: [PATCH 03/33] KVM: x86: Add memory barrier on vmcs field lookup - -commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream. - -This adds a memory barrier when performing a lookup into -the vmcs_field_to_offset_table. This is related to -CVE-2017-5753. 
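The pattern being mitigated is Spectre variant 1 (bounds check bypass, CVE-2017-5753): the CPU can speculate past the range check and load vmcs_field_to_offset_table[] with an attacker-influenced out-of-bounds index. The lfence keeps the dependent load from issuing until the bounds check has retired. A condensed sketch of the shape of the fix (the full hunk follows):

	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
		return -ENOENT;
	/* serialize: no speculative table load with an unchecked index */
	asm("lfence");
	if (vmcs_field_to_offset_table[field] == 0)
		return -ENOENT;
	return vmcs_field_to_offset_table[field];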
- -Signed-off-by: Andrew Honig <ahonig@google.com> -Reviewed-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 91ae4e2..ee766c2 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -858,8 +858,16 @@ static inline short vmcs_field_to_offset(unsigned long field) - { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - -- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || -- vmcs_field_to_offset_table[field] == 0) -+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) -+ return -ENOENT; -+ -+ /* -+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a -+ * generic mechanism. -+ */ -+ asm("lfence"); -+ -+ if (vmcs_field_to_offset_table[field] == 0) - return -ENOENT; - - return vmcs_field_to_offset_table[field]; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch deleted file mode 100644 index cb9af0b2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch +++ /dev/null @@ -1,187 +0,0 @@ -From 9dd58f6cbef90d8a962b6365db32391f4a6ac4f9 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Wed, 6 Jun 2018 16:43:02 +0200 -Subject: [PATCH 03/10] KVM: x86: introduce linear_{read,write}_system - -commit 79367a65743975e5cac8d24d08eccc7fdae832b0 upstream. - -Wrap the common invocation of ctxt->ops->read_std and ctxt->ops->write_std, so -as to have a smaller patch when the functions grow another argument. 
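The refactoring itself is mechanical, shown condensed here as an editorial sketch (the full hunks follow): every open-coded ctxt->ops->read_std()/write_std() call that passes &ctxt->exception is funneled through one pair of helpers, so the follow-up fix can add its extra argument in exactly two places:

	static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
				      void *data, unsigned size)
	{
		return ctxt->ops->read_std(ctxt, linear, data, size,
					   &ctxt->exception);
	}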
- -Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) -Cc: stable@vger.kernel.org -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++------------------------- - 1 file changed, 32 insertions(+), 32 deletions(-) - -diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c -index 6faac71..b6ec3e9 100644 ---- a/arch/x86/kvm/emulate.c -+++ b/arch/x86/kvm/emulate.c -@@ -802,6 +802,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) - return assign_eip_near(ctxt, ctxt->_eip + rel); - } - -+static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, -+ void *data, unsigned size) -+{ -+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); -+} -+ -+static int linear_write_system(struct x86_emulate_ctxt *ctxt, -+ ulong linear, void *data, -+ unsigned int size) -+{ -+ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); -+} -+ - static int segmented_read_std(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr, - void *data, -@@ -1500,8 +1513,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, - return emulate_gp(ctxt, index << 3 | 0x2); - - addr = dt.address + index * 8; -- return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, -- &ctxt->exception); -+ return linear_read_system(ctxt, addr, desc, sizeof *desc); - } - - static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, -@@ -1564,8 +1576,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, - if (rc != X86EMUL_CONTINUE) - return rc; - -- return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc), -- &ctxt->exception); -+ return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc)); - } - - /* allowed just for 8 bytes segments */ -@@ -1579,8 +1590,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, - if (rc != X86EMUL_CONTINUE) - return rc; - -- return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, -- &ctxt->exception); -+ return linear_write_system(ctxt, addr, desc, sizeof *desc); - } - - static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, -@@ -1741,8 +1751,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, - return ret; - } - } else if (ctxt->mode == X86EMUL_MODE_PROT64) { -- ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, -- sizeof(base3), &ctxt->exception); -+ ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3)); - if (ret != X86EMUL_CONTINUE) - return ret; - if (is_noncanonical_address(get_desc_base(&seg_desc) | -@@ -2055,11 +2064,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) - eip_addr = dt.address + (irq << 2); - cs_addr = dt.address + (irq << 2) + 2; - -- rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); -+ rc = linear_read_system(ctxt, cs_addr, &cs, 2); - if (rc != X86EMUL_CONTINUE) - return rc; - -- rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); -+ rc = linear_read_system(ctxt, eip_addr, &eip, 2); - if (rc != X86EMUL_CONTINUE) - return rc; - -@@ -3018,35 +3027,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, u16 old_tss_sel, - ulong old_tss_base, struct desc_struct *new_desc) - { -- const struct x86_emulate_ops *ops = ctxt->ops; - struct tss_segment_16 tss_seg; - int ret; - u32 new_tss_base = get_desc_base(new_desc); - -- ret = ops->read_std(ctxt, old_tss_base, 
&tss_seg, sizeof tss_seg, -- &ctxt->exception); -+ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); - if (ret != X86EMUL_CONTINUE) - return ret; - - save_state_to_tss16(ctxt, &tss_seg); - -- ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, -- &ctxt->exception); -+ ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); - if (ret != X86EMUL_CONTINUE) - return ret; - -- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, -- &ctxt->exception); -+ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); - if (ret != X86EMUL_CONTINUE) - return ret; - - if (old_tss_sel != 0xffff) { - tss_seg.prev_task_link = old_tss_sel; - -- ret = ops->write_std(ctxt, new_tss_base, -- &tss_seg.prev_task_link, -- sizeof tss_seg.prev_task_link, -- &ctxt->exception); -+ ret = linear_write_system(ctxt, new_tss_base, -+ &tss_seg.prev_task_link, -+ sizeof tss_seg.prev_task_link); - if (ret != X86EMUL_CONTINUE) - return ret; - } -@@ -3162,38 +3166,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, u16 old_tss_sel, - ulong old_tss_base, struct desc_struct *new_desc) - { -- const struct x86_emulate_ops *ops = ctxt->ops; - struct tss_segment_32 tss_seg; - int ret; - u32 new_tss_base = get_desc_base(new_desc); - u32 eip_offset = offsetof(struct tss_segment_32, eip); - u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector); - -- ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, -- &ctxt->exception); -+ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); - if (ret != X86EMUL_CONTINUE) - return ret; - - save_state_to_tss32(ctxt, &tss_seg); - - /* Only GP registers and segment selectors are saved */ -- ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, -- ldt_sel_offset - eip_offset, &ctxt->exception); -+ ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip, -+ ldt_sel_offset - eip_offset); - if (ret != X86EMUL_CONTINUE) - return ret; - -- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, -- &ctxt->exception); -+ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); - if (ret != X86EMUL_CONTINUE) - return ret; - - if (old_tss_sel != 0xffff) { - tss_seg.prev_task_link = old_tss_sel; - -- ret = ops->write_std(ctxt, new_tss_base, -- &tss_seg.prev_task_link, -- sizeof tss_seg.prev_task_link, -- &ctxt->exception); -+ ret = linear_write_system(ctxt, new_tss_base, -+ &tss_seg.prev_task_link, -+ sizeof tss_seg.prev_task_link); - if (ret != X86EMUL_CONTINUE) - return ret; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch deleted file mode 100644 index df60ee58..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch +++ /dev/null @@ -1,69 +0,0 @@ -From cff1c9cfd81b8a7cc350a02d37668b1e3896287e Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Mon, 29 Jan 2018 18:17:58 -0800 -Subject: [PATCH 03/42] kaiser: allocate pgd with order 0 when pti=off - -The 4.9.77 version of "x86/pti/efi: broken conversion from efi to kernel -page table" looked nicer than the 4.4.112 version, but was suboptimal on -machines booted with "pti=off" (or on AMD machines): it allocated pgd -with an order 1 page whatever the setting of kaiser_enabled. 
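Since kaiser_enabled is always 0 or 1, the flag itself can serve as the allocation order: order 0 is one 4k page, order 1 is an 8k-aligned pair. A user-space analogue of the idea, with hypothetical names standing in for __get_free_pages():

    #include <stdlib.h>

    #define PAGE_SIZE 4096UL

    static int isolation_enabled;   /* 0 or 1, decided at startup */

    /* The flag doubles as the order: one page when isolation is off,
     * an aligned 8k pair when it is on. */
    static void *alloc_pgd(void)
    {
        size_t bytes = PAGE_SIZE << isolation_enabled;

        return aligned_alloc(bytes, bytes);
    }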
- -Fix that by moving the definition of PGD_ALLOCATION_ORDER from -asm/pgalloc.h to asm/pgtable.h, which already defines kaiser_enabled. - -Fixes: 1b92c48a2eeb ("x86/pti/efi: broken conversion from efi to kernel page table") -Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> -Cc: Steven Sistare <steven.sistare@oracle.com> -Cc: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/pgalloc.h | 11 ----------- - arch/x86/include/asm/pgtable.h | 6 ++++++ - 2 files changed, 6 insertions(+), 11 deletions(-) - -diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h -index 1178a51..b6d4259 100644 ---- a/arch/x86/include/asm/pgalloc.h -+++ b/arch/x86/include/asm/pgalloc.h -@@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} - */ - extern gfp_t __userpte_alloc_gfp; - --#ifdef CONFIG_PAGE_TABLE_ISOLATION --/* -- * Instead of one PGD, we acquire two PGDs. Being order-1, it is -- * both 8k in size and 8k-aligned. That lets us just flip bit 12 -- * in a pointer to swap between the two 4k halves. -- */ --#define PGD_ALLOCATION_ORDER 1 --#else --#define PGD_ALLOCATION_ORDER 0 --#endif -- - /* - * Allocate and free page tables. - */ -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 2536f90..5af0401 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -20,9 +20,15 @@ - - #ifdef CONFIG_PAGE_TABLE_ISOLATION - extern int kaiser_enabled; -+/* -+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is -+ * both 8k in size and 8k-aligned. That lets us just flip bit 12 -+ * in a pointer to swap between the two 4k halves. -+ */ - #else - #define kaiser_enabled 0 - #endif -+#define PGD_ALLOCATION_ORDER kaiser_enabled - - void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); - void ptdump_walk_pgd_level_checkwx(void); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch deleted file mode 100644 index d1b9f3df..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch +++ /dev/null @@ -1,54 +0,0 @@ -From de05b6da8c54ed0aa2158ad3112ac582c88f0676 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Sun, 26 Mar 2017 23:51:24 +0200 -Subject: [PATCH 03/93] kvm/svm: Setup MCG_CAP on AMD properly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 74f169090b6f36b867c9df0454366dd9af6f62d1 ] - -MCG_CAP[63:9] bits are reserved on AMD. However, on an AMD guest, this -MSR returns 0x100010a. More specifically, bit 24 is set, which is simply -wrong. That bit is MCG_SER_P and is present only on Intel. Thus, clean -up the reserved bits in order not to confuse guests. 
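The cleanup is a single mask: keep MCG_CAP[8:0], drop everything above. As a standalone illustration (not the kernel's function), masking the observed value 0x100010a yields 0x10a, i.e. ten banks plus MCG_CTL_P, with the Intel-only MCG_SER_P bit (bit 24) cleared:

    #include <stdint.h>

    /* MCG_CAP[7:0] = bank count, bit 8 = MCG_CTL_P; [63:9] are
     * reserved on AMD, so everything above bit 8 is dropped. */
    static uint64_t amd_sanitize_mcg_cap(uint64_t mcg_cap)
    {
        return mcg_cap & 0x1ffULL;   /* 0x100010a -> 0x10a */
    }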
- -Signed-off-by: Borislav Petkov <bp@suse.de> -Cc: Joerg Roedel <joro@8bytes.org> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index b82bb66..2d96e30 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -5437,6 +5437,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) - avic_handle_ldr_update(vcpu); - } - -+static void svm_setup_mce(struct kvm_vcpu *vcpu) -+{ -+ /* [63:9] are reserved. */ -+ vcpu->arch.mcg_cap &= 0x1ff; -+} -+ - static struct kvm_x86_ops svm_x86_ops __ro_after_init = { - .cpu_has_kvm_support = has_svm, - .disabled_by_bios = is_disabled, -@@ -5552,6 +5558,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { - .pmu_ops = &amd_pmu_ops, - .deliver_posted_interrupt = svm_deliver_avic_intr, - .update_pi_irte = svm_update_pi_irte, -+ .setup_mce = svm_setup_mce, - }; - - static int __init svm_init(void) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch deleted file mode 100644 index 78e29b3c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 387470df93a2da429be36b0f31af62bf38cd17bc Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Thu, 29 Jun 2017 08:53:21 -0700 -Subject: [PATCH 003/103] x86/mm: Enable CR4.PCIDE on supported systems - -commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream. - -We can use PCID if the CPU has PCID and PGE and we're not on Xen. - -By itself, this has no effect. A followup patch will start using PCID. 
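For reference, the feature bits behind these checks are architectural: PCID is CPUID.01H:ECX[17], PGE is CPUID.01H:EDX[13], and the bit being enabled is CR4.PCIDE (bit 17). A user-space sketch of the raw query (illustrative only; the kernel goes through cpu_has() instead):

    #include <stdbool.h>
    #include <cpuid.h>   /* GCC/Clang helper */

    static bool pcid_and_pge_supported(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false;

        /* ECX bit 17 = PCID, EDX bit 13 = PGE */
        return (ecx & (1u << 17)) && (edx & (1u << 13));
    }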
- -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Reviewed-by: Nadav Amit <nadav.amit@gmail.com> -Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Juergen Gross <jgross@suse.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 8 ++++++++ - arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++ - arch/x86/xen/enlighten.c | 6 ++++++ - 3 files changed, 36 insertions(+) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index fc5abff..c13041e 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -192,6 +192,14 @@ static inline void __flush_tlb_all(void) - __flush_tlb_global(); - else - __flush_tlb(); -+ -+ /* -+ * Note: if we somehow had PCID but not PGE, then this wouldn't work -- -+ * we'd end up flushing kernel translations for the current ASID but -+ * we might fail to flush kernel translations for other cached ASIDs. -+ * -+ * To avoid this issue, we force PCID off if PGE is off. -+ */ - } - - static inline void __flush_tlb_one(unsigned long addr) -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 81c8a53..91588be 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -324,6 +324,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - } - } - -+static void setup_pcid(struct cpuinfo_x86 *c) -+{ -+ if (cpu_has(c, X86_FEATURE_PCID)) { -+ if (cpu_has(c, X86_FEATURE_PGE)) { -+ cr4_set_bits(X86_CR4_PCIDE); -+ } else { -+ /* -+ * flush_tlb_all(), as currently implemented, won't -+ * work if PCID is on but PGE is not. Since that -+ * combination doesn't exist on real hardware, there's -+ * no reason to try to fully support it, but it's -+ * polite to avoid corrupting data if we're on -+ * an improperly configured VM. -+ */ -+ clear_cpu_cap(c, X86_FEATURE_PCID); -+ } -+ } -+} -+ - /* - * Protection Keys are not available in 32-bit mode. - */ -@@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) - setup_smep(c); - setup_smap(c); - -+ /* Set up PCID */ -+ setup_pcid(c); -+ - /* - * The vendor-specific functions might have changed features. - * Now we do "generic changes." -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index bdd8556..5226379 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -442,6 +442,12 @@ static void __init xen_init_cpuid_mask(void) - ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ - -+ /* -+ * Xen PV would need some work to support PCID: CR3 handling as well -+ * as xen_flush_tlb_others() would need updating. 
-+ */ -+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */ -+ - if (!xen_initial_domain()) - cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch deleted file mode 100644 index 07dd1bf0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch +++ /dev/null @@ -1,109 +0,0 @@ -From f34570e1f6c56f5557b9a3acd73fce47f5727479 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Mon, 22 May 2017 15:30:01 -0700 -Subject: [PATCH 03/14] x86/mm: Reimplement flush_tlb_page() using - flush_tlb_mm_range() - -commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream. - -flush_tlb_page() was very similar to flush_tlb_mm_range() except that -it had a couple of issues: - - - It was missing an smp_mb() in the case where - current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit) - - - It was missing tracepoints and vm counter updates. - -The only reason that I can see for keeping it at as a separate -function is that it could avoid a few branches that -flush_tlb_mm_range() needs to decide to flush just one page. This -hardly seems worthwhile. If we decide we want to get rid of those -branches again, a better way would be to introduce an -__flush_tlb_mm_range() helper and make both flush_tlb_page() and -flush_tlb_mm_range() use it. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Acked-by: Kees Cook <keescook@chromium.org> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Borislav Petkov <bpetkov@suse.de> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Michal Hocko <mhocko@suse.com> -Cc: Nadav Amit <nadav.amit@gmail.com> -Cc: Nadav Amit <namit@vmware.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 6 +++++- - arch/x86/mm/tlb.c | 27 --------------------------- - 2 files changed, 5 insertions(+), 28 deletions(-) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index db8952a..eb5b512 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -353,11 +353,15 @@ static inline void flush_tlb_kernel_range(unsigned long start, - flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) - - extern void flush_tlb_all(void); --extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long vmflag); - extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); - -+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) -+{ -+ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); -+} -+ - void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, unsigned long end); -diff --git 
a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 2f9d41f..6884228 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -387,33 +387,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - preempt_enable(); - } - --void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) --{ -- struct mm_struct *mm = vma->vm_mm; -- -- preempt_disable(); -- -- if (current->active_mm == mm) { -- if (current->mm) { -- /* -- * Implicit full barrier (INVLPG) that synchronizes -- * with switch_mm. -- */ -- __flush_tlb_one(start); -- } else { -- leave_mm(smp_processor_id()); -- -- /* Synchronize with switch_mm. */ -- smp_mb(); -- } -- } -- -- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) -- flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); -- -- preempt_enable(); --} -- - static void do_flush_tlb_all(void *info) - { - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch deleted file mode 100644 index c78b3e80..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-x86-speculation-Update-Speculation-Control-microcode.patch +++ /dev/null @@ -1,69 +0,0 @@ -From f01ffef1901eda027651aba518686d44ed9fccf3 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sat, 10 Feb 2018 23:39:22 +0000 -Subject: [PATCH 03/12] x86/speculation: Update Speculation Control microcode - blacklist - -commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream. - -Intel have retroactively blessed the 0xc2 microcode on Skylake mobile -and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine -too. We blacklisted the latter purely because it was present with all -the other problematic ones in the 2018-01-08 release, but now it's -explicitly listed as OK. - -We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as -that appeared in one version of the blacklist and then reverted to -0x80 again. We can change it if 0x84 is actually announced to be safe. 
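The table being trimmed here is consulted at boot against the CPU's model, stepping and microcode revision. A hedged sketch of that kind of lookup, with placeholder rows and a simple exact-match policy (the kernel's actual matching rules may differ):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct sku_microcode {
        uint8_t  model;
        uint8_t  stepping;
        uint32_t microcode;
    };

    /* Placeholder rows; the real list is spectre_bad_microcodes[]. */
    static const struct sku_microcode bad_ucode[] = {
        { 0x4e, 0x03, 0xc2 },
    };

    static bool ucode_blacklisted(uint8_t model, uint8_t stepping, uint32_t rev)
    {
        for (size_t i = 0; i < sizeof(bad_ucode) / sizeof(bad_ucode[0]); i++) {
            if (bad_ucode[i].model == model &&
                bad_ucode[i].stepping == stepping &&
                bad_ucode[i].microcode == rev)
                return true;
        }
        return false;
    }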
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: arjan.van.de.ven@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Cc: rkrcmar@redhat.com -Cc: sironi@amazon.de -Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/intel.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 4097b43..e3b00ac 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -82,8 +82,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, -- { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, -- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, - { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, -@@ -95,8 +93,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, - { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, - { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, -- /* Updated in the 20180108 release; blacklist until we know otherwise */ -- { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, - /* Observed in the wild */ - { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, - { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch deleted file mode 100644 index d61b397e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch +++ /dev/null @@ -1,1025 +0,0 @@ -From ff1ce9f00432d65859fd923ce7eb86d605386f17 Mon Sep 17 00:00:00 2001 -From: Richard Fellner <richard.fellner@student.tugraz.at> -Date: Thu, 4 May 2017 14:26:50 +0200 -Subject: [PATCH 004/103] KAISER: Kernel Address Isolation - -This patch introduces our implementation of KAISER (Kernel Address Isolation to -have Side-channels Efficiently Removed), a kernel isolation technique to close -hardware side channels on kernel address information. 
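The core mechanism, visible in the _SWITCH_TO_*_CR3 macros this patch adds, is that the kernel and shadow page tables live in one 8kB-aligned block, so switching address spaces reduces to toggling bit 12 of CR3. A conceptual C model of those two macros (not code from the patch itself):

    #include <stdint.h>

    #define KAISER_SHADOW_PGD_OFFSET 0x1000ULL   /* bit 12: 4k between the two pgds */

    static inline uint64_t kernel_cr3(uint64_t cr3)
    {
        return cr3 & ~KAISER_SHADOW_PGD_OFFSET;   /* andq $(~0x1000), reg */
    }

    static inline uint64_t user_cr3(uint64_t cr3)
    {
        return cr3 | KAISER_SHADOW_PGD_OFFSET;    /* orq $(0x1000), reg */
    }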
- -More information about the patch can be found on: - - https://github.com/IAIK/KAISER - -From: Richard Fellner <richard.fellner@student.tugraz.at> -From: Daniel Gruss <daniel.gruss@iaik.tugraz.at> -Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode -Date: Thu, 4 May 2017 14:26:50 +0200 -Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2 -Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5 - -To: <linux-kernel@vger.kernel.org> -To: <kernel-hardening@lists.openwall.com> -Cc: <clementine.maurice@iaik.tugraz.at> -Cc: <moritz.lipp@iaik.tugraz.at> -Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at> -Cc: Richard Fellner <richard.fellner@student.tugraz.at> -Cc: Ingo Molnar <mingo@kernel.org> -Cc: <kirill.shutemov@linux.intel.com> -Cc: <anders.fogh@gdata-adan.de> - -After several recent works [1,2,3] KASLR on x86_64 was basically -considered dead by many researchers. We have been working on an -efficient but effective fix for this problem and found that not mapping -the kernel space when running in user mode is the solution to this -problem [4] (the corresponding paper [5] will be presented at ESSoS17). - -With this RFC patch we allow anybody to configure their kernel with the -flag CONFIG_KAISER to add our defense mechanism. - -If there are any questions we would love to answer them. -We also appreciate any comments! - -Cheers, -Daniel (+ the KAISER team from Graz University of Technology) - -[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf -[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf -[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf -[4] https://github.com/IAIK/KAISER -[5] https://gruss.cc/files/kaiser.pdf - -[patch based also on -https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch] - -Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> -Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> -Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> -Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 17 ++++ - arch/x86/entry/entry_64_compat.S | 7 +- - arch/x86/include/asm/hw_irq.h | 2 +- - arch/x86/include/asm/kaiser.h | 113 +++++++++++++++++++++++++ - arch/x86/include/asm/pgtable.h | 4 + - arch/x86/include/asm/pgtable_64.h | 21 +++++ - arch/x86/include/asm/pgtable_types.h | 12 ++- - arch/x86/include/asm/processor.h | 7 +- - arch/x86/kernel/cpu/common.c | 4 +- - arch/x86/kernel/espfix_64.c | 6 ++ - arch/x86/kernel/head_64.S | 16 +++- - arch/x86/kernel/irqinit.c | 2 +- - arch/x86/kernel/process.c | 2 +- - arch/x86/mm/Makefile | 2 +- - arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++ - arch/x86/mm/pageattr.c | 2 +- - arch/x86/mm/pgtable.c | 26 ++++++ - include/asm-generic/vmlinux.lds.h | 11 ++- - include/linux/percpu-defs.h | 30 +++++++ - init/main.c | 6 ++ - kernel/fork.c | 8 ++ - security/Kconfig | 7 ++ - 22 files changed, 449 insertions(+), 16 deletions(-) - create mode 100644 arch/x86/include/asm/kaiser.h - create mode 100644 arch/x86/mm/kaiser.c - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index ef766a3..6c880dc 100644 ---- 
a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -36,6 +36,7 @@ - #include <asm/smap.h> - #include <asm/pgtable_types.h> - #include <asm/export.h> -+#include <asm/kaiser.h> - #include <linux/err.h> - - /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ -@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64) - * it is too small to ever cause noticeable irq latency. - */ - SWAPGS_UNSAFE_STACK -+ SWITCH_KERNEL_CR3_NO_STACK - /* - * A hypervisor implementation might want to use a label - * after the swapgs, so that it can do the swapgs -@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath: - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 -+ SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - -@@ -323,10 +326,12 @@ return_from_SYSCALL_64: - syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_RCX_R11 -+ SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - - opportunistic_sysret_failed: -+ SWITCH_USER_CR3 - SWAPGS - jmp restore_c_regs_and_iret - END(entry_SYSCALL_64) -@@ -424,6 +429,7 @@ ENTRY(ret_from_fork) - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - TRACE_IRQS_ON /* user mode is traced as IRQS on */ -+ SWITCH_USER_CR3 - SWAPGS - jmp restore_regs_and_iret - -@@ -478,6 +484,7 @@ END(irq_entries_start) - * tracking that we're in kernel mode. - */ - SWAPGS -+ SWITCH_KERNEL_CR3 - - /* - * We need to tell lockdep that IRQs are off. We can't do this until -@@ -535,6 +542,7 @@ GLOBAL(retint_user) - mov %rsp,%rdi - call prepare_exit_to_usermode - TRACE_IRQS_IRETQ -+ SWITCH_USER_CR3 - SWAPGS - jmp restore_regs_and_iret - -@@ -612,6 +620,7 @@ native_irq_return_ldt: - - pushq %rdi /* Stash user RDI */ - SWAPGS -+ SWITCH_KERNEL_CR3 - movq PER_CPU_VAR(espfix_waddr), %rdi - movq %rax, (0*8)(%rdi) /* user RAX */ - movq (1*8)(%rsp), %rax /* user RIP */ -@@ -638,6 +647,7 @@ native_irq_return_ldt: - * still points to an RO alias of the ESPFIX stack. - */ - orq PER_CPU_VAR(espfix_stack), %rax -+ SWITCH_USER_CR3 - SWAPGS - movq %rax, %rsp - -@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry) - testl %edx, %edx - js 1f /* negative -> in kernel */ - SWAPGS -+ SWITCH_KERNEL_CR3 - xorl %ebx, %ebx - 1: ret - END(paranoid_entry) -@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit) - testl %ebx, %ebx /* swapgs needed? */ - jnz paranoid_exit_no_swapgs - TRACE_IRQS_IRETQ -+ SWITCH_USER_CR3_NO_STACK - SWAPGS_UNSAFE_STACK - jmp paranoid_exit_restore - paranoid_exit_no_swapgs: -@@ -1084,6 +1096,7 @@ ENTRY(error_entry) - * from user mode due to an IRET fault. - */ - SWAPGS -+ SWITCH_KERNEL_CR3 - - .Lerror_entry_from_usermode_after_swapgs: - /* -@@ -1135,6 +1148,7 @@ ENTRY(error_entry) - * Switch to kernel gsbase: - */ - SWAPGS -+ SWITCH_KERNEL_CR3 - - /* - * Pretend that the exception came from user mode: set up pt_regs -@@ -1233,6 +1247,7 @@ ENTRY(nmi) - */ - - SWAPGS_UNSAFE_STACK -+ SWITCH_KERNEL_CR3_NO_STACK - cld - movq %rsp, %rdx - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp -@@ -1273,6 +1288,7 @@ ENTRY(nmi) - * work, because we don't want to enable interrupts. Fortunately, - * do_nmi doesn't modify pt_regs. - */ -+ SWITCH_USER_CR3 - SWAPGS - jmp restore_c_regs_and_iret - -@@ -1484,6 +1500,7 @@ end_repeat_nmi: - testl %ebx, %ebx /* swapgs needed? 
*/
- jnz nmi_restore
- nmi_swapgs:
-+ SWITCH_USER_CR3_NO_STACK
- SWAPGS_UNSAFE_STACK
- nmi_restore:
- RESTORE_EXTRA_REGS
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index e1721da..f0e384e 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -13,6 +13,7 @@
- #include <asm/irqflags.h>
- #include <asm/asm.h>
- #include <asm/smap.h>
-+#include <asm/kaiser.h>
- #include <linux/linkage.h>
- #include <linux/err.h>
-
-@@ -48,6 +49,7 @@
- ENTRY(entry_SYSENTER_compat)
- /* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- /*
-@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat)
- ENTRY(entry_SYSCALL_compat)
- /* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
-
- /* Stash user ESP and switch to the kernel stack. */
- movl %esp, %r8d
-@@ -259,6 +262,7 @@ sysret32_from_system_call:
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
-+ SWITCH_USER_CR3
- movq RSP-ORIG_RAX(%rsp), %rsp
- swapgs
- sysretl
-@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat)
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
--
-+ SWITCH_KERNEL_CR3_NO_STACK
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
-@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat)
-
- /* Go back to user mode. */
- TRACE_IRQS_ON
-+ SWITCH_USER_CR3_NO_STACK
- SWAPGS
- jmp restore_regs_and_iret
- END(entry_INT80_compat)
-diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
-index b90e105..0817d63 100644
---- a/arch/x86/include/asm/hw_irq.h
-+++ b/arch/x86/include/asm/hw_irq.h
-@@ -178,7 +178,7 @@ extern char irq_entries_start[];
- #define VECTOR_RETRIGGERED ((void *)~0UL)
-
- typedef struct irq_desc* vector_irq_t[NR_VECTORS];
--DECLARE_PER_CPU(vector_irq_t, vector_irq);
-+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
-
- #endif /* !ASSEMBLY_ */
-
-diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
-new file mode 100644
-index 0000000..63ee830
---- /dev/null
-+++ b/arch/x86/include/asm/kaiser.h
-@@ -0,0 +1,113 @@
-+#ifndef _ASM_X86_KAISER_H
-+#define _ASM_X86_KAISER_H
-+
-+/* This file includes the definitions for the KAISER feature.
-+ * KAISER is a countermeasure against x86_64 side channel attacks on the kernel virtual memory.
-+ * It has a shadow pgd for every process. The shadow pgd has a minimalistic kernel-set mapped,
-+ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
-+ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
-+ * By this, the virtual memory caches are flushed, and the user cannot attack the whole kernel memory.
-+ *
-+ * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, such as the entry/exit functions
-+ * of the user space, or the stacks.
-+ */
-+#ifdef __ASSEMBLY__
-+#ifdef CONFIG_KAISER
-+
-+.macro _SWITCH_TO_KERNEL_CR3 reg
-+movq %cr3, \reg
-+andq $(~0x1000), \reg
-+movq \reg, %cr3
-+.endm
-+
-+.macro _SWITCH_TO_USER_CR3 reg
-+movq %cr3, \reg
-+orq $(0x1000), \reg
-+movq \reg, %cr3
-+.endm
-+
-+.macro SWITCH_KERNEL_CR3
-+pushq %rax
-+_SWITCH_TO_KERNEL_CR3 %rax
-+popq %rax
-+.endm
-+
-+.macro SWITCH_USER_CR3
-+pushq %rax
-+_SWITCH_TO_USER_CR3 %rax
-+popq %rax
-+.endm
-+
-+.macro SWITCH_KERNEL_CR3_NO_STACK
-+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-+_SWITCH_TO_KERNEL_CR3 %rax
-+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
-+.endm
-+
-+
-+.macro SWITCH_USER_CR3_NO_STACK
-+
-+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-+_SWITCH_TO_USER_CR3 %rax
-+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
-+
-+.endm
-+
-+#else /* CONFIG_KAISER */
-+
-+.macro SWITCH_KERNEL_CR3 reg
-+.endm
-+.macro SWITCH_USER_CR3 reg
-+.endm
-+.macro SWITCH_USER_CR3_NO_STACK
-+.endm
-+.macro SWITCH_KERNEL_CR3_NO_STACK
-+.endm
-+
-+#endif /* CONFIG_KAISER */
-+#else /* __ASSEMBLY__ */
-+
-+
-+#ifdef CONFIG_KAISER
-+// Upon kernel/user mode switch, it may happen that
-+// the address space has to be switched before the registers have been stored.
-+// To change the address space, another register is needed.
-+// A register therefore has to be stored/restored.
-+//
-+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
-+
-+#endif /* CONFIG_KAISER */
-+
-+/**
-+ * kaiser_add_mapping - map a virtual memory part to the shadow mapping
-+ * @addr: the start address of the range
-+ * @size: the size of the range
-+ * @flags: The mapping flags of the pages
-+ *
-+ * The mapping is done at a global scope, so no further synchronization is required.
-+ * The pages have to be unmapped manually again when they are no longer needed.
-+ */
-+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
-+
-+
-+/**
-+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
-+ * @start: the start address of the range
-+ * @size: the size of the range
-+ */
-+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
-+
-+/**
-+ * kaiser_init - Initialize the shadow mapping
-+ *
-+ * Most parts of the shadow mapping can be mapped at boot time.
-+ * Only the thread stacks have to be mapped at runtime.
-+ * The mapped regions are never unmapped.
-+ */ -+extern void kaiser_init(void); -+ -+#endif -+ -+ -+ -+#endif /* _ASM_X86_KAISER_H */ -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 437feb4..4b479c9 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, - static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - { - memcpy(dst, src, count * sizeof(pgd_t)); -+#ifdef CONFIG_KAISER -+ // clone the shadow pgd part as well -+ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); -+#endif - } - - #define PTE_SHIFT ilog2(PTRS_PER_PTE) -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index 1cc82ec..e6ea39f 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud) - native_set_pud(pud, native_make_pud(0)); - } - -+#ifdef CONFIG_KAISER -+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { -+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); -+} -+ -+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { -+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); -+} -+#endif /* CONFIG_KAISER */ -+ - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) - { -+#ifdef CONFIG_KAISER -+ // We know that a pgd is page aligned. -+ // Therefore the lower indices have to be mapped to user space. -+ // These pages are mapped to the shadow mapping. -+ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { -+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; -+ } -+ -+ pgdp->pgd = pgd.pgd & ~_PAGE_USER; -+#else /* CONFIG_KAISER */ - *pgdp = pgd; -+#endif - } - - static inline void native_pgd_clear(pgd_t *pgd) -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 8b4de22..00fecbb 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -45,7 +45,11 @@ - #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) - #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) - #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) --#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -+#ifdef CONFIG_KAISER -+#define _PAGE_GLOBAL (_AT(pteval_t, 0)) -+#else -+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -+#endif - #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) - #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) - #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) -@@ -119,7 +123,11 @@ - #define _PAGE_DEVMAP (_AT(pteval_t, 0)) - #endif - --#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -+#ifdef CONFIG_KAISER -+#define _PAGE_PROTNONE (_AT(pteval_t, 0)) -+#else -+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -+#endif - - #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_DIRTY) -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 83db0ea..3d4784e2 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -308,7 +308,7 @@ struct tss_struct { - - } ____cacheline_aligned; - --DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); -+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss); - - #ifdef CONFIG_X86_32 - DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); -@@ 
-335,6 +335,11 @@ union irq_stack_union { - char gs_base[40]; - unsigned long stack_canary; - }; -+ -+ struct { -+ char irq_stack_pointer[64]; -+ char unused[IRQ_STACK_SIZE - 64]; -+ }; - }; - - DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 91588be..3efde13 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu = { - - static const struct cpu_dev *this_cpu = &default_cpu; - --DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { -+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = { - #ifdef CONFIG_X86_64 - /* - * We need valid kernel segments for data and code in long mode too -@@ -1365,7 +1365,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [DEBUG_STACK - 1] = DEBUG_STKSZ - }; - --static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks -+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - - /* May not be marked __init: used by software suspend */ -diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c -index 04f89ca..9ff875a 100644 ---- a/arch/x86/kernel/espfix_64.c -+++ b/arch/x86/kernel/espfix_64.c -@@ -41,6 +41,7 @@ - #include <asm/pgalloc.h> - #include <asm/setup.h> - #include <asm/espfix.h> -+#include <asm/kaiser.h> - - /* - * Note: we only need 6*8 = 48 bytes for the espfix stack, but round -@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void) - /* Install the espfix pud into the kernel page directory */ - pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; - pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); -+#ifdef CONFIG_KAISER -+ // add the esp stack pud to the shadow mapping here. 
-+ // This can be done directly, because the fixup stack has its own pud -+ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); -+#endif - - /* Randomize the locations */ - init_espfix_random(); -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index b4421cc..9e849b5 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag) - .balign PAGE_SIZE; \ - GLOBAL(name) - -+#ifdef CONFIG_KAISER -+#define NEXT_PGD_PAGE(name) \ -+ .balign 2 * PAGE_SIZE; \ -+GLOBAL(name) -+#else -+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) -+#endif -+ - /* Automate the creation of 1 to 1 mapping pmd entries */ - #define PMDS(START, PERM, COUNT) \ - i = 0 ; \ -@@ -414,7 +422,7 @@ GLOBAL(name) - .endr - - __INITDATA --NEXT_PAGE(early_level4_pgt) -+NEXT_PGD_PAGE(early_level4_pgt) - .fill 511,8,0 - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE - -@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts) - .data - - #ifndef CONFIG_XEN --NEXT_PAGE(init_level4_pgt) -- .fill 512,8,0 -+NEXT_PGD_PAGE(init_level4_pgt) -+ .fill 2*512,8,0 - #else --NEXT_PAGE(init_level4_pgt) -+NEXT_PGD_PAGE(init_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE -diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c -index 1423ab1..f480b38 100644 ---- a/arch/x86/kernel/irqinit.c -+++ b/arch/x86/kernel/irqinit.c -@@ -51,7 +51,7 @@ static struct irqaction irq2 = { - .flags = IRQF_NO_THREAD, - }; - --DEFINE_PER_CPU(vector_irq_t, vector_irq) = { -+DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = { - [0 ... NR_VECTORS - 1] = VECTOR_UNUSED, - }; - -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 8e10e72..a55b320 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -41,7 +41,7 @@ - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ --__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { -+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = { - .x86_tss = { - .sp0 = TOP_OF_INIT_STACK, - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile -index 96d2b84..682c162 100644 ---- a/arch/x86/mm/Makefile -+++ b/arch/x86/mm/Makefile -@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o - obj-$(CONFIG_X86_INTEL_MPX) += mpx.o - obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o - obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o -- -+obj-$(CONFIG_KAISER) += kaiser.o -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -new file mode 100644 -index 0000000..cf1bb92 ---- /dev/null -+++ b/arch/x86/mm/kaiser.c -@@ -0,0 +1,160 @@ -+ -+ -+#include <linux/kernel.h> -+#include <linux/errno.h> -+#include <linux/string.h> -+#include <linux/types.h> -+#include <linux/bug.h> -+#include <linux/init.h> -+#include <linux/spinlock.h> -+#include <linux/mm.h> -+ -+#include <linux/uaccess.h> -+#include <asm/pgtable.h> -+#include <asm/pgalloc.h> -+#include <asm/desc.h> -+#ifdef CONFIG_KAISER -+ -+__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -+ -+/** -+ * Get the real ppn from a address in kernel mapping. 
-+ * @param address The virtual adrress -+ * @return the physical address -+ */ -+static inline unsigned long get_pa_from_mapping (unsigned long address) -+{ -+ pgd_t *pgd; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = pgd_offset_k(address); -+ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); -+ -+ pud = pud_offset(pgd, address); -+ BUG_ON(pud_none(*pud)); -+ -+ if (pud_large(*pud)) { -+ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); -+ } -+ -+ pmd = pmd_offset(pud, address); -+ BUG_ON(pmd_none(*pmd)); -+ -+ if (pmd_large(*pmd)) { -+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); -+ } -+ -+ pte = pte_offset_kernel(pmd, address); -+ BUG_ON(pte_none(*pte)); -+ -+ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); -+} -+ -+void _kaiser_copy (unsigned long start_addr, unsigned long size, -+ unsigned long flags) -+{ -+ pgd_t *pgd; -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long address; -+ unsigned long end_addr = start_addr + size; -+ unsigned long target_address; -+ -+ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); -+ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { -+ target_address = get_pa_from_mapping(address); -+ -+ pgd = native_get_shadow_pgd(pgd_offset_k(address)); -+ -+ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); -+ BUG_ON(pgd_large(*pgd)); -+ -+ pud = pud_offset(pgd, address); -+ if (pud_none(*pud)) { -+ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); -+ } -+ BUG_ON(pud_large(*pud)); -+ -+ pmd = pmd_offset(pud, address); -+ if (pmd_none(*pmd)) { -+ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); -+ } -+ BUG_ON(pmd_large(*pmd)); -+ -+ pte = pte_offset_kernel(pmd, address); -+ if (pte_none(*pte)) { -+ set_pte(pte, __pte(flags | target_address)); -+ } else { -+ BUG_ON(__pa(pte_page(*pte)) != target_address); -+ } -+ } -+} -+ -+// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping -+static inline void __init _kaiser_init(void) -+{ -+ pgd_t *pgd; -+ int i = 0; -+ -+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); -+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { -+ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); -+ } -+} -+ -+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -+spinlock_t shadow_table_lock; -+void __init kaiser_init(void) -+{ -+ int cpu; -+ spin_lock_init(&shadow_table_lock); -+ -+ spin_lock(&shadow_table_lock); -+ -+ _kaiser_init(); -+ -+ for_each_possible_cpu(cpu) { -+ // map the per cpu user variables -+ _kaiser_copy( -+ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), -+ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, -+ __PAGE_KERNEL); -+ } -+ -+ // map the entry/exit text section, which is responsible to switch between user- and kernel mode -+ _kaiser_copy( -+ (unsigned long) __entry_text_start, -+ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, -+ __PAGE_KERNEL_RX); -+ -+ // the fixed map address of the idt_table -+ _kaiser_copy( -+ (unsigned long) idt_descr.address, -+ sizeof(gate_desc) * NR_VECTORS, -+ __PAGE_KERNEL_RO); -+ -+ spin_unlock(&shadow_table_lock); -+} -+ -+// add a mapping to the shadow-mapping, and synchronize the mappings -+void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) -+{ -+ spin_lock(&shadow_table_lock); -+ _kaiser_copy(addr, size, flags); -+ 
spin_unlock(&shadow_table_lock); -+} -+ -+extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); -+void kaiser_remove_mapping(unsigned long start, unsigned long size) -+{ -+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); -+ spin_lock(&shadow_table_lock); -+ do { -+ unmap_pud_range(pgd, start, start + size); -+ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); -+ spin_unlock(&shadow_table_lock); -+} -+#endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c -index e3353c9..c17412f 100644 ---- a/arch/x86/mm/pageattr.c -+++ b/arch/x86/mm/pageattr.c -@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) - pud_clear(pud); - } - --static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) -+void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) - { - pud_t *pud = pud_offset(pgd, start); - -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 3feec5a..27d218b 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd) - #else - static inline pgd_t *_pgd_alloc(void) - { -+#ifdef CONFIG_KAISER -+ // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory -+ // block. Therefore, we have to allocate at least 3 pages. However, the -+ // __get_free_pages returns us 4 pages. Hence, we store the base pointer at -+ // the beginning of the page of our 8kb-aligned memory block in order to -+ // correctly free it afterwars. -+ -+ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); -+ -+ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) -+ { -+ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; -+ return (pgd_t *) pages; -+ } -+ else -+ { -+ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; -+ return (pgd_t *) (pages + PAGE_SIZE); -+ } -+#else - return (pgd_t *)__get_free_page(PGALLOC_GFP); -+#endif - } - - static inline void _pgd_free(pgd_t *pgd) - { -+#ifdef CONFIG_KAISER -+ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); -+ free_pages(pages, get_order(4*PAGE_SIZE)); -+#else - free_page((unsigned long)pgd); -+#endif - } - #endif /* CONFIG_X86_PAE */ - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index 31e1d63..0b16b5d 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -764,7 +764,16 @@ - */ - #define PERCPU_INPUT(cacheline) \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ -- *(.data..percpu..first) \ -+ \ -+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ -+ *(.data..percpu..first) \ -+ . = ALIGN(cacheline); \ -+ *(.data..percpu..user_mapped) \ -+ *(.data..percpu..user_mapped..shared_aligned) \ -+ . = ALIGN(PAGE_SIZE); \ -+ *(.data..percpu..user_mapped..page_aligned) \ -+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ -+ \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . 
= ALIGN(cacheline); \ -diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h -index 8f16299..8ea945f 100644 ---- a/include/linux/percpu-defs.h -+++ b/include/linux/percpu-defs.h -@@ -35,6 +35,12 @@ - - #endif - -+#ifdef CONFIG_KAISER -+#define USER_MAPPED_SECTION "..user_mapped" -+#else -+#define USER_MAPPED_SECTION "" -+#endif -+ - /* - * Base implementations of per-CPU variable declarations and definitions, where - * the section in which the variable is to be placed is provided by the -@@ -115,6 +121,12 @@ - #define DEFINE_PER_CPU(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, "") - -+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ -+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) -+ -+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ -+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) -+ - /* - * Declaration/definition used for per-CPU variables that must come first in - * the set of variables. -@@ -144,6 +156,14 @@ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ -+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ -+ ____cacheline_aligned_in_smp -+ -+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ -+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ -+ ____cacheline_aligned_in_smp -+ - #define DECLARE_PER_CPU_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ - ____cacheline_aligned -@@ -162,6 +182,16 @@ - #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ - __aligned(PAGE_SIZE) -+/* -+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. -+ */ -+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -+ __aligned(PAGE_SIZE) -+ -+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -+ __aligned(PAGE_SIZE) - - /* - * Declaration/definition used for per-CPU variables that must be read mostly. 
-diff --git a/init/main.c b/init/main.c
-index f23b7fa..d2c8c23 100644
---- a/init/main.c
-+++ b/init/main.c
-@@ -87,6 +87,9 @@
- #include <asm/setup.h>
- #include <asm/sections.h>
- #include <asm/cacheflush.h>
-+#ifdef CONFIG_KAISER
-+#include <asm/kaiser.h>
-+#endif
-
- static int kernel_init(void *);
-
-@@ -474,6 +477,9 @@ static void __init mm_init(void)
- pgtable_init();
- vmalloc_init();
- ioremap_huge_init();
-+#ifdef CONFIG_KAISER
-+ kaiser_init();
-+#endif
- }
-
- asmlinkage __visible void __init start_kernel(void)
-diff --git a/kernel/fork.c b/kernel/fork.c
-index f1751cb..61748d1 100644
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
- #endif
- }
-
-+extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
- static inline void free_thread_stack(struct task_struct *tsk)
- {
-+#ifdef CONFIG_KAISER
-+ kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
-+#endif
- #ifdef CONFIG_VMAP_STACK
- if (task_stack_vm_area(tsk)) {
- unsigned long flags;
-@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
- *stackend = STACK_END_MAGIC; /* for overflow detection */
- }
-
-+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
- static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
- {
- struct task_struct *tsk;
-@@ -495,6 +500,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
- * functions again.
- */
- tsk->stack = stack;
-+#ifdef CONFIG_KAISER
-+ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
-+#endif
- #ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
- #endif
-diff --git a/security/Kconfig b/security/Kconfig
-index 118f454..f515ac3 100644
---- a/security/Kconfig
-+++ b/security/Kconfig
-@@ -30,6 +30,13 @@ config SECURITY
- model will be used.
-
- If you are unsure how to answer this question, answer N.
-+config KAISER
-+ bool "Remove the kernel mapping in user mode"
-+ depends on X86_64
-+ depends on !PARAVIRT
-+ help
-+ This enforces a strict kernel and user space isolation in order to close
-+ hardware side channels on kernel address information.
-
- config SECURITYFS
- bool "Enable the securityfs filesystem"
---
-2.7.4
-
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch
deleted file mode 100644
index dd1f4c29..00000000
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From ce7bea11dfe01825a2ced79b5bcc04b7e781e63b Mon Sep 17 00:00:00 2001
-From: Liran Alon <liran.alon@oracle.com>
-Date: Sun, 5 Nov 2017 16:56:33 +0200
-Subject: [PATCH 04/33] KVM: x86: emulator: Return to user-mode on L1 CPL=0
- emulation failure
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-[ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ]
-
-In this case, handle_emulation_failure() fills kvm_run with
-internal-error information which it expects to be delivered
-to user-mode for further processing.
-However, the code reports a wrong return value, which makes KVM never
-return to user-mode in this scenario.
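The one-line fix matters because of how the vcpu run loop consumes emulation results: only EMULATE_USER_EXIT makes the loop return 0 from the KVM_RUN ioctl, which is what hands the populated kvm_run buffer to userspace. Schematically (a hedged reconstruction of the convention, not the kernel's exact code):

    enum emulation_result { EMULATE_DONE, EMULATE_USER_EXIT, EMULATE_FAIL };

    /* 0 = leave the KVM_RUN ioctl, kvm_run is valid for userspace;
     * 1 = keep executing the guest inside the kernel loop. */
    static int consume_emulation_result(enum emulation_result er)
    {
        if (er == EMULATE_USER_EXIT)
            return 0;   /* deliver KVM_EXIT_INTERNAL_ERROR */
        return 1;
    }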
- -Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to -userspace") - -Signed-off-by: Liran Alon <liran.alon@oracle.com> -Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 9cc9117..abbb37a 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -5265,7 +5265,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; -- r = EMULATE_FAIL; -+ r = EMULATE_USER_EXIT; - } - kvm_queue_exception(vcpu, UD_VECTOR); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch deleted file mode 100644 index b1c3c02d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch +++ /dev/null @@ -1,200 +0,0 @@ -From 1ea42745a9e721d08413cd0c6728934da385010b Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Wed, 6 Jun 2018 17:37:49 +0200 -Subject: [PATCH 04/10] KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and - kvm_write_guest_virt_system - -commit ce14e868a54edeb2e30cb7a7b104a2fc4b9d76ca upstream. - -Int the next patch the emulator's .read_std and .write_std callbacks will -grow another argument, which is not needed in kvm_read_guest_virt and -kvm_write_guest_virt_system's callers. Since we have to make separate -functions, let's give the currently existing names a nicer interface, too. - -Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) -Cc: stable@vger.kernel.org -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 23 ++++++++++------------- - arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++------------- - arch/x86/kvm/x86.h | 4 ++-- - 3 files changed, 38 insertions(+), 28 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index d39062c..a81463d 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -6906,8 +6906,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, - vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) - return 1; - -- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, -- sizeof(vmptr), &e)) { -+ if (kvm_read_guest_virt(vcpu, gva, &vmptr, sizeof(vmptr), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } -@@ -7455,8 +7454,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) - vmx_instruction_info, true, &gva)) - return 1; - /* _system ok, as nested_vmx_check_permission verified cpl=0 */ -- kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, -- &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL); -+ kvm_write_guest_virt_system(vcpu, gva, &field_value, -+ (is_long_mode(vcpu) ? 
8 : 4), NULL); - } - - nested_vmx_succeed(vcpu); -@@ -7491,8 +7490,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) - if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, false, &gva)) - return 1; -- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, -- &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { -+ if (kvm_read_guest_virt(vcpu, gva, &field_value, -+ (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } -@@ -7589,9 +7588,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) - vmx_instruction_info, true, &vmcs_gva)) - return 1; - /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ -- if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, -- (void *)&to_vmx(vcpu)->nested.current_vmptr, -- sizeof(u64), &e)) { -+ if (kvm_write_guest_virt_system(vcpu, vmcs_gva, -+ (void *)&to_vmx(vcpu)->nested.current_vmptr, -+ sizeof(u64), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } -@@ -7645,8 +7644,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, &gva)) - return 1; -- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, -- sizeof(operand), &e)) { -+ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } -@@ -7709,8 +7707,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, &gva)) - return 1; -- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, -- sizeof(u32), &e)) { -+ if (kvm_read_guest_virt(vcpu, gva, &vpid, sizeof(u32), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index d7974fc..af8e120 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -4370,11 +4370,10 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, - return X86EMUL_CONTINUE; - } - --int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, -+int kvm_read_guest_virt(struct kvm_vcpu *vcpu, - gva_t addr, void *val, unsigned int bytes, - struct x86_exception *exception) - { -- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; - - return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, -@@ -4382,9 +4381,9 @@ int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, - } - EXPORT_SYMBOL_GPL(kvm_read_guest_virt); - --static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, -- gva_t addr, void *val, unsigned int bytes, -- struct x86_exception *exception) -+static int emulator_read_std(struct x86_emulate_ctxt *ctxt, -+ gva_t addr, void *val, unsigned int bytes, -+ struct x86_exception *exception) - { - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); -@@ -4399,18 +4398,16 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, - return r < 0 ? 
X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; - } - --int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, -- gva_t addr, void *val, -- unsigned int bytes, -- struct x86_exception *exception) -+static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, -+ struct kvm_vcpu *vcpu, u32 access, -+ struct x86_exception *exception) - { -- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - void *data = val; - int r = X86EMUL_CONTINUE; - - while (bytes) { - gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, -- PFERR_WRITE_MASK, -+ access, - exception); - unsigned offset = addr & (PAGE_SIZE-1); - unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); -@@ -4431,6 +4428,22 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, - out: - return r; - } -+ -+static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, -+ unsigned int bytes, struct x86_exception *exception) -+{ -+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); -+ -+ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, -+ PFERR_WRITE_MASK, exception); -+} -+ -+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, -+ unsigned int bytes, struct x86_exception *exception) -+{ -+ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, -+ PFERR_WRITE_MASK, exception); -+} - EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); - - static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, -@@ -5137,8 +5150,8 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) - static const struct x86_emulate_ops emulate_ops = { - .read_gpr = emulator_read_gpr, - .write_gpr = emulator_write_gpr, -- .read_std = kvm_read_guest_virt_system, -- .write_std = kvm_write_guest_virt_system, -+ .read_std = emulator_read_std, -+ .write_std = emulator_write_std, - .read_phys = kvm_read_guest_phys_system, - .fetch = kvm_fetch_guest_virt, - .read_emulated = emulator_read_emulated, -diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h -index e8ff3e4..2133a18 100644 ---- a/arch/x86/kvm/x86.h -+++ b/arch/x86/kvm/x86.h -@@ -161,11 +161,11 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); - void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); - u64 get_kvmclock_ns(struct kvm *kvm); - --int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, -+int kvm_read_guest_virt(struct kvm_vcpu *vcpu, - gva_t addr, void *val, unsigned int bytes, - struct x86_exception *exception); - --int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, -+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, - gva_t addr, void *val, unsigned int bytes, - struct x86_exception *exception); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch deleted file mode 100644 index 3d7259ab..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 230ca3c5a44c752650e6bac9a4fe0eefc5ff0758 Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Wed, 5 Apr 2017 09:14:40 -0700 -Subject: [PATCH 04/93] kvm: nVMX: Disallow userspace-injected exceptions in - guest mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 28d06353881939703c34d82a1465136af176c620 ] - -The 
userspace exception injection API and code path are entirely -unprepared for exceptions that might cause a VM-exit from L2 to L1, so -the best course of action may be to simply disallow this for now. - -1. The API provides no mechanism for userspace to specify the new DR6 -bits for a #DB exception or the new CR2 value for a #PF -exception. Presumably, userspace is expected to modify these registers -directly with KVM_SET_SREGS before the next KVM_RUN ioctl. However, in -the event that L1 intercepts the exception, these registers should not -be changed. Instead, the new values should be provided in the -exit_qualification field of vmcs12 (Intel SDM vol 3, section 27.1). - -2. In the case of a userspace-injected #DB, inject_pending_event() -clears DR7.GD before calling vmx_queue_exception(). However, in the -event that L1 intercepts the exception, this is too early, because -DR7.GD should not be modified by a #DB that causes a VM-exit directly -(Intel SDM vol 3, section 27.1). - -3. If the injected exception is a #PF, nested_vmx_check_exception() -doesn't properly check whether or not L1 is interested in the -associated error code (using the #PF error code mask and match fields -from vmcs12). It may either return 0 when it should call -nested_vmx_vmexit() or vice versa. - -4. nested_vmx_check_exception() assumes that it is dealing with a -hardware-generated exception intercept from L2, with some of the -relevant details (the VM-exit interruption-information and the exit -qualification) live in vmcs02. For userspace-injected exceptions, this -is not the case. - -5. prepare_vmcs12() assumes that when its exit_intr_info argument -specifies valid information with a valid error code that it can VMREAD -the VM-exit interruption error code from vmcs02. For -userspace-injected exceptions, this is not the case. - -Signed-off-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 9f0f7e2..b27b93d 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -3056,7 +3056,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, - return -EINVAL; - - if (events->exception.injected && -- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) -+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || -+ is_guest_mode(vcpu))) - return -EINVAL; - - process_nmi(vcpu); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch deleted file mode 100644 index 990cb048..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 06424642a3712e54821ac22bba000779c0004faa Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Thu, 28 Sep 2017 16:58:26 -0500 -Subject: [PATCH 04/42] x86/asm: Fix inline asm call constraints for GCC 4.4 - -commit 520a13c530aeb5f63e011d668c42db1af19ed349 upstream. 
- -The kernel test bot (run by Xiaolong Ye) reported that the following commit: - - f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") - -is causing double faults in a kernel compiled with GCC 4.4. - -Linus subsequently diagnosed the crash pattern and the buggy commit and found that -the issue is with this code: - - register unsigned int __asm_call_sp asm("esp"); - #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) - -Even on a 64-bit kernel, it's using ESP instead of RSP. That causes GCC -to produce the following bogus code: - - ffffffff8147461d: 89 e0 mov %esp,%eax - ffffffff8147461f: 4c 89 f7 mov %r14,%rdi - ffffffff81474622: 4c 89 fe mov %r15,%rsi - ffffffff81474625: ba 20 00 00 00 mov $0x20,%edx - ffffffff8147462a: 89 c4 mov %eax,%esp - ffffffff8147462c: e8 bf 52 05 00 callq ffffffff814c98f0 <copy_user_generic_unrolled> - -Despite the absurdity of it backing up and restoring the stack pointer -for no reason, the bug is actually the fact that it's only backing up -and restoring the lower 32 bits of the stack pointer. The upper 32 bits -are getting cleared out, corrupting the stack pointer. - -So change the '__asm_call_sp' register variable to be associated with -the actual full-size stack pointer. - -This also requires changing the __ASM_SEL() macro to be based on the -actual compiled arch size, rather than the CONFIG value, because -CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso). -Otherwise Clang fails to build the kernel because it complains about the -use of a 64-bit register (RSP) in a 32-bit file. - -Reported-and-Bisected-and-Tested-by: kernel test robot <xiaolong.ye@intel.com> -Diagnosed-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Alexander Potapenko <glider@google.com> -Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arnd Bergmann <arnd@arndb.de> -Cc: Dmitriy Vyukov <dvyukov@google.com> -Cc: LKP <lkp@01.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Matthias Kaehlcke <mka@chromium.org> -Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") -Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Matthias Kaehlcke <mka@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/asm.h | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h -index 0052352..7bb29a4 100644 ---- a/arch/x86/include/asm/asm.h -+++ b/arch/x86/include/asm/asm.h -@@ -11,10 +11,12 @@ - # define __ASM_FORM_COMMA(x) " " #x "," - #endif - --#ifdef CONFIG_X86_32 -+#ifndef __x86_64__ -+/* 32 bit */ - # define __ASM_SEL(a,b) __ASM_FORM(a) - # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) - #else -+/* 64 bit */ - # define __ASM_SEL(a,b) __ASM_FORM(b) - # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch deleted file mode 100644 index 24b7bdc8..00000000 --- 
a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch +++ /dev/null @@ -1,314 +0,0 @@ -From e55eb19b04f78aa3343a6eae99fd557f613ccd99 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 28 May 2017 10:00:14 -0700 -Subject: [PATCH 04/14] x86/mm: Remove the UP asm/tlbflush.h code, always use - the (formerly) SMP code - -commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream. - -The UP asm/tlbflush.h generates somewhat nicer code than the SMP version. -Aside from that, it's fallen quite a bit behind the SMP code: - - - flush_tlb_mm_range() didn't flush individual pages if the range - was small. - - - The lazy TLB code was much weaker. This usually wouldn't matter, - but, if a kernel thread flushed its lazy "active_mm" more than - once (due to reclaim or similar), it wouldn't be unlazied and - would instead pointlessly flush repeatedly. - - - Tracepoints were missing. - -Aside from that, simply having the UP code around was a maintanence -burden, since it means that any change to the TLB flush code had to -make sure not to break it. - -Simplify everything by deleting the UP code. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bpetkov@suse.de> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Michal Hocko <mhocko@suse.com> -Cc: Nadav Amit <nadav.amit@gmail.com> -Cc: Nadav Amit <namit@vmware.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-mm@kvack.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/Kconfig | 2 +- - arch/x86/include/asm/hardirq.h | 2 +- - arch/x86/include/asm/mmu.h | 6 --- - arch/x86/include/asm/mmu_context.h | 2 - - arch/x86/include/asm/tlbflush.h | 78 +------------------------------------- - arch/x86/mm/init.c | 2 - - arch/x86/mm/tlb.c | 17 +-------- - 7 files changed, 5 insertions(+), 104 deletions(-) - -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 7132252..f0bcf23 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -45,7 +45,7 @@ config X86 - select ARCH_USE_CMPXCHG_LOCKREF if X86_64 - select ARCH_USE_QUEUED_RWLOCKS - select ARCH_USE_QUEUED_SPINLOCKS -- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP -+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - select ARCH_WANTS_DYNAMIC_TASK_STRUCT - select ARCH_WANT_FRAME_POINTERS - select ARCH_WANT_IPC_PARSE_VERSION if X86_32 -diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h -index 59405a2..9b76cd3 100644 ---- a/arch/x86/include/asm/hardirq.h -+++ b/arch/x86/include/asm/hardirq.h -@@ -22,8 +22,8 @@ typedef struct { - #ifdef CONFIG_SMP - unsigned int irq_resched_count; - unsigned int irq_call_count; -- unsigned int irq_tlb_count; - #endif -+ unsigned int irq_tlb_count; - #ifdef CONFIG_X86_THERMAL_VECTOR - unsigned int irq_thermal_count; - #endif -diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index 72198c6..8b272a0 100644 ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -33,12 +33,6 @@ typedef struct { - #endif - } mm_context_t; - --#ifdef CONFIG_SMP - void leave_mm(int cpu); --#else --static inline void leave_mm(int cpu) --{ --} --#endif - - #endif /* _ASM_X86_MMU_H 
*/ -diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 8e0a9fe..762d6c6 100644 ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) - - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) - { --#ifdef CONFIG_SMP - if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); --#endif - } - - static inline int init_new_context(struct task_struct *tsk, -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index eb5b512..94146f6 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -7,6 +7,7 @@ - #include <asm/processor.h> - #include <asm/cpufeature.h> - #include <asm/special_insns.h> -+#include <asm/smp.h> - - static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) -@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void) - #endif - - struct tlb_state { --#ifdef CONFIG_SMP - struct mm_struct *active_mm; - int state; --#endif - - /* - * Access to this CR4 shadow and to H/W CR4 is protected by -@@ -272,79 +271,6 @@ static inline void __flush_tlb_one(unsigned long addr) - * and page-granular flushes are available only on i486 and up. - */ - --#ifndef CONFIG_SMP -- --/* "_up" is for UniProcessor. -- * -- * This is a helper for other header functions. *Not* intended to be called -- * directly. All global TLB flushes need to either call this, or to bump the -- * vm statistics themselves. -- */ --static inline void __flush_tlb_up(void) --{ -- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); -- __flush_tlb(); --} -- --static inline void flush_tlb_all(void) --{ -- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); -- __flush_tlb_all(); --} -- --static inline void local_flush_tlb(void) --{ -- __flush_tlb_up(); --} -- --static inline void flush_tlb_mm(struct mm_struct *mm) --{ -- if (mm == current->active_mm) -- __flush_tlb_up(); --} -- --static inline void flush_tlb_page(struct vm_area_struct *vma, -- unsigned long addr) --{ -- if (vma->vm_mm == current->active_mm) -- __flush_tlb_one(addr); --} -- --static inline void flush_tlb_range(struct vm_area_struct *vma, -- unsigned long start, unsigned long end) --{ -- if (vma->vm_mm == current->active_mm) -- __flush_tlb_up(); --} -- --static inline void flush_tlb_mm_range(struct mm_struct *mm, -- unsigned long start, unsigned long end, unsigned long vmflag) --{ -- if (mm == current->active_mm) -- __flush_tlb_up(); --} -- --static inline void native_flush_tlb_others(const struct cpumask *cpumask, -- struct mm_struct *mm, -- unsigned long start, -- unsigned long end) --{ --} -- --static inline void reset_lazy_tlbstate(void) --{ --} -- --static inline void flush_tlb_kernel_range(unsigned long start, -- unsigned long end) --{ -- flush_tlb_all(); --} -- --#else /* SMP */ -- --#include <asm/smp.h> -- - #define local_flush_tlb() __flush_tlb() - - #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) -@@ -375,8 +301,6 @@ static inline void reset_lazy_tlbstate(void) - this_cpu_write(cpu_tlbstate.active_mm, &init_mm); - } - --#endif /* SMP */ -- - #ifndef CONFIG_PARAVIRT - #define flush_tlb_others(mask, mm, start, end) \ - native_flush_tlb_others(mask, mm, start, end) -diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c -index 05a9855..a5e79b4 100644 ---- a/arch/x86/mm/init.c -+++ b/arch/x86/mm/init.c -@@ -745,10 +745,8 @@ void __init 
zone_sizes_init(void) - } - - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { --#ifdef CONFIG_SMP - .active_mm = &init_mm, - .state = 0, --#endif - .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ - }; - EXPORT_SYMBOL_GPL(cpu_tlbstate); -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 6884228..613d07e 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -16,7 +16,7 @@ - #include <asm/kaiser.h> - - /* -- * Smarter SMP flushing macros. -+ * TLB flushing, formerly SMP-only - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about -@@ -29,8 +29,6 @@ - * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi - */ - --#ifdef CONFIG_SMP -- - struct flush_tlb_info { - struct mm_struct *flush_mm; - unsigned long flush_start; -@@ -90,8 +88,6 @@ void leave_mm(int cpu) - } - EXPORT_SYMBOL_GPL(leave_mm); - --#endif /* CONFIG_SMP */ -- - void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) - { -@@ -122,10 +118,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - set_pgd(pgd, init_mm.pgd[stack_pgd_index]); - } - --#ifdef CONFIG_SMP - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - this_cpu_write(cpu_tlbstate.active_mm, next); --#endif - - cpumask_set_cpu(cpu, mm_cpumask(next)); - -@@ -183,9 +177,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - if (unlikely(prev->context.ldt != next->context.ldt)) - load_mm_ldt(next); - #endif -- } --#ifdef CONFIG_SMP -- else { -+ } else { - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); - -@@ -212,11 +204,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - load_mm_ldt(next); - } - } --#endif - } - --#ifdef CONFIG_SMP -- - /* - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] -@@ -471,5 +460,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) - return 0; - } - late_initcall(create_tlb_single_page_flush_ceiling); -- --#endif /* CONFIG_SMP */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch deleted file mode 100644 index 20c32ab8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-x86-speculation-Correct-Speculation-Control-microcod.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d0ed9c041b4312a7245912bee08d0c6e7631c9a1 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Mon, 12 Feb 2018 15:27:34 +0000 -Subject: [PATCH 04/12] x86/speculation: Correct Speculation Control microcode - blacklist again - -commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream. - -Arjan points out that the Intel document only clears the 0xc2 microcode -on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). -For the Skylake H/S platform it's OK but for Skylake E3 which has the -same CPUID it isn't (yet) cleared. - -So removing it from the blacklist was premature. Put it back for now. - -Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was -featured in one of the early revisions of the Intel document was never -released to the public, and won't be until/unless it is also validated -as safe. So those can change to 0x80 which is what all *other* versions -of the doc have identified. 
- -Once the retrospective testing of existing public microcodes is done, we -should be back into a mode where new microcodes are only released in -batches and we shouldn't even need to update the blacklist for those -anyway, so this tweaking of the list isn't expected to be a thing which -keeps happening. - -Requested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: arjan.van.de.ven@intel.com -Cc: dave.hansen@intel.com -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/intel.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index e3b00ac..02cb2e3 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -75,13 +75,14 @@ struct sku_microcode { - u32 microcode; - }; - static const struct sku_microcode spectre_bad_microcodes[] = { -- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, -- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, -- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, -- { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, -- { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, -+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, -+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, -+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, - { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch deleted file mode 100644 index 49770e88..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 585df9100649b5038250e1c33cf8af019a77844c Mon Sep 17 00:00:00 2001 -From: Liran Alon <liran.alon@oracle.com> -Date: Sun, 5 Nov 2017 16:56:34 +0200 -Subject: [PATCH 05/33] KVM: x86: Don't re-execute instruction when not passing - CR2 value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ] - -In case of instruction-decode failure or emulation failure, -x86_emulate_instruction() will call reexecute_instruction() which will -attempt to use the cr2 value passed to x86_emulate_instruction(). 
-However, when x86_emulate_instruction() is called from -emulate_instruction(), cr2 is not passed (passed as 0) and therefore -it doesn't make sense to execute reexecute_instruction() logic at all. - -Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") - -Signed-off-by: Liran Alon <liran.alon@oracle.com> -Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kvm_host.h | 3 ++- - arch/x86/kvm/vmx.c | 2 +- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index bdde807..6f6ee68 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - static inline int emulate_instruction(struct kvm_vcpu *vcpu, - int emulation_type) - { -- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); -+ return x86_emulate_instruction(vcpu, 0, -+ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); - } - - void kvm_enable_efer_bits(u64); -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index ee766c2..8e5001d 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -6232,7 +6232,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) - if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) - return 1; - -- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); -+ err = emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch deleted file mode 100644 index 0a554805..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kaiser-merged-update.patch +++ /dev/null @@ -1,1327 +0,0 @@ -From 63e6d8f6f8a48f02da9fbd55819b1154efad82ba Mon Sep 17 00:00:00 2001 -From: Dave Hansen <dave.hansen@linux.intel.com> -Date: Wed, 30 Aug 2017 16:23:00 -0700 -Subject: [PATCH 005/103] kaiser: merged update - -Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging). 
- -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 105 ++++++++++-- - arch/x86/include/asm/kaiser.h | 43 +++-- - arch/x86/include/asm/pgtable.h | 18 +- - arch/x86/include/asm/pgtable_64.h | 48 +++++- - arch/x86/include/asm/pgtable_types.h | 6 +- - arch/x86/kernel/espfix_64.c | 13 +- - arch/x86/kernel/head_64.S | 19 ++- - arch/x86/kernel/ldt.c | 27 ++- - arch/x86/kernel/tracepoint.c | 2 + - arch/x86/mm/kaiser.c | 313 +++++++++++++++++++++++++---------- - arch/x86/mm/pageattr.c | 63 +++++-- - arch/x86/mm/pgtable.c | 40 ++--- - include/linux/kaiser.h | 26 +++ - kernel/fork.c | 9 +- - security/Kconfig | 5 + - 15 files changed, 549 insertions(+), 188 deletions(-) - create mode 100644 include/linux/kaiser.h - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 6c880dc..d84e3a7 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath: - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 -+ /* -+ * This opens a window where we have a user CR3, but are -+ * running in the kernel. This makes using the CS -+ * register useless for telling whether or not we need to -+ * switch CR3 in NMIs. Normal interrupts are OK because -+ * they are off here. -+ */ - SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 -@@ -326,11 +333,25 @@ return_from_SYSCALL_64: - syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_RCX_R11 -+ /* -+ * This opens a window where we have a user CR3, but are -+ * running in the kernel. This makes using the CS -+ * register useless for telling whether or not we need to -+ * switch CR3 in NMIs. Normal interrupts are OK because -+ * they are off here. -+ */ - SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - - opportunistic_sysret_failed: -+ /* -+ * This opens a window where we have a user CR3, but are -+ * running in the kernel. This makes using the CS -+ * register useless for telling whether or not we need to -+ * switch CR3 in NMIs. Normal interrupts are OK because -+ * they are off here. -+ */ - SWITCH_USER_CR3 - SWAPGS - jmp restore_c_regs_and_iret -@@ -1087,6 +1108,13 @@ ENTRY(error_entry) - cld - SAVE_C_REGS 8 - SAVE_EXTRA_REGS 8 -+ /* -+ * error_entry() always returns with a kernel gsbase and -+ * CR3. We must also have a kernel CR3/gsbase before -+ * calling TRACE_IRQS_*. Just unconditionally switch to -+ * the kernel CR3 here. -+ */ -+ SWITCH_KERNEL_CR3 - xorl %ebx, %ebx - testb $3, CS+8(%rsp) - jz .Lerror_kernelspace -@@ -1096,7 +1124,6 @@ ENTRY(error_entry) - * from user mode due to an IRET fault. - */ - SWAPGS -- SWITCH_KERNEL_CR3 - - .Lerror_entry_from_usermode_after_swapgs: - /* -@@ -1148,7 +1175,6 @@ ENTRY(error_entry) - * Switch to kernel gsbase: - */ - SWAPGS -- SWITCH_KERNEL_CR3 - - /* - * Pretend that the exception came from user mode: set up pt_regs -@@ -1247,7 +1273,10 @@ ENTRY(nmi) - */ - - SWAPGS_UNSAFE_STACK -- SWITCH_KERNEL_CR3_NO_STACK -+ /* -+ * percpu variables are mapped with user CR3, so no need -+ * to switch CR3 here. 
-+ */ - cld - movq %rsp, %rdx - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp -@@ -1281,14 +1310,33 @@ ENTRY(nmi) - - movq %rsp, %rdi - movq $-1, %rsi -+#ifdef CONFIG_KAISER -+ /* Unconditionally use kernel CR3 for do_nmi() */ -+ /* %rax is saved above, so OK to clobber here */ -+ movq %cr3, %rax -+ pushq %rax -+#ifdef CONFIG_KAISER_REAL_SWITCH -+ andq $(~0x1000), %rax -+#endif -+ movq %rax, %cr3 -+#endif - call do_nmi -+ /* -+ * Unconditionally restore CR3. I know we return to -+ * kernel code that needs user CR3, but do we ever return -+ * to "user mode" where we need the kernel CR3? -+ */ -+#ifdef CONFIG_KAISER -+ popq %rax -+ mov %rax, %cr3 -+#endif - - /* - * Return back to user mode. We must *not* do the normal exit -- * work, because we don't want to enable interrupts. Fortunately, -- * do_nmi doesn't modify pt_regs. -+ * work, because we don't want to enable interrupts. Do not -+ * switch to user CR3: we might be going back to kernel code -+ * that had a user CR3 set. - */ -- SWITCH_USER_CR3 - SWAPGS - jmp restore_c_regs_and_iret - -@@ -1484,23 +1532,54 @@ end_repeat_nmi: - ALLOC_PT_GPREGS_ON_STACK - - /* -- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit -- * as we should not be calling schedule in NMI context. -- * Even with normal interrupts enabled. An NMI should not be -- * setting NEED_RESCHED or anything that normal interrupts and -- * exceptions might do. -+ * Use the same approach as paranoid_entry to handle SWAPGS, but -+ * without CR3 handling since we do that differently in NMIs. No -+ * need to use paranoid_exit as we should not be calling schedule -+ * in NMI context. Even with normal interrupts enabled. An NMI -+ * should not be setting NEED_RESCHED or anything that normal -+ * interrupts and exceptions might do. - */ -- call paranoid_entry -+ cld -+ SAVE_C_REGS -+ SAVE_EXTRA_REGS -+ movl $1, %ebx -+ movl $MSR_GS_BASE, %ecx -+ rdmsr -+ testl %edx, %edx -+ js 1f /* negative -> in kernel */ -+ SWAPGS -+ xorl %ebx, %ebx -+1: -+#ifdef CONFIG_KAISER -+ /* Unconditionally use kernel CR3 for do_nmi() */ -+ /* %rax is saved above, so OK to clobber here */ -+ movq %cr3, %rax -+ pushq %rax -+#ifdef CONFIG_KAISER_REAL_SWITCH -+ andq $(~0x1000), %rax -+#endif -+ movq %rax, %cr3 -+#endif - - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp, %rdi -+ addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ - movq $-1, %rsi - call do_nmi -+ /* -+ * Unconditionally restore CR3. We might be returning to -+ * kernel code that needs user CR3, like just just before -+ * a sysret. -+ */ -+#ifdef CONFIG_KAISER -+ popq %rax -+ mov %rax, %cr3 -+#endif - - testl %ebx, %ebx /* swapgs needed? 
*/ - jnz nmi_restore - nmi_swapgs: -- SWITCH_USER_CR3_NO_STACK -+ /* We fixed up CR3 above, so no need to switch it here */ - SWAPGS_UNSAFE_STACK - nmi_restore: - RESTORE_EXTRA_REGS -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 63ee830..0703f48 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -16,13 +16,17 @@ - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg -+#ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~0x1000), \reg -+#endif - movq \reg, %cr3 - .endm - - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg -+#ifdef CONFIG_KAISER_REAL_SWITCH - orq $(0x1000), \reg -+#endif - movq \reg, %cr3 - .endm - -@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .endm - - #endif /* CONFIG_KAISER */ -+ - #else /* __ASSEMBLY__ */ - - - #ifdef CONFIG_KAISER --// Upon kernel/user mode switch, it may happen that --// the address space has to be switched before the registers have been stored. --// To change the address space, another register is needed. --// A register therefore has to be stored/restored. --// --DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -+/* -+ * Upon kernel/user mode switch, it may happen that the address -+ * space has to be switched before the registers have been -+ * stored. To change the address space, another register is -+ * needed. A register therefore has to be stored/restored. -+*/ - --#endif /* CONFIG_KAISER */ -+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - - /** -- * shadowmem_add_mapping - map a virtual memory part to the shadow mapping -+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping - * @addr: the start address of the range - * @size: the size of the range - * @flags: The mapping flags of the pages - * -- * the mapping is done on a global scope, so no bigger synchronization has to be done. -- * the pages have to be manually unmapped again when they are not needed any longer. -+ * The mapping is done on a global scope, so no bigger -+ * synchronization has to be done. the pages have to be -+ * manually unmapped again when they are not needed any longer. - */ --extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); -+extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); - - - /** -- * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping -+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping - * @addr: the start address of the range - * @size: the size of the range - */ - extern void kaiser_remove_mapping(unsigned long start, unsigned long size); - - /** -- * shadowmem_initialize_mapping - Initalize the shadow mapping -+ * kaiser_initialize_mapping - Initalize the shadow mapping - * -- * most parts of the shadow mapping can be mapped upon boot time. -- * only the thread stacks have to be mapped on runtime. -- * the mapped regions are not unmapped at all. -+ * Most parts of the shadow mapping can be mapped upon boot -+ * time. Only per-process things like the thread stacks -+ * or a new LDT have to be mapped at runtime. These boot- -+ * time mappings are permanent and nevertunmapped. 
- */ - extern void kaiser_init(void); - --#endif -+#endif /* CONFIG_KAISER */ -+ -+#endif /* __ASSEMBLY */ - - - -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 4b479c9..1cee98e 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) - - static inline int pgd_bad(pgd_t pgd) - { -- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; -+ pgdval_t ignore_flags = _PAGE_USER; -+ /* -+ * We set NX on KAISER pgds that map userspace memory so -+ * that userspace can not meaningfully use the kernel -+ * page table by accident; it will fault on the first -+ * instruction it tries to run. See native_set_pgd(). -+ */ -+ if (IS_ENABLED(CONFIG_KAISER)) -+ ignore_flags |= _PAGE_NX; -+ -+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; - } - - static inline int pgd_none(pgd_t pgd) -@@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - { - memcpy(dst, src, count * sizeof(pgd_t)); - #ifdef CONFIG_KAISER -- // clone the shadow pgd part as well -- memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); -+ /* Clone the shadow pgd part as well */ -+ memcpy(native_get_shadow_pgd(dst), -+ native_get_shadow_pgd(src), -+ count * sizeof(pgd_t)); - #endif - } - -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index e6ea39f..000265c 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_t *pud) - } - - #ifdef CONFIG_KAISER --static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { -+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) -+{ - return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); - } - --static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { -+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) -+{ - return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); - } -+#else -+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) -+{ -+ BUILD_BUG_ON(1); -+ return NULL; -+} -+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) -+{ -+ return pgdp; -+} - #endif /* CONFIG_KAISER */ - -+/* -+ * Page table pages are page-aligned. The lower half of the top -+ * level is used for userspace and the top half for the kernel. -+ * This returns true for user pages that need to get copied into -+ * both the user and kernel copies of the page tables, and false -+ * for kernel pages that should only be in the kernel copy. -+ */ -+static inline bool is_userspace_pgd(void *__ptr) -+{ -+ unsigned long ptr = (unsigned long)__ptr; -+ -+ return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); -+} -+ - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) - { - #ifdef CONFIG_KAISER -- // We know that a pgd is page aligned. -- // Therefore the lower indices have to be mapped to user space. -- // These pages are mapped to the shadow mapping. -- if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { -+ pteval_t extra_kern_pgd_flags = 0; -+ /* Do we need to also populate the shadow pgd? */ -+ if (is_userspace_pgd(pgdp)) { - native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; -+ /* -+ * Even if the entry is *mapping* userspace, ensure -+ * that userspace can not use it. This way, if we -+ * get out to userspace running on the kernel CR3, -+ * userspace will crash instead of running. 
-+ */ -+ extra_kern_pgd_flags = _PAGE_NX; - } -- -- pgdp->pgd = pgd.pgd & ~_PAGE_USER; -+ pgdp->pgd = pgd.pgd; -+ pgdp->pgd |= extra_kern_pgd_flags; - #else /* CONFIG_KAISER */ - *pgdp = pgd; - #endif -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 00fecbb..8bc8d02 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -48,7 +48,7 @@ - #ifdef CONFIG_KAISER - #define _PAGE_GLOBAL (_AT(pteval_t, 0)) - #else --#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) - #endif - #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) - #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) -@@ -123,11 +123,7 @@ - #define _PAGE_DEVMAP (_AT(pteval_t, 0)) - #endif - --#ifdef CONFIG_KAISER --#define _PAGE_PROTNONE (_AT(pteval_t, 0)) --#else - #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) --#endif - - #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_DIRTY) -diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c -index 9ff875a..560c2fd 100644 ---- a/arch/x86/kernel/espfix_64.c -+++ b/arch/x86/kernel/espfix_64.c -@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void) - /* Install the espfix pud into the kernel page directory */ - pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; - pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); --#ifdef CONFIG_KAISER -- // add the esp stack pud to the shadow mapping here. -- // This can be done directly, because the fixup stack has its own pud -- set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); --#endif -+ /* -+ * Just copy the top-level PGD that is mapping the espfix -+ * area to ensure it is mapped into the shadow user page -+ * tables. -+ */ -+ if (IS_ENABLED(CONFIG_KAISER)) -+ set_pgd(native_get_shadow_pgd(pgd_p), -+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); - - /* Randomize the locations */ - init_espfix_random(); -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index 9e849b5..5775379 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag) - GLOBAL(name) - - #ifdef CONFIG_KAISER -+/* -+ * Each PGD needs to be 8k long and 8k aligned. We do not -+ * ever go out to userspace with these, so we do not -+ * strictly *need* the second page, but this allows us to -+ * have a single set_pgd() implementation that does not -+ * need to worry about whether it has 4k or 8k to work -+ * with. 
-+ * -+ * This ensures PGDs are 8k long: -+ */ -+#define KAISER_USER_PGD_FILL 512 -+/* This ensures they are 8k-aligned: */ - #define NEXT_PGD_PAGE(name) \ - .balign 2 * PAGE_SIZE; \ - GLOBAL(name) - #else - #define NEXT_PGD_PAGE(name) NEXT_PAGE(name) -+#define KAISER_USER_PGD_FILL 0 - #endif - - /* Automate the creation of 1 to 1 mapping pmd entries */ -@@ -425,6 +438,7 @@ GLOBAL(name) - NEXT_PGD_PAGE(early_level4_pgt) - .fill 511,8,0 - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE -+ .fill KAISER_USER_PGD_FILL,8,0 - - NEXT_PAGE(early_dynamic_pgts) - .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 -@@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts) - - #ifndef CONFIG_XEN - NEXT_PGD_PAGE(init_level4_pgt) -- .fill 2*512,8,0 -+ .fill 512,8,0 -+ .fill KAISER_USER_PGD_FILL,8,0 - #else - NEXT_PGD_PAGE(init_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE -@@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt) - .org init_level4_pgt + L4_START_KERNEL*8, 0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE -+ .fill KAISER_USER_PGD_FILL,8,0 - - NEXT_PAGE(level3_ident_pgt) - .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE -@@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt) - */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) - #endif -+ .fill KAISER_USER_PGD_FILL,8,0 - - NEXT_PAGE(level3_kernel_pgt) - .fill L3_START_KERNEL,8,0 -diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c -index 6707039..3c2d55b 100644 ---- a/arch/x86/kernel/ldt.c -+++ b/arch/x86/kernel/ldt.c -@@ -17,6 +17,7 @@ - #include <linux/uaccess.h> - - #include <asm/ldt.h> -+#include <asm/kaiser.h> - #include <asm/desc.h> - #include <asm/mmu_context.h> - #include <asm/syscalls.h> -@@ -33,11 +34,21 @@ static void flush_ldt(void *current_mm) - set_ldt(pc->ldt->entries, pc->ldt->size); - } - -+static void __free_ldt_struct(struct ldt_struct *ldt) -+{ -+ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) -+ vfree(ldt->entries); -+ else -+ free_page((unsigned long)ldt->entries); -+ kfree(ldt); -+} -+ - /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. 
*/ - static struct ldt_struct *alloc_ldt_struct(int size) - { - struct ldt_struct *new_ldt; - int alloc_size; -+ int ret = 0; - - if (size > LDT_ENTRIES) - return NULL; -@@ -65,6 +76,14 @@ static struct ldt_struct *alloc_ldt_struct(int size) - return NULL; - } - -+ // FIXME: make kaiser_add_mapping() return an error code -+ // when it fails -+ kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, -+ __PAGE_KERNEL); -+ if (ret) { -+ __free_ldt_struct(new_ldt); -+ return NULL; -+ } - new_ldt->size = size; - return new_ldt; - } -@@ -91,12 +110,10 @@ static void free_ldt_struct(struct ldt_struct *ldt) - if (likely(!ldt)) - return; - -+ kaiser_remove_mapping((unsigned long)ldt->entries, -+ ldt->size * LDT_ENTRY_SIZE); - paravirt_free_ldt(ldt->entries, ldt->size); -- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) -- vfree(ldt->entries); -- else -- free_page((unsigned long)ldt->entries); -- kfree(ldt); -+ __free_ldt_struct(ldt); - } - - /* -diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c -index 1c113db..2bb5ee4 100644 ---- a/arch/x86/kernel/tracepoint.c -+++ b/arch/x86/kernel/tracepoint.c -@@ -9,10 +9,12 @@ - #include <linux/atomic.h> - - atomic_t trace_idt_ctr = ATOMIC_INIT(0); -+__aligned(PAGE_SIZE) - struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, - (unsigned long) trace_idt_table }; - - /* No need to be aligned, but done to keep all IDTs defined the same way. */ -+__aligned(PAGE_SIZE) - gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; - - static int trace_irq_vector_refcount; -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index cf1bb92..7270a29 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -1,160 +1,305 @@ -- -- -+#include <linux/bug.h> - #include <linux/kernel.h> - #include <linux/errno.h> - #include <linux/string.h> - #include <linux/types.h> - #include <linux/bug.h> - #include <linux/init.h> -+#include <linux/interrupt.h> - #include <linux/spinlock.h> - #include <linux/mm.h> -- - #include <linux/uaccess.h> -+ -+#include <asm/kaiser.h> - #include <asm/pgtable.h> - #include <asm/pgalloc.h> - #include <asm/desc.h> - #ifdef CONFIG_KAISER - - __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -+/* -+ * At runtime, the only things we map are some things for CPU -+ * hotplug, and stacks for new processes. No two CPUs will ever -+ * be populating the same addresses, so we only need to ensure -+ * that we protect between two CPUs trying to allocate and -+ * populate the same page table page. -+ * -+ * Only take this lock when doing a set_p[4um]d(), but it is not -+ * needed for doing a set_pte(). We assume that only the *owner* -+ * of a given allocation will be doing this for _their_ -+ * allocation. -+ * -+ * This ensures that once a system has been running for a while -+ * and there have been stacks all over and these page tables -+ * are fully populated, there will be no further acquisitions of -+ * this lock. -+ */ -+static DEFINE_SPINLOCK(shadow_table_allocation_lock); - --/** -- * Get the real ppn from a address in kernel mapping. -- * @param address The virtual adrress -- * @return the physical address -+/* -+ * Returns -1 on error. 
- */ --static inline unsigned long get_pa_from_mapping (unsigned long address) -+static inline unsigned long get_pa_from_mapping(unsigned long vaddr) - { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - -- pgd = pgd_offset_k(address); -- BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); -- -- pud = pud_offset(pgd, address); -- BUG_ON(pud_none(*pud)); -+ pgd = pgd_offset_k(vaddr); -+ /* -+ * We made all the kernel PGDs present in kaiser_init(). -+ * We expect them to stay that way. -+ */ -+ BUG_ON(pgd_none(*pgd)); -+ /* -+ * PGDs are either 512GB or 128TB on all x86_64 -+ * configurations. We don't handle these. -+ */ -+ BUG_ON(pgd_large(*pgd)); - -- if (pud_large(*pud)) { -- return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); -+ pud = pud_offset(pgd, vaddr); -+ if (pud_none(*pud)) { -+ WARN_ON_ONCE(1); -+ return -1; - } - -- pmd = pmd_offset(pud, address); -- BUG_ON(pmd_none(*pmd)); -+ if (pud_large(*pud)) -+ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK); - -- if (pmd_large(*pmd)) { -- return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); -+ pmd = pmd_offset(pud, vaddr); -+ if (pmd_none(*pmd)) { -+ WARN_ON_ONCE(1); -+ return -1; - } - -- pte = pte_offset_kernel(pmd, address); -- BUG_ON(pte_none(*pte)); -+ if (pmd_large(*pmd)) -+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK); - -- return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); -+ pte = pte_offset_kernel(pmd, vaddr); -+ if (pte_none(*pte)) { -+ WARN_ON_ONCE(1); -+ return -1; -+ } -+ -+ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); - } - --void _kaiser_copy (unsigned long start_addr, unsigned long size, -- unsigned long flags) -+/* -+ * This is a relatively normal page table walk, except that it -+ * also tries to allocate page tables pages along the way. -+ * -+ * Returns a pointer to a PTE on success, or NULL on failure. 
-+ */ -+static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - { -- pgd_t *pgd; -- pud_t *pud; - pmd_t *pmd; -- pte_t *pte; -- unsigned long address; -- unsigned long end_addr = start_addr + size; -- unsigned long target_address; -+ pud_t *pud; -+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); -+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - -- for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); -- address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { -- target_address = get_pa_from_mapping(address); -+ might_sleep(); -+ if (is_atomic) { -+ gfp &= ~GFP_KERNEL; -+ gfp |= __GFP_HIGH | __GFP_ATOMIC; -+ } - -- pgd = native_get_shadow_pgd(pgd_offset_k(address)); -+ if (pgd_none(*pgd)) { -+ WARN_ONCE(1, "All shadow pgds should have been populated"); -+ return NULL; -+ } -+ BUILD_BUG_ON(pgd_large(*pgd) != 0); - -- BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); -- BUG_ON(pgd_large(*pgd)); -+ pud = pud_offset(pgd, address); -+ /* The shadow page tables do not use large mappings: */ -+ if (pud_large(*pud)) { -+ WARN_ON(1); -+ return NULL; -+ } -+ if (pud_none(*pud)) { -+ unsigned long new_pmd_page = __get_free_page(gfp); -+ if (!new_pmd_page) -+ return NULL; -+ spin_lock(&shadow_table_allocation_lock); -+ if (pud_none(*pud)) -+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); -+ else -+ free_page(new_pmd_page); -+ spin_unlock(&shadow_table_allocation_lock); -+ } - -- pud = pud_offset(pgd, address); -- if (pud_none(*pud)) { -- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); -- } -- BUG_ON(pud_large(*pud)); -+ pmd = pmd_offset(pud, address); -+ /* The shadow page tables do not use large mappings: */ -+ if (pmd_large(*pmd)) { -+ WARN_ON(1); -+ return NULL; -+ } -+ if (pmd_none(*pmd)) { -+ unsigned long new_pte_page = __get_free_page(gfp); -+ if (!new_pte_page) -+ return NULL; -+ spin_lock(&shadow_table_allocation_lock); -+ if (pmd_none(*pmd)) -+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); -+ else -+ free_page(new_pte_page); -+ spin_unlock(&shadow_table_allocation_lock); -+ } - -- pmd = pmd_offset(pud, address); -- if (pmd_none(*pmd)) { -- set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); -- } -- BUG_ON(pmd_large(*pmd)); -+ return pte_offset_kernel(pmd, address); -+} - -- pte = pte_offset_kernel(pmd, address); -+int kaiser_add_user_map(const void *__start_addr, unsigned long size, -+ unsigned long flags) -+{ -+ int ret = 0; -+ pte_t *pte; -+ unsigned long start_addr = (unsigned long )__start_addr; -+ unsigned long address = start_addr & PAGE_MASK; -+ unsigned long end_addr = PAGE_ALIGN(start_addr + size); -+ unsigned long target_address; -+ -+ for (;address < end_addr; address += PAGE_SIZE) { -+ target_address = get_pa_from_mapping(address); -+ if (target_address == -1) { -+ ret = -EIO; -+ break; -+ } -+ pte = kaiser_pagetable_walk(address, false); - if (pte_none(*pte)) { - set_pte(pte, __pte(flags | target_address)); - } else { -- BUG_ON(__pa(pte_page(*pte)) != target_address); -+ pte_t tmp; -+ set_pte(&tmp, __pte(flags | target_address)); -+ WARN_ON_ONCE(!pte_same(*pte, tmp)); - } - } -+ return ret; -+} -+ -+static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags) -+{ -+ unsigned long size = end - start; -+ -+ return kaiser_add_user_map(start, size, flags); - } - --// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping --static inline void __init _kaiser_init(void) -+/* -+ * Ensure 
that the top level of the (shadow) page tables are -+ * entirely populated. This ensures that all processes that get -+ * forked have the same entries. This way, we do not have to -+ * ever go set up new entries in older processes. -+ * -+ * Note: we never free these, so there are no updates to them -+ * after this. -+ */ -+static void __init kaiser_init_all_pgds(void) - { - pgd_t *pgd; - int i = 0; - - pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); - for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { -- set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); -+ pgd_t new_pgd; -+ pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); -+ if (!pud) { -+ WARN_ON(1); -+ break; -+ } -+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); -+ /* -+ * Make sure not to stomp on some other pgd entry. -+ */ -+ if (!pgd_none(pgd[i])) { -+ WARN_ON(1); -+ continue; -+ } -+ set_pgd(pgd + i, new_pgd); - } - } - -+#define kaiser_add_user_map_early(start, size, flags) do { \ -+ int __ret = kaiser_add_user_map(start, size, flags); \ -+ WARN_ON(__ret); \ -+} while (0) -+ -+#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \ -+ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \ -+ WARN_ON(__ret); \ -+} while (0) -+ - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; --spinlock_t shadow_table_lock; -+/* -+ * If anything in here fails, we will likely die on one of the -+ * first kernel->user transitions and init will die. But, we -+ * will have most of the kernel up by then and should be able to -+ * get a clean warning out of it. If we BUG_ON() here, we run -+ * the risk of being before we have good console output. -+ */ - void __init kaiser_init(void) - { - int cpu; -- spin_lock_init(&shadow_table_lock); -- -- spin_lock(&shadow_table_lock); - -- _kaiser_init(); -+ kaiser_init_all_pgds(); - - for_each_possible_cpu(cpu) { -- // map the per cpu user variables -- _kaiser_copy( -- (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), -- (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, -- __PAGE_KERNEL); -+ void *percpu_vaddr = __per_cpu_user_mapped_start + -+ per_cpu_offset(cpu); -+ unsigned long percpu_sz = __per_cpu_user_mapped_end - -+ __per_cpu_user_mapped_start; -+ kaiser_add_user_map_early(percpu_vaddr, percpu_sz, -+ __PAGE_KERNEL); - } - -- // map the entry/exit text section, which is responsible to switch between user- and kernel mode -- _kaiser_copy( -- (unsigned long) __entry_text_start, -- (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, -- __PAGE_KERNEL_RX); -+ /* -+ * Map the entry/exit text section, which is needed at -+ * switches from user to and from kernel. 
-+ */ -+ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end, -+ __PAGE_KERNEL_RX); - -- // the fixed map address of the idt_table -- _kaiser_copy( -- (unsigned long) idt_descr.address, -- sizeof(gate_desc) * NR_VECTORS, -- __PAGE_KERNEL_RO); -- -- spin_unlock(&shadow_table_lock); -+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) -+ kaiser_add_user_map_ptrs_early(__irqentry_text_start, -+ __irqentry_text_end, -+ __PAGE_KERNEL_RX); -+#endif -+ kaiser_add_user_map_early((void *)idt_descr.address, -+ sizeof(gate_desc) * NR_VECTORS, -+ __PAGE_KERNEL_RO); -+#ifdef CONFIG_TRACING -+ kaiser_add_user_map_early(&trace_idt_descr, -+ sizeof(trace_idt_descr), -+ __PAGE_KERNEL); -+ kaiser_add_user_map_early(&trace_idt_table, -+ sizeof(gate_desc) * NR_VECTORS, -+ __PAGE_KERNEL); -+#endif -+ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr), -+ __PAGE_KERNEL); -+ kaiser_add_user_map_early(&debug_idt_table, -+ sizeof(gate_desc) * NR_VECTORS, -+ __PAGE_KERNEL); - } - -+extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); - // add a mapping to the shadow-mapping, and synchronize the mappings --void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) -+int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) - { -- spin_lock(&shadow_table_lock); -- _kaiser_copy(addr, size, flags); -- spin_unlock(&shadow_table_lock); -+ return kaiser_add_user_map((const void *)addr, size, flags); - } - --extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); - void kaiser_remove_mapping(unsigned long start, unsigned long size) - { -- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); -- spin_lock(&shadow_table_lock); -- do { -- unmap_pud_range(pgd, start, start + size); -- } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); -- spin_unlock(&shadow_table_lock); -+ unsigned long end = start + size; -+ unsigned long addr; -+ -+ for (addr = start; addr < end; addr += PGDIR_SIZE) { -+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); -+ /* -+ * unmap_p4d_range() handles > P4D_SIZE unmaps, -+ * so no need to trim 'end'. 
-+ */ -+ unmap_pud_range_nofree(pgd, addr, end); -+ } - } - #endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c -index c17412f..73dcb0e1 100644 ---- a/arch/x86/mm/pageattr.c -+++ b/arch/x86/mm/pageattr.c -@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock); - #define CPA_FLUSHTLB 1 - #define CPA_ARRAY 2 - #define CPA_PAGES_ARRAY 4 -+#define CPA_FREE_PAGETABLES 8 - - #ifdef CONFIG_PROC_FS - static unsigned long direct_pages_count[PG_LEVEL_NUM]; -@@ -729,10 +730,13 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, - return 0; - } - --static bool try_to_free_pte_page(pte_t *pte) -+static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte) - { - int i; - -+ if (!(cpa->flags & CPA_FREE_PAGETABLES)) -+ return false; -+ - for (i = 0; i < PTRS_PER_PTE; i++) - if (!pte_none(pte[i])) - return false; -@@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *pte) - return true; - } - --static bool try_to_free_pmd_page(pmd_t *pmd) -+static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd) - { - int i; - -+ if (!(cpa->flags & CPA_FREE_PAGETABLES)) -+ return false; -+ - for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_none(pmd[i])) - return false; -@@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *pmd) - return true; - } - --static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) -+static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd, -+ unsigned long start, -+ unsigned long end) - { - pte_t *pte = pte_offset_kernel(pmd, start); - -@@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) - pte++; - } - -- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { -+ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) { - pmd_clear(pmd); - return true; - } - return false; - } - --static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, -+static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd, - unsigned long start, unsigned long end) - { -- if (unmap_pte_range(pmd, start, end)) -- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) -+ if (unmap_pte_range(cpa, pmd, start, end)) -+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) - pud_clear(pud); - } - --static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) -+static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, -+ unsigned long start, unsigned long end) - { - pmd_t *pmd = pmd_offset(pud, start); - -@@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) - unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; - unsigned long pre_end = min_t(unsigned long, end, next_page); - -- __unmap_pmd_range(pud, pmd, start, pre_end); -+ __unmap_pmd_range(cpa, pud, pmd, start, pre_end); - - start = pre_end; - pmd++; -@@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) - if (pmd_large(*pmd)) - pmd_clear(pmd); - else -- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); -+ __unmap_pmd_range(cpa, pud, pmd, -+ start, start + PMD_SIZE); - - start += PMD_SIZE; - pmd++; -@@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) - * 4K leftovers? - */ - if (start < end) -- return __unmap_pmd_range(pud, pmd, start, end); -+ return __unmap_pmd_range(cpa, pud, pmd, start, end); - - /* - * Try again to free the PMD page if haven't succeeded above. 
- */ - if (!pud_none(*pud)) -- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) -+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud))) - pud_clear(pud); - } - --void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) -+static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd, -+ unsigned long start, -+ unsigned long end) - { - pud_t *pud = pud_offset(pgd, start); - -@@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) - unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; - unsigned long pre_end = min_t(unsigned long, end, next_page); - -- unmap_pmd_range(pud, start, pre_end); -+ unmap_pmd_range(cpa, pud, start, pre_end); - - start = pre_end; - pud++; -@@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) - if (pud_large(*pud)) - pud_clear(pud); - else -- unmap_pmd_range(pud, start, start + PUD_SIZE); -+ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE); - - start += PUD_SIZE; - pud++; -@@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) - * 2M leftovers? - */ - if (start < end) -- unmap_pmd_range(pud, start, end); -+ unmap_pmd_range(cpa, pud, start, end); - - /* - * No need to try to free the PUD page because we'll free it in -@@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) - */ - } - -+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) -+{ -+ struct cpa_data cpa = { -+ .flags = CPA_FREE_PAGETABLES, -+ }; -+ -+ __unmap_pud_range(&cpa, pgd, start, end); -+} -+ -+void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end) -+{ -+ struct cpa_data cpa = { -+ .flags = 0, -+ }; -+ -+ __unmap_pud_range(&cpa, pgd, start, end); -+} -+ - static int alloc_pte_page(pmd_t *pmd) - { - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 27d218b..352fd01 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd) - kmem_cache_free(pgd_cache, pgd); - } - #else --static inline pgd_t *_pgd_alloc(void) --{ -+ - #ifdef CONFIG_KAISER -- // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory -- // block. Therefore, we have to allocate at least 3 pages. However, the -- // __get_free_pages returns us 4 pages. Hence, we store the base pointer at -- // the beginning of the page of our 8kb-aligned memory block in order to -- // correctly free it afterwars. -- -- unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); -- -- if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) -- { -- *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; -- return (pgd_t *) pages; -- } -- else -- { -- *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; -- return (pgd_t *) (pages + PAGE_SIZE); -- } -+/* -+ * Instead of one pmd, we aquire two pmds. Being order-1, it is -+ * both 8k in size and 8k-aligned. That lets us just flip bit 12 -+ * in a pointer to swap between the two 4k halves. 
-+ */ -+#define PGD_ALLOCATION_ORDER 1 - #else -- return (pgd_t *)__get_free_page(PGALLOC_GFP); -+#define PGD_ALLOCATION_ORDER 0 - #endif -+ -+static inline pgd_t *_pgd_alloc(void) -+{ -+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); - } - - static inline void _pgd_free(pgd_t *pgd) - { --#ifdef CONFIG_KAISER -- unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); -- free_pages(pages, get_order(4*PAGE_SIZE)); --#else -- free_page((unsigned long)pgd); --#endif -+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); - } - #endif /* CONFIG_X86_PAE */ - -diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h -new file mode 100644 -index 0000000..9db5433 ---- /dev/null -+++ b/include/linux/kaiser.h -@@ -0,0 +1,26 @@ -+#ifndef _INCLUDE_KAISER_H -+#define _INCLUDE_KAISER_H -+ -+#ifdef CONFIG_KAISER -+#include <asm/kaiser.h> -+#else -+ -+/* -+ * These stubs are used whenever CONFIG_KAISER is off, which -+ * includes architectures that support KAISER, but have it -+ * disabled. -+ */ -+ -+static inline void kaiser_init(void) -+{ -+} -+static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) -+{ -+} -+static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) -+{ -+ return 0; -+} -+ -+#endif /* !CONFIG_KAISER */ -+#endif /* _INCLUDE_KAISER_H */ -diff --git a/kernel/fork.c b/kernel/fork.c -index 61748d1..7ba50f1 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -58,6 +58,7 @@ - #include <linux/tsacct_kern.h> - #include <linux/cn_proc.h> - #include <linux/freezer.h> -+#include <linux/kaiser.h> - #include <linux/delayacct.h> - #include <linux/taskstats_kern.h> - #include <linux/random.h> -@@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct task_struct *tsk) - *stackend = STACK_END_MAGIC; /* for overflow detection */ - } - --extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); - static struct task_struct *dup_task_struct(struct task_struct *orig, int node) - { - struct task_struct *tsk; -@@ -500,9 +500,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) - * functions again. - */ - tsk->stack = stack; --#ifdef CONFIG_KAISER -- kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); --#endif -+ -+ err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); -+ if (err) -+ goto free_stack; - #ifdef CONFIG_VMAP_STACK - tsk->stack_vm_area = stack_vm_area; - #endif -diff --git a/security/Kconfig b/security/Kconfig -index f515ac3..334d2e8 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -32,12 +32,17 @@ config SECURITY - If you are unsure how to answer this question, answer N. - config KAISER - bool "Remove the kernel mapping in user mode" -+ default y - depends on X86_64 - depends on !PARAVIRT - help - This enforces a strict kernel and user space isolation in order to close - hardware side channels on kernel address information. 
- -+config KAISER_REAL_SWITCH -+ bool "KAISER: actually switch page tables" -+ default y -+ - config SECURITYFS - bool "Enable the securityfs filesystem" - help --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch deleted file mode 100644 index 5cff1af9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 45e0a2316524254692219fce805e247dc8dadb20 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Wed, 6 Jun 2018 17:38:09 +0200 -Subject: [PATCH 05/10] kvm: x86: use correct privilege level for - sgdt/sidt/fxsave/fxrstor access - -commit 3c9fa24ca7c9c47605672916491f79e8ccacb9e6 upstream. - -The functions that were used in the emulation of fxrstor, fxsave, sgdt and -sidt were originally meant for task switching, and as such they did not -check privilege levels. This is very bad when the same functions are used -in the emulation of unprivileged instructions. This is CVE-2018-10853. - -The obvious fix is to add a new argument to ops->read_std and ops->write_std, -which decides whether the access is a "system" access or should use the -processor's CPL. - -Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kvm_emulate.h | 6 ++++-- - arch/x86/kvm/emulate.c | 12 ++++++------ - arch/x86/kvm/x86.c | 18 ++++++++++++++---- - 3 files changed, 24 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h -index e9cd7be..0b7d332 100644 ---- a/arch/x86/include/asm/kvm_emulate.h -+++ b/arch/x86/include/asm/kvm_emulate.h -@@ -105,11 +105,12 @@ struct x86_emulate_ops { - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. -+ * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*read_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, - unsigned int bytes, -- struct x86_exception *fault); -+ struct x86_exception *fault, bool system); - - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. -@@ -127,10 +128,11 @@ struct x86_emulate_ops { - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. -+ * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*write_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, -- struct x86_exception *fault); -+ struct x86_exception *fault, bool system); - /* - * fetch: Read bytes of standard (non-emulated/special) memory. - * Used for instruction fetch. 
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c -index b6ec3e9..1e96a5a 100644 ---- a/arch/x86/kvm/emulate.c -+++ b/arch/x86/kvm/emulate.c -@@ -805,14 +805,14 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) - static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, - void *data, unsigned size) - { -- return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); -+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true); - } - - static int linear_write_system(struct x86_emulate_ctxt *ctxt, - ulong linear, void *data, - unsigned int size) - { -- return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); -+ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true); - } - - static int segmented_read_std(struct x86_emulate_ctxt *ctxt, -@@ -826,7 +826,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, - rc = linearize(ctxt, addr, size, false, &linear); - if (rc != X86EMUL_CONTINUE) - return rc; -- return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); -+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false); - } - - static int segmented_write_std(struct x86_emulate_ctxt *ctxt, -@@ -840,7 +840,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt, - rc = linearize(ctxt, addr, size, true, &linear); - if (rc != X86EMUL_CONTINUE) - return rc; -- return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); -+ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false); - } - - /* -@@ -2893,12 +2893,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, - #ifdef CONFIG_X86_64 - base |= ((u64)base3) << 32; - #endif -- r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); -+ r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true); - if (r != X86EMUL_CONTINUE) - return false; - if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) - return false; -- r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); -+ r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true); - if (r != X86EMUL_CONTINUE) - return false; - if ((perm >> bit_idx) & mask) -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index af8e120..2c4d91e 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -4383,10 +4383,15 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_virt); - - static int emulator_read_std(struct x86_emulate_ctxt *ctxt, - gva_t addr, void *val, unsigned int bytes, -- struct x86_exception *exception) -+ struct x86_exception *exception, bool system) - { - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); -- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); -+ u32 access = 0; -+ -+ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) -+ access |= PFERR_USER_MASK; -+ -+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); - } - - static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, -@@ -4430,12 +4435,17 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes - } - - static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, -- unsigned int bytes, struct x86_exception *exception) -+ unsigned int bytes, struct x86_exception *exception, -+ bool system) - { - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); -+ u32 access = PFERR_WRITE_MASK; -+ -+ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) -+ access |= PFERR_USER_MASK; - - return 
kvm_write_guest_virt_helper(addr, val, bytes, vcpu, -- PFERR_WRITE_MASK, exception); -+ access, exception); - } - - int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch deleted file mode 100644 index 1e33e521..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch +++ /dev/null @@ -1,39 +0,0 @@ -From a3032e35007a8178f448e471acb6bc6c972c087a Mon Sep 17 00:00:00 2001 -From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> -Date: Mon, 5 Mar 2018 19:25:51 +0300 -Subject: [PATCH 05/93] x86/cpufeatures: Add Intel PCONFIG cpufeature - -commit 7958b2246fadf54b7ff820a2a5a2c5ca1554716f upstream. - -CPUID.0x7.0x0:EDX[18] indicates whether Intel CPU support PCONFIG instruction. - -Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kai Huang <kai.huang@linux.intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/20180305162610.37510-4-kirill.shutemov@linux.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ed7a1d2..a248531 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -302,6 +302,7 @@ - /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ - #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ - #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ - #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch deleted file mode 100644 index bbb98553..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 56f0eb24f5e9ff1faf0818a928a6c4a1004aeef1 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Sun, 18 Dec 2016 17:44:13 +0100 -Subject: [PATCH 05/42] x86/microcode/AMD: Do not load when running on a - hypervisor - -commit a15a753539eca8ba243d576f02e7ca9c4b7d7042 upstream. - -Doing so is completely void of sense for multiple reasons so prevent -it. Set dis_ucode_ldr to true and thus disable the microcode loader by -default to address xen pv guests which execute the AP path but not the -BSP path. - -By having it turned off by default, the APs won't run into the loader -either. 
- -Also, check CPUID(1).ECX[31] which hypervisors set. Well almost, not the -xen pv one. That one gets the aforementioned "fix". - -Also, improve the detection method by caching the final decision whether -to continue loading in dis_ucode_ldr and do it once on the BSP. The APs -then simply test that value. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Tested-by: Juergen Gross <jgross@suse.com> -Tested-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Acked-by: Juergen Gross <jgross@suse.com> -Link: http://lkml.kernel.org/r/20161218164414.9649-4-bp@alien8.de -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Rolf Neugebauer <rolf.neugebauer@docker.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/microcode/core.c | 28 +++++++++++++++++++--------- - 1 file changed, 19 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c -index 5ce5155..dc0b9f8 100644 ---- a/arch/x86/kernel/cpu/microcode/core.c -+++ b/arch/x86/kernel/cpu/microcode/core.c -@@ -43,7 +43,7 @@ - #define MICROCODE_VERSION "2.01" - - static struct microcode_ops *microcode_ops; --static bool dis_ucode_ldr; -+static bool dis_ucode_ldr = true; - - /* - * Synchronization. -@@ -73,6 +73,7 @@ struct cpu_info_ctx { - static bool __init check_loader_disabled_bsp(void) - { - static const char *__dis_opt_str = "dis_ucode_ldr"; -+ u32 a, b, c, d; - - #ifdef CONFIG_X86_32 - const char *cmdline = (const char *)__pa_nodebug(boot_command_line); -@@ -85,8 +86,23 @@ static bool __init check_loader_disabled_bsp(void) - bool *res = &dis_ucode_ldr; - #endif - -- if (cmdline_find_option_bool(cmdline, option)) -- *res = true; -+ if (!have_cpuid_p()) -+ return *res; -+ -+ a = 1; -+ c = 0; -+ native_cpuid(&a, &b, &c, &d); -+ -+ /* -+ * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not -+ * completely accurate as xen pv guests don't see that CPUID bit set but -+ * that's good enough as they don't land on the BSP path anyway. -+ */ -+ if (c & BIT(31)) -+ return *res; -+ -+ if (cmdline_find_option_bool(cmdline, option) <= 0) -+ *res = false; - - return *res; - } -@@ -118,9 +134,6 @@ void __init load_ucode_bsp(void) - if (check_loader_disabled_bsp()) - return; - -- if (!have_cpuid_p()) -- return; -- - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); - -@@ -154,9 +167,6 @@ void load_ucode_ap(void) - if (check_loader_disabled_ap()) - return; - -- if (!have_cpuid_p()) -- return; -- - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch deleted file mode 100644 index b21b0f41..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 9c30656e4da86d6c69ad832ed9cb3e549b939566 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Thu, 29 Jun 2017 08:53:15 -0700 -Subject: [PATCH 05/14] x86/mm: Give each mm TLB flush generation a unique ID - -commit f39681ed0f48498b80455095376f11535feea332 upstream. - -This adds two new variables to mmu_context_t: ctx_id and tlb_gen. -ctx_id uniquely identifies the mm_struct and will never be reused. 
-For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic -count of the number of times that a TLB flush has been requested. -The pair (ctx_id, tlb_gen) can be used as an identifier for TLB -flush actions and will be used in subsequent patches to reliably -determine whether all needed TLB flushes have occurred on a given -CPU. - -This patch is split out for ease of review. By itself, it has no -real effect other than creating and updating the new variables. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Reviewed-by: Nadav Amit <nadav.amit@gmail.com> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: linux-mm@kvack.org -Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/mmu.h | 15 +++++++++++++-- - arch/x86/include/asm/mmu_context.h | 5 +++++ - arch/x86/mm/tlb.c | 2 ++ - 3 files changed, 20 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index 8b272a0..e2e0934 100644 ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -3,12 +3,18 @@ - - #include <linux/spinlock.h> - #include <linux/mutex.h> -+#include <linux/atomic.h> - - /* -- * The x86 doesn't have a mmu context, but -- * we put the segment information here. -+ * x86 has arch-specific MMU state beyond what lives in mm_struct. - */ - typedef struct { -+ /* -+ * ctx_id uniquely identifies this mm_struct. A ctx_id will never -+ * be reused, and zero is not a valid ctx_id. 
-+ */ -+ u64 ctx_id; -+ - #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - #endif -@@ -33,6 +39,11 @@ typedef struct { - #endif - } mm_context_t; - -+#define INIT_MM_CONTEXT(mm) \ -+ .context = { \ -+ .ctx_id = 1, \ -+ } -+ - void leave_mm(int cpu); - - #endif /* _ASM_X86_MMU_H */ -diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 762d6c6..1ed17c92 100644 ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -12,6 +12,9 @@ - #include <asm/tlbflush.h> - #include <asm/paravirt.h> - #include <asm/mpx.h> -+ -+extern atomic64_t last_mm_ctx_id; -+ - #ifndef CONFIG_PARAVIRT - static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -@@ -106,6 +109,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) - static inline int init_new_context(struct task_struct *tsk, - struct mm_struct *mm) - { -+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); -+ - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 613d07e..146e842 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -29,6 +29,8 @@ - * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi - */ - -+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); -+ - struct flush_tlb_info { - struct mm_struct *flush_mm; - unsigned long flush_start; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch deleted file mode 100644 index e6531584..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-speculation-Clean-up-various-Spectre-related-det.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 891112052277801e900b37496ca8c260a5e7e7e1 Mon Sep 17 00:00:00 2001 -From: Ingo Molnar <mingo@kernel.org> -Date: Tue, 13 Feb 2018 09:03:08 +0100 -Subject: [PATCH 05/12] x86/speculation: Clean up various Spectre related - details - -commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream. - -Harmonize all the Spectre messages so that a: - - dmesg | grep -i spectre - -... gives us most Spectre related kernel boot messages. 
- -Also fix a few other details: - - - clarify a comment about firmware speculation control - - - s/KPTI/PTI - - - remove various line-breaks that made the code uglier - -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-kernel@vger.kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 28 +++++++++++----------------- - 1 file changed, 11 insertions(+), 17 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 957ad44..b83e0c9 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -161,8 +161,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) - return SPECTRE_V2_CMD_NONE; - else { -- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, -- sizeof(arg)); -+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - -@@ -174,8 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - } - - if (i >= ARRAY_SIZE(mitigation_options)) { -- pr_err("unknown option (%s). Switching to AUTO select\n", -- mitigation_options[i].option); -+ pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); - return SPECTRE_V2_CMD_AUTO; - } - } -@@ -184,8 +182,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && - !IS_ENABLED(CONFIG_RETPOLINE)) { -- pr_err("%s selected but not compiled in. Switching to AUTO select\n", -- mitigation_options[i].option); -+ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); - return SPECTRE_V2_CMD_AUTO; - } - -@@ -255,14 +252,14 @@ static void __init spectre_v2_select_mitigation(void) - goto retpoline_auto; - break; - } -- pr_err("kernel not compiled with retpoline; no mitigation available!"); -+ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; - - retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - retpoline_amd: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { -- pr_err("LFENCE not serializing. Switching to generic retpoline\n"); -+ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : -@@ -280,7 +277,7 @@ static void __init spectre_v2_select_mitigation(void) - pr_info("%s\n", spectre_v2_strings[mode]); - - /* -- * If neither SMEP or KPTI are available, there is a risk of -+ * If neither SMEP nor PTI are available, there is a risk of - * hitting userspace addresses in the RSB after a context switch - * from a shallow call stack to a deeper one. To prevent this fill - * the entire RSB, even when using IBRS. 
-@@ -294,21 +291,20 @@ static void __init spectre_v2_select_mitigation(void) - if ((!boot_cpu_has(X86_FEATURE_KAISER) && - !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); -- pr_info("Filling RSB on context switch\n"); -+ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); - } - - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); -- pr_info("Enabling Indirect Branch Prediction Barrier\n"); -+ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - } - - #undef pr_fmt - - #ifdef CONFIG_SYSFS --ssize_t cpu_show_meltdown(struct device *dev, -- struct device_attribute *attr, char *buf) -+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) - { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) - return sprintf(buf, "Not affected\n"); -@@ -317,16 +313,14 @@ ssize_t cpu_show_meltdown(struct device *dev, - return sprintf(buf, "Vulnerable\n"); - } - --ssize_t cpu_show_spectre_v1(struct device *dev, -- struct device_attribute *attr, char *buf) -+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) - { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); - } - --ssize_t cpu_show_spectre_v2(struct device *dev, -- struct device_attribute *attr, char *buf) -+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) - { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch deleted file mode 100644 index 9430b597..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 399e9dee4411858aa4eb8894f031ff68ab3b5e9f Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Sun, 5 Nov 2017 16:54:47 -0800 -Subject: [PATCH 06/33] KVM: X86: Fix operand/address-size during instruction - decoding -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ] - -Pedro reported: - During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" - instruction under KVM produces different results on both memory and the SP - register depending on whether EPT support is enabled. With EPT the SP is - reduced by 4 bytes (and the written value is 0-padded) but without EPT support - it is only reduced by 2 bytes. The difference can be observed when the CS.DB - field is 1 (32-bit) but not when it's 0 (16-bit). - -The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D -also should be respected instead of just default operand/address-size/66H -prefix/67H prefix during instruction decoding. This patch fixes it by also -adjusting operand/address-size according to CS.D. 
- -Reported-by: Pedro Fonseca <pfonseca@cs.washington.edu> -Tested-by: Pedro Fonseca <pfonseca@cs.washington.edu> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Nadav Amit <nadav.amit@gmail.com> -Cc: Pedro Fonseca <pfonseca@cs.washington.edu> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/emulate.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c -index 9f676ad..9984daf 100644 ---- a/arch/x86/kvm/emulate.c -+++ b/arch/x86/kvm/emulate.c -@@ -4971,6 +4971,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) - bool op_prefix = false; - bool has_seg_override = false; - struct opcode opcode; -+ u16 dummy; -+ struct desc_struct desc; - - ctxt->memop.type = OP_NONE; - ctxt->memopp = NULL; -@@ -4989,6 +4991,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) - switch (mode) { - case X86EMUL_MODE_REAL: - case X86EMUL_MODE_VM86: -+ def_op_bytes = def_ad_bytes = 2; -+ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); -+ if (desc.d) -+ def_op_bytes = def_ad_bytes = 4; -+ break; - case X86EMUL_MODE_PROT16: - def_op_bytes = def_ad_bytes = 2; - break; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch deleted file mode 100644 index 973bd7f6..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 495d2eaaa7862a3ad27140ad0876ae931ddd5e80 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Tue, 5 Sep 2017 12:05:01 -0700 -Subject: [PATCH 006/103] kaiser: do not set _PAGE_NX on pgd_none - -native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must -avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry: -usually that just generated a warning on exit, but sometimes -more mysterious and damaging failures (our production machines -could not complete booting). - -The original fix to this just avoided adding _PAGE_NX to -an empty entry; but eventually more problems surfaced with kexec, -and EFI mapping expected to be a problem too. So now instead -change native_set_pgd() to update shadow only if _PAGE_USER: - -A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure) -use set_pgd() to set up a temporary internal virtual address space, with -physical pages remapped at what Kaiser regards as userspace addresses: -Kaiser then assumes a shadow pgd follows, which it will try to corrupt. - -This appears to be responsible for the recent kexec and kdump failures; -though it's unclear how those did not manifest as a problem before. -Ah, the shadow pgd will only be assumed to "follow" if the requested -pgd is on an even-numbered page: so I suppose it was going wrong 50% -of the time all along. - -What we need is a flag to set_pgd(), to tell it we're dealing with -userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying? -Add a test for that. But we cannot do the same for pgd_clear() -(which may be called to clear corrupted entries - set aside the -question of "corrupt in which pgd?" 
until later), so there just -rely on pgd_clear() not being called in the problematic cases - -with a WARN_ON_ONCE() which should fire half the time if it is. - -But this is getting too big for an inline function: move it into -arch/x86/mm/kaiser.c (which then demands a boot/compressed mod); -and de-void and de-space native_get_shadow/normal_pgd() while here. - -Also make an unnecessary change to KASLR's init_trampoline(): it was -using set_pgd() to assign a pgd-value to a global variable (not in a -pg directory page), which was rather scary given Kaiser's previous -set_pgd() implementation: not a problem now, but too scary to leave -as was, it could easily blow up if we have to change set_pgd() again. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/boot/compressed/misc.h | 1 + - arch/x86/include/asm/pgtable_64.h | 51 ++++++++++----------------------------- - arch/x86/mm/kaiser.c | 42 ++++++++++++++++++++++++++++++++ - arch/x86/mm/kaslr.c | 4 +-- - 4 files changed, 58 insertions(+), 40 deletions(-) - -diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h -index 1c8355e..cd80024 100644 ---- a/arch/x86/boot/compressed/misc.h -+++ b/arch/x86/boot/compressed/misc.h -@@ -9,6 +9,7 @@ - */ - #undef CONFIG_PARAVIRT - #undef CONFIG_PARAVIRT_SPINLOCKS -+#undef CONFIG_KAISER - #undef CONFIG_KASAN - - #include <linux/linkage.h> -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index 000265c..177caf3 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud) - } - - #ifdef CONFIG_KAISER --static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) -+extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); -+ -+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - { -- return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); -+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); - } - --static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) -+static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) - { -- return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); -+ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); - } - #else --static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) -+static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) -+{ -+ return pgd; -+} -+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - { - BUILD_BUG_ON(1); - return NULL; - } --static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) -+static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) - { - return pgdp; - } - #endif /* CONFIG_KAISER */ - --/* -- * Page table pages are page-aligned. The lower half of the top -- * level is used for userspace and the top half for the kernel. -- * This returns true for user pages that need to get copied into -- * both the user and kernel copies of the page tables, and false -- * for kernel pages that should only be in the kernel copy. -- */ --static inline bool is_userspace_pgd(void *__ptr) --{ -- unsigned long ptr = (unsigned long)__ptr; -- -- return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); --} -- - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) - { --#ifdef CONFIG_KAISER -- pteval_t extra_kern_pgd_flags = 0; -- /* Do we need to also populate the shadow pgd? 
*/ -- if (is_userspace_pgd(pgdp)) { -- native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; -- /* -- * Even if the entry is *mapping* userspace, ensure -- * that userspace can not use it. This way, if we -- * get out to userspace running on the kernel CR3, -- * userspace will crash instead of running. -- */ -- extra_kern_pgd_flags = _PAGE_NX; -- } -- pgdp->pgd = pgd.pgd; -- pgdp->pgd |= extra_kern_pgd_flags; --#else /* CONFIG_KAISER */ -- *pgdp = pgd; --#endif -+ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); - } - - static inline void native_pgd_clear(pgd_t *pgd) -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 7270a29..8d6061c 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) - unmap_pud_range_nofree(pgd, addr, end); - } - } -+ -+/* -+ * Page table pages are page-aligned. The lower half of the top -+ * level is used for userspace and the top half for the kernel. -+ * This returns true for user pages that need to get copied into -+ * both the user and kernel copies of the page tables, and false -+ * for kernel pages that should only be in the kernel copy. -+ */ -+static inline bool is_userspace_pgd(pgd_t *pgdp) -+{ -+ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2); -+} -+ -+pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) -+{ -+ /* -+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to -+ * skip cases like kexec and EFI which make temporary low mappings. -+ */ -+ if (pgd.pgd & _PAGE_USER) { -+ if (is_userspace_pgd(pgdp)) { -+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; -+ /* -+ * Even if the entry is *mapping* userspace, ensure -+ * that userspace can not use it. This way, if we -+ * get out to userspace running on the kernel CR3, -+ * userspace will crash instead of running. -+ */ -+ pgd.pgd |= _PAGE_NX; -+ } -+ } else if (!pgd.pgd) { -+ /* -+ * pgd_clear() cannot check _PAGE_USER, and is even used to -+ * clear corrupted pgd entries: so just rely on cases like -+ * kexec and EFI never to be using pgd_clear(). 
-+ */ -+ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) && -+ is_userspace_pgd(pgdp)) -+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; -+ } -+ return pgd; -+} - #endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c -index aed2064..9284ec1 100644 ---- a/arch/x86/mm/kaslr.c -+++ b/arch/x86/mm/kaslr.c -@@ -189,6 +189,6 @@ void __meminit init_trampoline(void) - *pud_tramp = *pud; - } - -- set_pgd(&trampoline_pgd_entry, -- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); -+ /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ -+ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch deleted file mode 100644 index e91992c0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch +++ /dev/null @@ -1,63 +0,0 @@ -From bd9bf4f96e31d86d230db1f5243608f3a500123d Mon Sep 17 00:00:00 2001 -From: Waiman Long <longman@redhat.com> -Date: Mon, 22 Jan 2018 17:09:34 -0500 -Subject: [PATCH 06/42] x86/retpoline: Remove the esp/rsp thunk - -(cherry picked from commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2) - -It doesn't make sense to have an indirect call thunk with esp/rsp as -retpoline code won't work correctly with the stack pointer register. -Removing it will help compiler writers to catch error in case such -a thunk call is emitted incorrectly. - -Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") -Suggested-by: Jeff Law <law@redhat.com> -Signed-off-by: Waiman Long <longman@redhat.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Kees Cook <keescook@google.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/asm-prototypes.h | 1 - - arch/x86/lib/retpoline.S | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h -index b15aa40..5a25ada 100644 ---- a/arch/x86/include/asm/asm-prototypes.h -+++ b/arch/x86/include/asm/asm-prototypes.h -@@ -37,5 +37,4 @@ INDIRECT_THUNK(dx) - INDIRECT_THUNK(si) - INDIRECT_THUNK(di) - INDIRECT_THUNK(bp) --INDIRECT_THUNK(sp) - #endif /* CONFIG_RETPOLINE */ -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -index dfb2ba9..c909961 100644 ---- a/arch/x86/lib/retpoline.S -+++ b/arch/x86/lib/retpoline.S -@@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) - GENERATE_THUNK(_ASM_SI) - GENERATE_THUNK(_ASM_DI) - GENERATE_THUNK(_ASM_BP) --GENERATE_THUNK(_ASM_SP) - #ifdef CONFIG_64BIT - GENERATE_THUNK(r8) - GENERATE_THUNK(r9) --- -2.7.4 - diff --git 
a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch deleted file mode 100644 index a8632983..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch +++ /dev/null @@ -1,84 +0,0 @@ -From d98751217028054a791c98512d1ed81d406f55da Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Thu, 7 Jun 2018 09:13:48 -0700 -Subject: [PATCH 06/10] x86/spectre_v1: Disable compiler optimizations over - array_index_mask_nospec() - -commit eab6870fee877258122a042bfd99ee7908c40280 upstream. - -Mark Rutland noticed that GCC optimization passes have the potential to elide -necessary invocations of the array_index_mask_nospec() instruction sequence, -so mark the asm() volatile. - -Mark explains: - -"The volatile will inhibit *some* cases where the compiler could lift the - array_index_nospec() call out of a branch, e.g. where there are multiple - invocations of array_index_nospec() with the same arguments: - - if (idx < foo) { - idx1 = array_idx_nospec(idx, foo) - do_something(idx1); - } - - < some other code > - - if (idx < foo) { - idx2 = array_idx_nospec(idx, foo); - do_something_else(idx2); - } - - ... since the compiler can determine that the two invocations yield the same - result, and reuse the first result (likely the same register as idx was in - originally) for the second branch, effectively re-writing the above as: - - if (idx < foo) { - idx = array_idx_nospec(idx, foo); - do_something(idx); - } - - < some other code > - - if (idx < foo) { - do_something_else(idx); - } - - ... if we don't take the first branch, then speculatively take the second, we - lose the nospec protection. 
- - There's more info on volatile asm in the GCC docs: - - https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile - " - -Reported-by: Mark Rutland <mark.rutland@arm.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Acked-by: Mark Rutland <mark.rutland@arm.com> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Linus Torvalds <torvalds@linux-foundation.org> -Cc: <stable@vger.kernel.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec") -Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/barrier.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h -index 78d1c6a..eb53c2c 100644 ---- a/arch/x86/include/asm/barrier.h -+++ b/arch/x86/include/asm/barrier.h -@@ -37,7 +37,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - { - unsigned long mask; - -- asm ("cmp %1,%2; sbb %0,%0;" -+ asm volatile ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch deleted file mode 100644 index 8f996720..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch +++ /dev/null @@ -1,39 +0,0 @@ -From eeedd09281a09c8f0470c638939a5121ca753461 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Tue, 6 Feb 2018 18:22:40 -0800 -Subject: [PATCH 06/12] x86/speculation: Fix up array_index_nospec_mask() asm - constraint - -commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream. - -Allow the compiler to handle @size as an immediate value or memory -directly rather than allocating a register. 
- -Reported-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/barrier.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h -index 8575903..78d1c6a 100644 ---- a/arch/x86/include/asm/barrier.h -+++ b/arch/x86/include/asm/barrier.h -@@ -39,7 +39,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - - asm ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) -- :"r"(size),"r" (index) -+ :"g"(size),"r" (index) - :"cc"); - return mask; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch deleted file mode 100644 index 90877ac8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch +++ /dev/null @@ -1,129 +0,0 @@ -From b3ad1b7521b3f4aaddc02e93ce3835bcac48da35 Mon Sep 17 00:00:00 2001 -From: Tim Chen <tim.c.chen@linux.intel.com> -Date: Mon, 29 Jan 2018 22:04:47 +0000 -Subject: [PATCH 06/14] x86/speculation: Use Indirect Branch Prediction Barrier - in context switch -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 upstream. - -Flush indirect branches when switching into a process that marked itself -non dumpable. This protects high value processes like gpg better, -without having too high performance overhead. - -If done naïvely, we could switch to a kernel idle thread and then back -to the original process, such as: - - process A -> idle -> process A - -In such scenario, we do not have to do IBPB here even though the process -is non-dumpable, as we are switching back to the same process after a -hiatus. - -To avoid the redundant IBPB, which is expensive, we track the last mm -user context ID. The cost is to have an extra u64 mm context id to track -the last mm we were using before switching to the init_mm used by idle. -Avoiding the extra IBPB is probably worth the extra memory for this -common scenario. - -For those cases where tlb_defer_switch_to_init_mm() returns true (non -PCID), lazy tlb will defer switch to init_mm, so we will not be changing -the mm for the process A -> idle -> process A switch. So IBPB will be -skipped for this case. - -Thanks to the reviewers and Andy Lutomirski for the suggestion of -using ctx_id which got rid of the problem of mm pointer recycling. 
- -Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: ak@linux.intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: linux@dominikbrodowski.net -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: luto@kernel.org -Cc: pbonzini@redhat.com -Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 2 ++ - arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++ - 2 files changed, 33 insertions(+) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 94146f6..99185a0 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void) - struct tlb_state { - struct mm_struct *active_mm; - int state; -+ /* last user mm's ctx id */ -+ u64 last_ctx_id; - - /* - * Access to this CR4 shadow and to H/W CR4 is protected by -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 146e842..b1bf41b 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -10,6 +10,7 @@ - - #include <asm/tlbflush.h> - #include <asm/mmu_context.h> -+#include <asm/nospec-branch.h> - #include <asm/cache.h> - #include <asm/apic.h> - #include <asm/uv/uv.h> -@@ -106,6 +107,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - unsigned cpu = smp_processor_id(); - - if (likely(prev != next)) { -+ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); -+ -+ /* -+ * Avoid user/user BTB poisoning by flushing the branch -+ * predictor when switching between processes. This stops -+ * one process from doing Spectre-v2 attacks on another. -+ * -+ * As an optimization, flush indirect branches only when -+ * switching into processes that disable dumping. This -+ * protects high value processes like gpg, without having -+ * too high performance overhead. IBPB is *expensive*! -+ * -+ * This will not flush branches when switching into kernel -+ * threads. It will also not flush if we switch to idle -+ * thread and back to the same process. It will flush if we -+ * switch to a different non-dumpable process. -+ */ -+ if (tsk && tsk->mm && -+ tsk->mm->context.ctx_id != last_ctx_id && -+ get_dumpable(tsk->mm) != SUID_DUMP_USER) -+ indirect_branch_prediction_barrier(); -+ - if (IS_ENABLED(CONFIG_VMAP_STACK)) { - /* - * If our current stack is in vmalloc space and isn't -@@ -120,6 +143,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - set_pgd(pgd, init_mm.pgd[stack_pgd_index]); - } - -+ /* -+ * Record last user mm's context id, so we can avoid -+ * flushing branch buffer with IBPB if we switch back -+ * to the same user. 
-+ */ -+ if (next != &init_mm) -+ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); -+ - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - this_cpu_write(cpu_tlbstate.active_mm, next); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch deleted file mode 100644 index ecb1cdd3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch +++ /dev/null @@ -1,57 +0,0 @@ -From b4f699a49be9bbfa6bb5408e7f54c89b9bdc8919 Mon Sep 17 00:00:00 2001 -From: Andy Whitcroft <apw@canonical.com> -Date: Wed, 14 Mar 2018 11:24:27 +0000 -Subject: [PATCH 06/93] x86/speculation, objtool: Annotate indirect calls/jumps - for objtool on 32-bit kernels - -commit a14bff131108faf50cc0cf864589fd71ee216c96 upstream. - -In the following commit: - - 9e0e3c5130e9 ("x86/speculation, objtool: Annotate indirect calls/jumps for objtool") - -... we added annotations for CALL_NOSPEC/JMP_NOSPEC on 64-bit x86 kernels, -but we did not annotate the 32-bit path. - -Annotate it similarly. - -Signed-off-by: Andy Whitcroft <apw@canonical.com> -Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20180314112427.22351-1-apw@canonical.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index d0dabea..f928ad9 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -183,7 +183,10 @@ - * otherwise we'll run out of registers. We don't care about CET - * here, anyway. 
- */ --# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ -+# define CALL_NOSPEC \ -+ ALTERNATIVE( \ -+ ANNOTATE_RETPOLINE_SAFE \ -+ "call *%[thunk_target]\n", \ - " jmp 904f;\n" \ - " .align 16\n" \ - "901: call 903f;\n" \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch deleted file mode 100644 index 2ca432cf..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 34cbfb000e9bd72eb48fb3d1e61be034053f743f Mon Sep 17 00:00:00 2001 -From: Nikita Leshenko <nikita.leshchenko@oracle.com> -Date: Sun, 5 Nov 2017 15:52:29 +0200 -Subject: [PATCH 07/33] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC - reconfigure race -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ] - -KVM uses ioapic_handled_vectors to track vectors that need to notify the -IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an -interrupt with old configuration is pending or running and -ioapic_handled_vectors only remembers the newest configuration; -thus EOI from the old interrupt is not delievered to the IOAPIC. - -A previous commit db2bdcbbbd32 -("KVM: x86: fix edge EOI and IOAPIC reconfig race") -addressed this issue by adding pending edge-triggered interrupts to -ioapic_handled_vectors, fixing this race for edge-triggered interrupts. -The commit explicitly ignored level-triggered interrupts, -but this race applies to them as well: - -1) IOAPIC sends a level triggered interrupt vector to VCPU0 -2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC - to route the vector to VCPU1. The reconfiguration rewrites only the - upper 32 bits of the IOREDTBLn register. (Causes KVM to update - ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) -3) VCPU0 sends EOI for the vector, but it's not delievered to the - IOAPIC because the ioapic_handled_vectors doesn't include the vector. -4) New interrupts are not delievered to VCPU1 because remote_irr bit - is set forever. - -Therefore, the correct behavior is to add all pending and running -interrupts to ioapic_handled_vectors. - -This commit introduces a slight performance hit similar to -commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") -for the rare case that the vector is reused by a non-IOAPIC source on -VCPU0. We prefer to keep solution simple and not handle this case just -as the original commit does. 
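In short: a vector must now be tracked whenever an interrupt delivered under the old configuration may still be in flight on this vCPU, not only in the edge-triggered case. A toy model of the decision (illustrative only, not the verbatim 4.9 code):

    #include <stdbool.h>

    /* Track a vector for EOI interception if the entry still routes to
     * this vCPU, or if an EOI for it is still outstanding here from a
     * previous configuration. The old code additionally required the
     * pending case to be edge-triggered; the fix drops that condition. */
    static bool must_track_vector(bool routed_to_this_vcpu,
                                  bool eoi_still_pending_here)
    {
            return routed_to_this_vcpu || eoi_still_pending_here;
    }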
- -Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") - -Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Liran Alon <liran.alon@oracle.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/ioapic.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c -index 6e219e5..a7ac868 100644 ---- a/arch/x86/kvm/ioapic.c -+++ b/arch/x86/kvm/ioapic.c -@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) - index == RTC_GSI) { - if (kvm_apic_match_dest(vcpu, NULL, 0, - e->fields.dest_id, e->fields.dest_mode) || -- (e->fields.trig_mode == IOAPIC_EDGE_TRIG && -- kvm_apic_pending_eoi(vcpu, e->fields.vector))) -+ kvm_apic_pending_eoi(vcpu, e->fields.vector)) - __set_bit(e->fields.vector, - ioapic_handled_vectors); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch deleted file mode 100644 index 69809c28..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-bpf-x64-implement-retpoline-for-tail-call.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 8dfc905d7d2e3c68f31eca0178b6137b2e1fc7f9 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Thu, 8 Mar 2018 16:17:34 +0100 -Subject: [PATCH 07/14] bpf, x64: implement retpoline for tail call - -[ upstream commit a493a87f38cfa48caaa95c9347be2d914c6fdf29 ] - -Implement a retpoline [0] for the BPF tail call JIT'ing that converts -the indirect jump via jmp %rax that is used to make the long jump into -another JITed BPF image. Since this is subject to speculative execution, -we need to control the transient instruction sequence here as well -when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. -The latter aligns also with what gcc / clang emits (e.g. [1]). - -JIT dump after patch: - - # bpftool p d x i 1 - 0: (18) r2 = map[id:1] - 2: (b7) r3 = 0 - 3: (85) call bpf_tail_call#12 - 4: (b7) r0 = 2 - 5: (95) exit - -With CONFIG_RETPOLINE: - - # bpftool p d j i 1 - [...] - 33: cmp %edx,0x24(%rsi) - 36: jbe 0x0000000000000072 |* - 38: mov 0x24(%rbp),%eax - 3e: cmp $0x20,%eax - 41: ja 0x0000000000000072 | - 43: add $0x1,%eax - 46: mov %eax,0x24(%rbp) - 4c: mov 0x90(%rsi,%rdx,8),%rax - 54: test %rax,%rax - 57: je 0x0000000000000072 | - 59: mov 0x28(%rax),%rax - 5d: add $0x25,%rax - 61: callq 0x000000000000006d |+ - 66: pause | - 68: lfence | - 6b: jmp 0x0000000000000066 | - 6d: mov %rax,(%rsp) | - 71: retq | - 72: mov $0x2,%eax - [...] - - * relative fall-through jumps in error case - + retpoline for indirect jump - -Without CONFIG_RETPOLINE: - - # bpftool p d j i 1 - [...] - 33: cmp %edx,0x24(%rsi) - 36: jbe 0x0000000000000063 |* - 38: mov 0x24(%rbp),%eax - 3e: cmp $0x20,%eax - 41: ja 0x0000000000000063 | - 43: add $0x1,%eax - 46: mov %eax,0x24(%rbp) - 4c: mov 0x90(%rsi,%rdx,8),%rax - 54: test %rax,%rax - 57: je 0x0000000000000063 | - 59: mov 0x28(%rax),%rax - 5d: add $0x25,%rax - 61: jmpq *%rax |- - 63: mov $0x2,%eax - [...] 
- - * relative fall-through jumps in error case - - plain indirect jump as before - - [0] https://support.google.com/faqs/answer/7625886 - [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b - -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 37 ++++++++++++++++++++++++++++++++++++ - arch/x86/net/bpf_jit_comp.c | 9 +++++---- - 2 files changed, 42 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 76b0585..81a1be3 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) - } - - #endif /* __ASSEMBLY__ */ -+ -+/* -+ * Below is used in the eBPF JIT compiler and emits the byte sequence -+ * for the following assembly: -+ * -+ * With retpolines configured: -+ * -+ * callq do_rop -+ * spec_trap: -+ * pause -+ * lfence -+ * jmp spec_trap -+ * do_rop: -+ * mov %rax,(%rsp) -+ * retq -+ * -+ * Without retpolines configured: -+ * -+ * jmp *%rax -+ */ -+#ifdef CONFIG_RETPOLINE -+# define RETPOLINE_RAX_BPF_JIT_SIZE 17 -+# define RETPOLINE_RAX_BPF_JIT() \ -+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \ -+ /* spec_trap: */ \ -+ EMIT2(0xF3, 0x90); /* pause */ \ -+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ -+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ -+ /* do_rop: */ \ -+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ -+ EMIT1(0xC3); /* retq */ -+#else -+# define RETPOLINE_RAX_BPF_JIT_SIZE 2 -+# define RETPOLINE_RAX_BPF_JIT() \ -+ EMIT2(0xFF, 0xE0); /* jmp *%rax */ -+#endif -+ - #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ -diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c -index 7840331..1f7ed2e 100644 ---- a/arch/x86/net/bpf_jit_comp.c -+++ b/arch/x86/net/bpf_jit_comp.c -@@ -12,6 +12,7 @@ - #include <linux/filter.h> - #include <linux/if_vlan.h> - #include <asm/cacheflush.h> -+#include <asm/nospec-branch.h> - #include <linux/bpf.h> - - int bpf_jit_enable __read_mostly; -@@ -281,7 +282,7 @@ static void emit_bpf_tail_call(u8 **pprog) - EMIT2(0x89, 0xD2); /* mov edx, edx */ - EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ - offsetof(struct bpf_array, map.max_entries)); --#define OFFSET1 43 /* number of bytes to jump */ -+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ - EMIT2(X86_JBE, OFFSET1); /* jbe out */ - label1 = cnt; - -@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) - */ - EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ - EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ --#define OFFSET2 32 -+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) - EMIT2(X86_JA, OFFSET2); /* ja out */ - label2 = cnt; - EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ -@@ -304,7 +305,7 @@ static void emit_bpf_tail_call(u8 **pprog) - * goto out; - */ - EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ --#define OFFSET3 10 -+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) - EMIT2(X86_JE, OFFSET3); /* je out */ - label3 = cnt; - -@@ -317,7 +318,7 @@ static void emit_bpf_tail_call(u8 **pprog) - * rdi == ctx (1st arg) - * rax == prog->bpf_func + prologue_size - */ -- EMIT2(0xFF, 0xE0); /* jmp rax */ -+ RETPOLINE_RAX_BPF_JIT(); - - /* out: */ - BUILD_BUG_ON(cnt - label1 != 
OFFSET1); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch deleted file mode 100644 index 2fb277eb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch +++ /dev/null @@ -1,145 +0,0 @@ -From bdfb218abe244fde0b09b65dc9648b72e7d4579b Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 18:57:03 -0700 -Subject: [PATCH 007/103] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE - -Kaiser only needs to map one page of the stack; and -kernel/fork.c did not build on powerpc (no __PAGE_KERNEL). -It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack() -and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's -kaiser_add_mapping() and kaiser_remove_mapping(). And use -linux/kaiser.h in init/main.c to avoid the #ifdefs there. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++------- - init/main.c | 6 +----- - kernel/fork.c | 7 ++----- - 3 files changed, 36 insertions(+), 17 deletions(-) - -diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h -index 9db5433..4a4d6d9 100644 ---- a/include/linux/kaiser.h -+++ b/include/linux/kaiser.h -@@ -1,26 +1,52 @@ --#ifndef _INCLUDE_KAISER_H --#define _INCLUDE_KAISER_H -+#ifndef _LINUX_KAISER_H -+#define _LINUX_KAISER_H - - #ifdef CONFIG_KAISER - #include <asm/kaiser.h> -+ -+static inline int kaiser_map_thread_stack(void *stack) -+{ -+ /* -+ * Map that page of kernel stack on which we enter from user context. -+ */ -+ return kaiser_add_mapping((unsigned long)stack + -+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL); -+} -+ -+static inline void kaiser_unmap_thread_stack(void *stack) -+{ -+ /* -+ * Note: may be called even when kaiser_map_thread_stack() failed. -+ */ -+ kaiser_remove_mapping((unsigned long)stack + -+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE); -+} - #else - - /* - * These stubs are used whenever CONFIG_KAISER is off, which -- * includes architectures that support KAISER, but have it -- * disabled. -+ * includes architectures that support KAISER, but have it disabled. 
- */ - - static inline void kaiser_init(void) - { - } --static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) -+static inline int kaiser_add_mapping(unsigned long addr, -+ unsigned long size, unsigned long flags) -+{ -+ return 0; -+} -+static inline void kaiser_remove_mapping(unsigned long start, -+ unsigned long size) - { - } --static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) -+static inline int kaiser_map_thread_stack(void *stack) - { - return 0; - } -+static inline void kaiser_unmap_thread_stack(void *stack) -+{ -+} - - #endif /* !CONFIG_KAISER */ --#endif /* _INCLUDE_KAISER_H */ -+#endif /* _LINUX_KAISER_H */ -diff --git a/init/main.c b/init/main.c -index d2c8c23..eb47369 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -81,15 +81,13 @@ - #include <linux/integrity.h> - #include <linux/proc_ns.h> - #include <linux/io.h> -+#include <linux/kaiser.h> - - #include <asm/io.h> - #include <asm/bugs.h> - #include <asm/setup.h> - #include <asm/sections.h> - #include <asm/cacheflush.h> --#ifdef CONFIG_KAISER --#include <asm/kaiser.h> --#endif - - static int kernel_init(void *); - -@@ -477,9 +475,7 @@ static void __init mm_init(void) - pgtable_init(); - vmalloc_init(); - ioremap_huge_init(); --#ifdef CONFIG_KAISER - kaiser_init(); --#endif - } - - asmlinkage __visible void __init start_kernel(void) -diff --git a/kernel/fork.c b/kernel/fork.c -index 7ba50f1..2bddd1d 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -212,12 +212,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) - #endif - } - --extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); - static inline void free_thread_stack(struct task_struct *tsk) - { --#ifdef CONFIG_KAISER -- kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); --#endif -+ kaiser_unmap_thread_stack(tsk->stack); - #ifdef CONFIG_VMAP_STACK - if (task_stack_vm_area(tsk)) { - unsigned long flags; -@@ -501,7 +498,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) - */ - tsk->stack = stack; - -- err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); -+ err= kaiser_map_thread_stack(tsk->stack); - if (err) - goto free_stack; - #ifdef CONFIG_VMAP_STACK --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch deleted file mode 100644 index be5712b6..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch +++ /dev/null @@ -1,159 +0,0 @@ -From dabd9b2a92eda21c93aeee9f7bf8f369fed15833 Mon Sep 17 00:00:00 2001 -From: Andi Kleen <ak@linux.intel.com> -Date: Thu, 25 Jan 2018 15:50:28 -0800 -Subject: [PATCH 07/42] module/retpoline: Warn about missing retpoline in - module - -(cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8) - -There's a risk that a kernel which has full retpoline mitigations becomes -vulnerable when a module gets loaded that hasn't been compiled with the -right compiler or the right option. - -To enable detection of that mismatch at module load time, add a module info -string "retpoline" at build time when the module was compiled with -retpoline support. This only covers compiled C source, but assembler source -or prebuilt object files are not checked. 
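Concretely, the tag is a one-liner that modpost appends to each module's generated C stub; an illustrative excerpt of what ends up in foo.mod.c ("foo" being a hypothetical module name), matching the add_retpoline() hunk further below:

    /* Only builds whose compiler enabled retpolines define RETPOLINE,
     * so only those builds tag their modules: */
    #ifdef RETPOLINE
    MODULE_INFO(retpoline, "Y");
    #endif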
- -If a retpoline enabled kernel detects a non retpoline protected module at -load time, print a warning and report it in the sysfs vulnerability file. - -[ tglx: Massaged changelog ] - -Signed-off-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: gregkh@linuxfoundation.org -Cc: torvalds@linux-foundation.org -Cc: jeyu@kernel.org -Cc: arjan@linux.intel.com -Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- - include/linux/module.h | 9 +++++++++ - kernel/module.c | 11 +++++++++++ - scripts/mod/modpost.c | 9 +++++++++ - 4 files changed, 45 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 8cacf62..4cea7d4 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -10,6 +10,7 @@ - #include <linux/init.h> - #include <linux/utsname.h> - #include <linux/cpu.h> -+#include <linux/module.h> - - #include <asm/nospec-branch.h> - #include <asm/cmdline.h> -@@ -92,6 +93,19 @@ static const char *spectre_v2_strings[] = { - #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt - - static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -+static bool spectre_v2_bad_module; -+ -+#ifdef RETPOLINE -+bool retpoline_module_ok(bool has_retpoline) -+{ -+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) -+ return true; -+ -+ pr_err("System may be vunerable to spectre v2\n"); -+ spectre_v2_bad_module = true; -+ return false; -+} -+#endif - - static void __init spec2_print_if_insecure(const char *reason) - { -@@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - -- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); -+ return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ spectre_v2_bad_module ? " - vulnerable module loaded" : ""); - } - #endif -diff --git a/include/linux/module.h b/include/linux/module.h -index 0c3207d..d2224a0 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, - static inline void module_bug_cleanup(struct module *mod) {} - #endif /* CONFIG_GENERIC_BUG */ - -+#ifdef RETPOLINE -+extern bool retpoline_module_ok(bool has_retpoline); -+#else -+static inline bool retpoline_module_ok(bool has_retpoline) -+{ -+ return true; -+} -+#endif -+ - #ifdef CONFIG_MODULE_SIG - static inline bool module_sig_ok(struct module *module) - { -diff --git a/kernel/module.c b/kernel/module.c -index 0e54d5b..07bfb99 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) - } - #endif /* CONFIG_LIVEPATCH */ - -+static void check_modinfo_retpoline(struct module *mod, struct load_info *info) -+{ -+ if (retpoline_module_ok(get_modinfo(info, "retpoline"))) -+ return; -+ -+ pr_warn("%s: loading module not compiled with retpoline compiler.\n", -+ mod->name); -+} -+ - /* Sets info->hdr and info->len. 
*/ - static int copy_module_from_user(const void __user *umod, unsigned long len, - struct load_info *info) -@@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) - add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); - } - -+ check_modinfo_retpoline(mod, info); -+ - if (get_modinfo(info, "staging")) { - add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); - pr_warn("%s: module is from the staging directory, the quality " -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 325f1af..96a8047 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) - buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); - } - -+/* Cannot check for assembler */ -+static void add_retpoline(struct buffer *b) -+{ -+ buf_printf(b, "\n#ifdef RETPOLINE\n"); -+ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); -+ buf_printf(b, "#endif\n"); -+} -+ - static void add_staging_flag(struct buffer *b, const char *name) - { - static const char *staging_dir = "drivers/staging"; -@@ -2474,6 +2482,7 @@ int main(int argc, char **argv) - - add_header(&buf, mod); - add_intree_flag(&buf, !external_module); -+ add_retpoline(&buf); - add_staging_flag(&buf, mod->name); - err |= add_versions(&buf, mod); - add_depends(&buf, mod, modules); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch deleted file mode 100644 index 3ddb8ece..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch +++ /dev/null @@ -1,59 +0,0 @@ -From f08520b8eba49e29d01f53ac8f2a52022e435744 Mon Sep 17 00:00:00 2001 -From: Tony Luck <tony.luck@intel.com> -Date: Fri, 25 May 2018 14:41:39 -0700 -Subject: [PATCH 07/10] x86/mce: Improve error message when kernel cannot - recover - -commit c7d606f560e4c698884697fef503e4abacdd8c25 upstream. - -Since we added support to add recovery from some errors inside the kernel in: - -commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries") - -we have done a less than stellar job at reporting the cause of recoverable -machine checks that occur in other parts of the kernel. The user just gets -the unhelpful message: - - mce: [Hardware Error]: Machine check: Action required: unknown MCACOD - -doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD -and see that it is listed as one of the standard recoverable values. 
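For orientation, the error code in question is the MCACOD field, i.e. the low 16 bits of IA32_MCi_STATUS; the definitions involved (per arch/x86/include/asm/mce.h of this era, shown here for illustration) are:

    #define MCACOD        0xffff   /* MCA error-code field of MCi_STATUS */
    #define MCACOD_DATA   0x0134   /* recoverable data-load error (SRAR) */
    #define MCACOD_INSTR  0x0150   /* recoverable instruction-fetch (SRAR) */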
- -Add an extra rule to the MCE severity table to catch this case and report it -as: - - mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel - -Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries") -Signed-off-by: Tony Luck <tony.luck@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com> -Cc: Ashok Raj <ashok.raj@intel.com> -Cc: stable@vger.kernel.org # 4.6+ -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Borislav Petkov <bp@suse.de> -Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c -index c7efbcf..17dbbdbb 100644 ---- a/arch/x86/kernel/cpu/mcheck/mce-severity.c -+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c -@@ -143,6 +143,11 @@ static struct severity { - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), - USER - ), -+ MCESEV( -+ PANIC, "Data load in unrecoverable area of kernel", -+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), -+ KERNEL -+ ), - #endif - MCESEV( - PANIC, "Action required: unknown MCACOD", --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch deleted file mode 100644 index abf0b6ba..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Add-asm-msr-index.h-dependency.patch +++ /dev/null @@ -1,50 +0,0 @@ -From ae5dca4c2f9a62ec120a32663609b3dabfeb8ae4 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Tue, 13 Feb 2018 14:28:19 +0100 -Subject: [PATCH 07/12] x86/speculation: Add <asm/msr-index.h> dependency - -commit ea00f301285ea2f07393678cd2b6057878320c9d upstream. - -Joe Konno reported a compile failure resulting from using an MSR -without inclusion of <asm/msr-index.h>, and while the current code builds -fine (by accident) this needs fixing for future patches. 
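The failure mode is the classic implicit-include trap: <asm/nospec-branch.h> references MSR constants but only built because some includer happened to pull in <asm/msr-index.h> first. A minimal sketch of the pattern being made self-contained (simplified from the IBPB helper, not the exact kernel code):

    #include <asm/msr-index.h>   /* MSR_IA32_PRED_CMD, PRED_CMD_IBPB */
    #include <asm/msr.h>         /* wrmsrl() */

    static inline void ibpb_sketch(void)   /* illustrative name */
    {
            wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
    }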
- -Reported-by: Joe Konno <joe.konno@linux.intel.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: arjan@linux.intel.com -Cc: bp@alien8.de -Cc: dan.j.williams@intel.com -Cc: dave.hansen@linux.intel.com -Cc: dwmw2@infradead.org -Cc: dwmw@amazon.co.uk -Cc: gregkh@linuxfoundation.org -Cc: hpa@zytor.com -Cc: jpoimboe@redhat.com -Cc: linux-tip-commits@vger.kernel.org -Cc: luto@kernel.org -Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support") -Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 300cc15..76b0585 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -6,6 +6,7 @@ - #include <asm/alternative.h> - #include <asm/alternative-asm.h> - #include <asm/cpufeatures.h> -+#include <asm/msr-index.h> - - #ifdef __ASSEMBLY__ - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch deleted file mode 100644 index 4da48ef5..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 5516ae4d16ab0ce922de31fec20d5d5e198aa258 Mon Sep 17 00:00:00 2001 -From: Alexander Sergeyev <sergeev917@gmail.com> -Date: Tue, 13 Mar 2018 22:38:56 +0300 -Subject: [PATCH 07/93] x86/speculation: Remove Skylake C2 from Speculation - Control microcode blacklist - -commit e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 upstream. - -In accordance with Intel's microcode revision guidance from March 6 MCU -rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors -that share CPUID 506E3. - -Signed-off-by: Alexander Sergeyev <sergeev917@gmail.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Jia Zhang <qianyue.zj@alibaba-inc.com> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Kyle Huey <me@kylehuey.com> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/intel.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 7680425..8fb1d65 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -64,7 +64,7 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) - /* - * Early microcode releases for the Spectre v2 mitigation were broken. 
- * Information taken from; -- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf -+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf - * - https://kb.vmware.com/s/article/52345 - * - Microcode revisions observed in the wild - * - Release note from 20180108 microcode release -@@ -82,7 +82,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, -- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, - { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch deleted file mode 100644 index 6e097d05..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch +++ /dev/null @@ -1,64 +0,0 @@ -From aca211b549c07b81295e817e663a61a1ae1fd659 Mon Sep 17 00:00:00 2001 -From: Nikita Leshenko <nikita.leshchenko@oracle.com> -Date: Sun, 5 Nov 2017 15:52:32 +0200 -Subject: [PATCH 08/33] KVM: x86: ioapic: Clear Remote IRR when entry is - switched to edge-triggered -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ] - -Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for -IOAPICs without an EOI register. They simulate the EOI message manually -by changing the trigger mode to edge and then back to level, with the -entry being masked during this. - -QEMU implements this feature in commit ed1263c363c9 -("ioapic: clear remote irr bit for edge-triggered interrupts") - -As a side effect, this commit removes an incorrect behavior where Remote -IRR was cleared when the redirection table entry was rewritten. This is not -consistent with the manual and also opens an opportunity for a strange -behavior when a redirection table entry is modified from an interrupt -handler that handles the same entry: The modification will clear the -Remote IRR bit even though the interrupt handler is still running. - -Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Liran Alon <liran.alon@oracle.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> -Reviewed-by: Steve Rutherford <srutherford@google.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/ioapic.c | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c -index a7ac868..4b573c8 100644 ---- a/arch/x86/kvm/ioapic.c -+++ b/arch/x86/kvm/ioapic.c -@@ -306,8 +306,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) - } else { - e->bits &= ~0xffffffffULL; - e->bits |= (u32) val; -- e->fields.remote_irr = 0; - } -+ -+ /* -+ * Some OSes (Linux, Xen) assume that Remote IRR bit will -+ * be cleared by IOAPIC hardware when the entry is configured -+ * as edge-triggered. 
This behavior is used to simulate an -+ * explicit EOI on IOAPICs that don't have the EOI register. -+ */ -+ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) -+ e->fields.remote_irr = 0; -+ - mask_after = e->fields.mask; - if (mask_before != mask_after) - kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch deleted file mode 100644 index a3bda594..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 183131e8c381ffb7c32a09a7356cb25450d2bd40 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 17:09:44 -0700 -Subject: [PATCH 008/103] kaiser: fix build and FIXME in alloc_ldt_struct() - -Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without -CONFIG_KAISER. kaiser_add_mapping() does already return an error code, -so fix the FIXME. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/ldt.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c -index 3c2d55b..8331bad 100644 ---- a/arch/x86/kernel/ldt.c -+++ b/arch/x86/kernel/ldt.c -@@ -15,9 +15,9 @@ - #include <linux/slab.h> - #include <linux/vmalloc.h> - #include <linux/uaccess.h> -+#include <linux/kaiser.h> - - #include <asm/ldt.h> --#include <asm/kaiser.h> - #include <asm/desc.h> - #include <asm/mmu_context.h> - #include <asm/syscalls.h> -@@ -48,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) - { - struct ldt_struct *new_ldt; - int alloc_size; -- int ret = 0; -+ int ret; - - if (size > LDT_ENTRIES) - return NULL; -@@ -76,10 +76,8 @@ static struct ldt_struct *alloc_ldt_struct(int size) - return NULL; - } - -- // FIXME: make kaiser_add_mapping() return an error code -- // when it fails -- kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, -- __PAGE_KERNEL); -+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, -+ __PAGE_KERNEL); - if (ret) { - __free_ldt_struct(new_ldt); - return NULL; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch deleted file mode 100644 index 5dc0b927..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch +++ /dev/null @@ -1,760 +0,0 @@ -From 4ac936f6e6b191d2eac4083da651826a8bb7b03b Mon Sep 17 00:00:00 2001 -From: Jia Zhang <qianyue.zj@alibaba-inc.com> -Date: Mon, 1 Jan 2018 09:52:10 +0800 -Subject: [PATCH 08/12] x86/cpu: Rename cpu_data.x86_mask to - cpu_data.x86_stepping - -commit b399151cb48db30ad1e0e93dd40d68c6d007b637 upstream. - -x86_mask is a confusing name which is hard to associate with the -processor's stepping. - -Additionally, correct an indent issue in lib/cpu.c. - -Signed-off-by: Jia Zhang <qianyue.zj@alibaba-inc.com> -[ Updated it to more recent kernels. 
] -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: bp@alien8.de -Cc: tony.luck@intel.com -Link: http://lkml.kernel.org/r/1514771530-70829-1-git-send-email-qianyue.zj@alibaba-inc.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/intel/core.c | 2 +- - arch/x86/events/intel/lbr.c | 2 +- - arch/x86/events/intel/p6.c | 2 +- - arch/x86/include/asm/acpi.h | 2 +- - arch/x86/include/asm/processor.h | 2 +- - arch/x86/kernel/amd_nb.c | 2 +- - arch/x86/kernel/asm-offsets_32.c | 2 +- - arch/x86/kernel/cpu/amd.c | 26 +++++++++++++------------- - arch/x86/kernel/cpu/centaur.c | 4 ++-- - arch/x86/kernel/cpu/common.c | 8 ++++---- - arch/x86/kernel/cpu/cyrix.c | 2 +- - arch/x86/kernel/cpu/intel.c | 18 +++++++++--------- - arch/x86/kernel/cpu/microcode/intel.c | 2 +- - arch/x86/kernel/cpu/mtrr/generic.c | 2 +- - arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- - arch/x86/kernel/cpu/proc.c | 4 ++-- - arch/x86/kernel/head_32.S | 4 ++-- - arch/x86/kernel/mpparse.c | 2 +- - arch/x86/lib/cpu.c | 2 +- - drivers/char/hw_random/via-rng.c | 2 +- - drivers/cpufreq/acpi-cpufreq.c | 2 +- - drivers/cpufreq/longhaul.c | 6 +++--- - drivers/cpufreq/p4-clockmod.c | 2 +- - drivers/cpufreq/powernow-k7.c | 2 +- - drivers/cpufreq/speedstep-centrino.c | 4 ++-- - drivers/cpufreq/speedstep-lib.c | 6 +++--- - drivers/crypto/padlock-aes.c | 2 +- - drivers/edac/amd64_edac.c | 2 +- - drivers/edac/mce_amd.c | 2 +- - drivers/hwmon/coretemp.c | 6 +++--- - drivers/hwmon/hwmon-vid.c | 2 +- - drivers/hwmon/k10temp.c | 2 +- - drivers/hwmon/k8temp.c | 2 +- - drivers/video/fbdev/geode/video_gx.c | 2 +- - 34 files changed, 68 insertions(+), 68 deletions(-) - -diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c -index cb85222..6b251fcc 100644 ---- a/arch/x86/events/intel/core.c -+++ b/arch/x86/events/intel/core.c -@@ -3360,7 +3360,7 @@ static int intel_snb_pebs_broken(int cpu) - break; - - case INTEL_FAM6_SANDYBRIDGE_X: -- switch (cpu_data(cpu).x86_mask) { -+ switch (cpu_data(cpu).x86_stepping) { - case 6: rev = 0x618; break; - case 7: rev = 0x70c; break; - } -diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c -index 81b321a..34ba350 100644 ---- a/arch/x86/events/intel/lbr.c -+++ b/arch/x86/events/intel/lbr.c -@@ -1128,7 +1128,7 @@ void __init intel_pmu_lbr_init_atom(void) - * on PMU interrupt - */ - if (boot_cpu_data.x86_model == 28 -- && boot_cpu_data.x86_mask < 10) { -+ && boot_cpu_data.x86_stepping < 10) { - pr_cont("LBR disabled due to erratum"); - return; - } -diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c -index 1f5c47a..c5e441b 100644 ---- a/arch/x86/events/intel/p6.c -+++ b/arch/x86/events/intel/p6.c -@@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = { - - static __init void p6_pmu_rdpmc_quirk(void) - { -- if (boot_cpu_data.x86_mask < 9) { -+ if (boot_cpu_data.x86_stepping < 9) { - /* - * PPro erratum 26; fixed in stepping 9 and above. 
- */ -diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h -index 5391b0a..d32bab6 100644 ---- a/arch/x86/include/asm/acpi.h -+++ b/arch/x86/include/asm/acpi.h -@@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) - if (boot_cpu_data.x86 == 0x0F && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86_model <= 0x05 && -- boot_cpu_data.x86_mask < 0x0A) -+ boot_cpu_data.x86_stepping < 0x0A) - return 1; - else if (amd_e400_c1e_detected) - return 1; -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index a781668..df29212 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -88,7 +88,7 @@ struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; -- __u8 x86_mask; -+ __u8 x86_stepping; - #ifdef CONFIG_X86_32 - char wp_works_ok; /* It doesn't on 386's */ - -diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c -index 458da85..8fe41c6 100644 ---- a/arch/x86/kernel/amd_nb.c -+++ b/arch/x86/kernel/amd_nb.c -@@ -231,7 +231,7 @@ int amd_cache_northbridges(void) - if (boot_cpu_data.x86 == 0x10 && - boot_cpu_data.x86_model >= 0x8 && - (boot_cpu_data.x86_model > 0x9 || -- boot_cpu_data.x86_mask >= 0x1)) -+ boot_cpu_data.x86_stepping >= 0x1)) - amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; - - if (boot_cpu_data.x86 == 0x15) -diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c -index 880aa09..36ebb6d 100644 ---- a/arch/x86/kernel/asm-offsets_32.c -+++ b/arch/x86/kernel/asm-offsets_32.c -@@ -20,7 +20,7 @@ void foo(void) - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); -- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); -+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); - OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 1b89f0c..c375bc6 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -118,7 +118,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) - return; - } - -- if (c->x86_model == 6 && c->x86_mask == 1) { -+ if (c->x86_model == 6 && c->x86_stepping == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); -@@ -147,7 +147,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || -- (c->x86_model == 8 && c->x86_mask < 8)) { -+ (c->x86_model == 8 && c->x86_stepping < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; -@@ -166,7 +166,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) - return; - } - -- if ((c->x86_model == 8 && c->x86_mask > 7) || -+ if ((c->x86_model == 8 && c->x86_stepping > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - -@@ -219,7 +219,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ -- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { -+ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - pr_info("CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", -@@ -239,12 +239,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) - * but they are not certified as MP capable. - */ - /* Athlon 660/661 is valid. */ -- if ((c->x86_model == 6) && ((c->x86_mask == 0) || -- (c->x86_mask == 1))) -+ if ((c->x86_model == 6) && ((c->x86_stepping == 0) || -+ (c->x86_stepping == 1))) - return; - - /* Duron 670 is valid */ -- if ((c->x86_model == 7) && (c->x86_mask == 0)) -+ if ((c->x86_model == 7) && (c->x86_stepping == 0)) - return; - - /* -@@ -254,8 +254,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for - * more. - */ -- if (((c->x86_model == 6) && (c->x86_mask >= 2)) || -- ((c->x86_model == 7) && (c->x86_mask >= 1)) || -+ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || -+ ((c->x86_model == 7) && (c->x86_stepping >= 1)) || - (c->x86_model > 7)) - if (cpu_has(c, X86_FEATURE_MP)) - return; -@@ -569,7 +569,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || -- (c->x86_model == 8 && c->x86_mask >= 8)) -+ (c->x86_model == 8 && c->x86_stepping >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); - #endif - #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) -@@ -834,11 +834,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - /* Duron Rev A0 */ -- if (c->x86_model == 3 && c->x86_mask == 0) -+ if (c->x86_model == 3 && c->x86_stepping == 0) - size = 64; - /* Tbird rev A1/A2 */ - if (c->x86_model == 4 && -- (c->x86_mask == 0 || c->x86_mask == 1)) -+ (c->x86_stepping == 0 || c->x86_stepping == 1)) - size = 256; - } - return size; -@@ -975,7 +975,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) - } - - /* OSVW unavailable or ID unknown, match family-model-stepping range */ -- ms = (cpu->x86_model << 4) | cpu->x86_mask; -+ ms = (cpu->x86_model << 4) | cpu->x86_stepping; - while ((range = *erratum++)) - if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && - (ms >= AMD_MODEL_RANGE_START(range)) && -diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c -index 1661d8e..4d2f61f 100644 ---- a/arch/x86/kernel/cpu/centaur.c -+++ b/arch/x86/kernel/cpu/centaur.c -@@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c) - clear_cpu_cap(c, X86_FEATURE_TSC); - break; - case 8: -- switch (c->x86_mask) { -+ switch (c->x86_stepping) { - default: - name = "2"; - break; -@@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) - * - Note, it seems this may only be in engineering samples. 
- */ - if ((c->x86 == 6) && (c->x86_model == 9) && -- (c->x86_mask == 1) && (size == 65)) -+ (c->x86_stepping == 1) && (size == 65)) - size -= 1; - return size; - } -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 08e89ed..96b2c83 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -699,7 +699,7 @@ void cpu_detect(struct cpuinfo_x86 *c) - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = x86_family(tfms); - c->x86_model = x86_model(tfms); -- c->x86_mask = x86_stepping(tfms); -+ c->x86_stepping = x86_stepping(tfms); - - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; -@@ -1146,7 +1146,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; -- c->x86_model = c->x86_mask = 0; /* So far unknown... */ -+ c->x86_model = c->x86_stepping = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; -@@ -1391,8 +1391,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) - - pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - -- if (c->x86_mask || c->cpuid_level >= 0) -- pr_cont(", stepping: 0x%x)\n", c->x86_mask); -+ if (c->x86_stepping || c->cpuid_level >= 0) -+ pr_cont(", stepping: 0x%x)\n", c->x86_stepping); - else - pr_cont(")\n"); - -diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c -index bd9dcd6..455d8ad 100644 ---- a/arch/x86/kernel/cpu/cyrix.c -+++ b/arch/x86/kernel/cpu/cyrix.c -@@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) - - /* common case step number/rev -- exceptions handled below */ - c->x86_model = (dir1 >> 4) + 1; -- c->x86_mask = dir1 & 0xf; -+ c->x86_stepping = dir1 & 0xf; - - /* Now cook; the original recipe is by Channing Corn, from Cyrix. - * We do the same thing for each generation: we work out -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 02cb2e3..6ed206b 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -105,7 +105,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) - - for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { - if (c->x86_model == spectre_bad_microcodes[i].model && -- c->x86_mask == spectre_bad_microcodes[i].stepping) -+ c->x86_stepping == spectre_bad_microcodes[i].stepping) - return (c->microcode <= spectre_bad_microcodes[i].microcode); - } - return false; -@@ -158,7 +158,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - * need the microcode to have already been loaded... so if it is - * not, recommend a BIOS update and disable large pages. 
- */ -- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && -+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && - c->microcode < 0x20e) { - pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); - clear_cpu_cap(c, X86_FEATURE_PSE); -@@ -174,7 +174,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - - /* CPUID workaround for 0F33/0F34 CPU */ - if (c->x86 == 0xF && c->x86_model == 0x3 -- && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) -+ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) - c->x86_phys_bits = 36; - - /* -@@ -289,7 +289,7 @@ int ppro_with_ram_bug(void) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && -- boot_cpu_data.x86_mask < 8) { -+ boot_cpu_data.x86_stepping < 8) { - pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } -@@ -306,7 +306,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86 == 5 && -- c->x86_mask >= 1 && c->x86_mask <= 4 && -+ c->x86_stepping >= 1 && c->x86_stepping <= 4 && - c->x86_model <= 3) { - /* - * Remember we have B step Pentia with bugs -@@ -349,7 +349,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) - * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until - * model 3 mask 3 - */ -- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) -+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* -@@ -367,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) - * P4 Xeon erratum 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { -+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { - if (msr_set_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { - pr_info("CPU: C0 stepping P4 Xeon detected.\n"); -@@ -382,7 +382,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) - * Specification Update"). 
- */ - if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && -- (c->x86_mask < 0x6 || c->x86_mask == 0xb)) -+ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) - set_cpu_bug(c, X86_BUG_11AP); - - -@@ -601,7 +601,7 @@ static void init_intel(struct cpuinfo_x86 *c) - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; -- else if (c->x86_mask == 0 || c->x86_mask == 5) -+ else if (c->x86_stepping == 0 || c->x86_stepping == 5) - p = "Celeron-A"; - break; - -diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c -index cdc0dea..5d346c0 100644 ---- a/arch/x86/kernel/cpu/microcode/intel.c -+++ b/arch/x86/kernel/cpu/microcode/intel.c -@@ -1055,7 +1055,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, - enum ucode_state ret; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", -- c->x86, c->x86_model, c->x86_mask); -+ c->x86, c->x86_model, c->x86_stepping); - - if (request_firmware_direct(&firmware, name, device)) { - pr_debug("data file %s load failed\n", name); -diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c -index fdc5521..e12ee86 100644 ---- a/arch/x86/kernel/cpu/mtrr/generic.c -+++ b/arch/x86/kernel/cpu/mtrr/generic.c -@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, - */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && -- boot_cpu_data.x86_mask <= 7) { -+ boot_cpu_data.x86_stepping <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); - return -EINVAL; -diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c -index 24e87e7..fae740c 100644 ---- a/arch/x86/kernel/cpu/mtrr/main.c -+++ b/arch/x86/kernel/cpu/mtrr/main.c -@@ -699,8 +699,8 @@ void __init mtrr_bp_init(void) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && -- (boot_cpu_data.x86_mask == 0x3 || -- boot_cpu_data.x86_mask == 0x4)) -+ (boot_cpu_data.x86_stepping == 0x3 || -+ boot_cpu_data.x86_stepping == 0x4)) - phys_addr = 36; - - size_or_mask = SIZE_OR_MASK_BITS(phys_addr); -diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c -index 18ca99f..9e817f2 100644 ---- a/arch/x86/kernel/cpu/proc.c -+++ b/arch/x86/kernel/cpu/proc.c -@@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) - c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - -- if (c->x86_mask || c->cpuid_level >= 0) -- seq_printf(m, "stepping\t: %d\n", c->x86_mask); -+ if (c->x86_stepping || c->cpuid_level >= 0) -+ seq_printf(m, "stepping\t: %d\n", c->x86_stepping); - else - seq_puts(m, "stepping\t: unknown\n"); - if (c->microcode) -diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S -index 2dabea4..82155d0 100644 ---- a/arch/x86/kernel/head_32.S -+++ b/arch/x86/kernel/head_32.S -@@ -35,7 +35,7 @@ - #define X86 new_cpu_data+CPUINFO_x86 - #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor - #define X86_MODEL new_cpu_data+CPUINFO_x86_model --#define X86_MASK new_cpu_data+CPUINFO_x86_mask -+#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping - #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math - #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level - #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability -@@ -441,7 +441,7 @@ enable_paging: - shrb $4,%al - movb %al,X86_MODEL - andb $0x0f,%cl # mask mask revision -- movb %cl,X86_MASK -+ movb %cl,X86_STEPPING - movl %edx,X86_CAPABILITY - - is486: -diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c -index 0f8d204..d0fb941 100644 ---- a/arch/x86/kernel/mpparse.c -+++ b/arch/x86/kernel/mpparse.c -@@ -406,7 +406,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) - processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.cpuflag = CPU_ENABLED; - processor.cpufeature = (boot_cpu_data.x86 << 8) | -- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; -+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; - processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; - processor.reserved[0] = 0; - processor.reserved[1] = 0; -diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c -index d6f848d..2dd1fe13 100644 ---- a/arch/x86/lib/cpu.c -+++ b/arch/x86/lib/cpu.c -@@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) - { - unsigned int fam, model; - -- fam = x86_family(sig); -+ fam = x86_family(sig); - - model = (sig >> 4) & 0xf; - -diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c -index 44ce806..e278125 100644 ---- a/drivers/char/hw_random/via-rng.c -+++ b/drivers/char/hw_random/via-rng.c -@@ -166,7 +166,7 @@ static int via_rng_init(struct hwrng *rng) - /* Enable secondary noise source on CPUs where it is present. 
*/ - - /* Nehemiah stepping 8 and higher */ -- if ((c->x86_model == 9) && (c->x86_mask > 7)) -+ if ((c->x86_model == 9) && (c->x86_stepping > 7)) - lo |= VIA_NOISESRC2; - - /* Esther */ -diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c -index 297e912..1ee3674 100644 ---- a/drivers/cpufreq/acpi-cpufreq.c -+++ b/drivers/cpufreq/acpi-cpufreq.c -@@ -648,7 +648,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) - if (c->x86_vendor == X86_VENDOR_INTEL) { - if ((c->x86 == 15) && - (c->x86_model == 6) && -- (c->x86_mask == 8)) { -+ (c->x86_stepping == 8)) { - pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); - return -ENODEV; - } -diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c -index c46a12d..d5e27bc 100644 ---- a/drivers/cpufreq/longhaul.c -+++ b/drivers/cpufreq/longhaul.c -@@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) - break; - - case 7: -- switch (c->x86_mask) { -+ switch (c->x86_stepping) { - case 0: - longhaul_version = TYPE_LONGHAUL_V1; - cpu_model = CPU_SAMUEL2; -@@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) - break; - case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V2; -- if (c->x86_mask < 8) { -+ if (c->x86_stepping < 8) { - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - } else { -@@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) - numscales = 32; - memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); - memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); -- switch (c->x86_mask) { -+ switch (c->x86_stepping) { - case 0 ... 1: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah A' [C5XLOE]"; -diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c -index fd77812..a25741b 100644 ---- a/drivers/cpufreq/p4-clockmod.c -+++ b/drivers/cpufreq/p4-clockmod.c -@@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) - #endif - - /* Errata workaround */ -- cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; -+ cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping; - switch (cpuid) { - case 0x0f07: - case 0x0f0a: -diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c -index 9f013ed..ef276f6 100644 ---- a/drivers/cpufreq/powernow-k7.c -+++ b/drivers/cpufreq/powernow-k7.c -@@ -131,7 +131,7 @@ static int check_powernow(void) - return 0; - } - -- if ((c->x86_model == 6) && (c->x86_mask == 0)) { -+ if ((c->x86_model == 6) && (c->x86_stepping == 0)) { - pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); - have_a0 = 1; - } -diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c -index 41bc539..4fa5adf 100644 ---- a/drivers/cpufreq/speedstep-centrino.c -+++ b/drivers/cpufreq/speedstep-centrino.c -@@ -37,7 +37,7 @@ struct cpu_id - { - __u8 x86; /* CPU family */ - __u8 x86_model; /* model */ -- __u8 x86_mask; /* stepping */ -+ __u8 x86_stepping; /* stepping */ - }; - - enum { -@@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, - { - if ((c->x86 == x->x86) && - (c->x86_model == x->x86_model) && -- (c->x86_mask == x->x86_mask)) -+ (c->x86_stepping == x->x86_stepping)) - return 1; - return 0; - } -diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c -index 1b80621..ade98a2 100644 ---- a/drivers/cpufreq/speedstep-lib.c -+++ b/drivers/cpufreq/speedstep-lib.c -@@ -272,9 +272,9 @@ 
unsigned int speedstep_detect_processor(void) - ebx = cpuid_ebx(0x00000001); - ebx &= 0x000000FF; - -- pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); -+ pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping); - -- switch (c->x86_mask) { -+ switch (c->x86_stepping) { - case 4: - /* - * B-stepping [M-P4-M] -@@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void) - msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && - (relaxed_check ? 1 : (msr_hi & (3<<24)))) { -- if (c->x86_mask == 0x01) { -+ if (c->x86_stepping == 0x01) { - pr_debug("early PIII version\n"); - return SPEEDSTEP_CPU_PIII_C_EARLY; - } else -diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c -index 441e86b..9126627 100644 ---- a/drivers/crypto/padlock-aes.c -+++ b/drivers/crypto/padlock-aes.c -@@ -531,7 +531,7 @@ static int __init padlock_init(void) - - printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); - -- if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { -+ if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) { - ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; - cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; - printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); -diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c -index 82dab16..3cb3e8b 100644 ---- a/drivers/edac/amd64_edac.c -+++ b/drivers/edac/amd64_edac.c -@@ -3150,7 +3150,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) - struct amd64_family_type *fam_type = NULL; - - pvt->ext_model = boot_cpu_data.x86_model >> 4; -- pvt->stepping = boot_cpu_data.x86_mask; -+ pvt->stepping = boot_cpu_data.x86_stepping; - pvt->model = boot_cpu_data.x86_model; - pvt->fam = boot_cpu_data.x86; - -diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c -index 3af92fc..3d5436f 100644 ---- a/drivers/edac/mce_amd.c -+++ b/drivers/edac/mce_amd.c -@@ -949,7 +949,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) - - pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", - m->extcpu, -- c->x86, c->x86_model, c->x86_mask, -+ c->x86, c->x86_model, c->x86_stepping, - m->bank, - ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), - ((m->status & MCI_STATUS_UC) ? "UE" : -diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c -index 6a27eb2..be1e380 100644 ---- a/drivers/hwmon/coretemp.c -+++ b/drivers/hwmon/coretemp.c -@@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) - for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { - const struct tjmax_model *tm = &tjmax_model_table[i]; - if (c->x86_model == tm->model && -- (tm->mask == ANY || c->x86_mask == tm->mask)) -+ (tm->mask == ANY || c->x86_stepping == tm->mask)) - return tm->tjmax; - } - - /* Early chips have no MSR for TjMax */ - -- if (c->x86_model == 0xf && c->x86_mask < 4) -+ if (c->x86_model == 0xf && c->x86_stepping < 4) - usemsr_ee = 0; - - if (c->x86_model > 0xe && usemsr_ee) { -@@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu) - * Readings might stop update when processor visited too deep sleep, - * fixed for stepping D0 (6EC). 
- */ -- if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { -+ if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { - pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); - return -ENODEV; - } -diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c -index ef91b8a..84e9128 100644 ---- a/drivers/hwmon/hwmon-vid.c -+++ b/drivers/hwmon/hwmon-vid.c -@@ -293,7 +293,7 @@ u8 vid_which_vrm(void) - if (c->x86 < 6) /* Any CPU with family lower than 6 */ - return 0; /* doesn't have VID */ - -- vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); -+ vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor); - if (vrm_ret == 134) - vrm_ret = get_via_model_d_vrm(); - if (vrm_ret == 0) -diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c -index 9cdfde6..0124584 100644 ---- a/drivers/hwmon/k10temp.c -+++ b/drivers/hwmon/k10temp.c -@@ -179,7 +179,7 @@ static bool has_erratum_319(struct pci_dev *pdev) - * and AM3 formats, but that's the best we can do. - */ - return boot_cpu_data.x86_model < 4 || -- (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); -+ (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); - } - - static int k10temp_probe(struct pci_dev *pdev, -diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c -index 734d55d..4865027 100644 ---- a/drivers/hwmon/k8temp.c -+++ b/drivers/hwmon/k8temp.c -@@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev, - return -ENOMEM; - - model = boot_cpu_data.x86_model; -- stepping = boot_cpu_data.x86_mask; -+ stepping = boot_cpu_data.x86_stepping; - - /* feature available since SH-C0, exclude older revisions */ - if ((model == 4 && stepping == 0) || -diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c -index 6082f65..67773e8 100644 ---- a/drivers/video/fbdev/geode/video_gx.c -+++ b/drivers/video/fbdev/geode/video_gx.c -@@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info) - int timeout = 1000; - - /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ -- if (cpu_data(0).x86_mask == 1) { -+ if (cpu_data(0).x86_stepping == 1) { - pll_table = gx_pll_table_14MHz; - pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); - } else { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch deleted file mode 100644 index 147b2675..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch +++ /dev/null @@ -1,162 +0,0 @@ -From e187253b583696b67f207047bab1360cabd461c8 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:09 +0000 -Subject: [PATCH 08/42] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf - -(cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) - -This is a pure feature bits leaf. There are two AVX512 feature bits in it -already which were handled as scattered bits, and three more from this leaf -are going to be added for speculation control features. 
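The leaf in question is CPUID.(EAX=7,ECX=0):EDX, which this patch promotes to capability word 18. As a rough userspace sketch of the same bits (assuming GCC/clang's <cpuid.h>; bit positions 2 and 3 are the AVX-512 bits named above, and 26, 27 and 29 are the speculation-control bits a follow-up patch in this series defines):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* EAX=7, ECX=0: structured extended feature flags. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* EDX is what the kernel will store as x86_capability word 18. */
	printf("AVX512_4VNNIW    : %u\n", (edx >> 2) & 1);
	printf("AVX512_4FMAPS    : %u\n", (edx >> 3) & 1);
	printf("SPEC_CTRL        : %u\n", (edx >> 26) & 1);
	printf("STIBP            : %u\n", (edx >> 27) & 1);
	printf("ARCH_CAPABILITIES: %u\n", (edx >> 29) & 1);
	return 0;
}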
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeature.h | 7 +++++-- - arch/x86/include/asm/cpufeatures.h | 10 ++++++---- - arch/x86/include/asm/disabled-features.h | 3 ++- - arch/x86/include/asm/required-features.h | 3 ++- - arch/x86/kernel/cpu/common.c | 1 + - arch/x86/kernel/cpu/scattered.c | 2 -- - 6 files changed, 16 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h -index 9ea67a0..8c10157 100644 ---- a/arch/x86/include/asm/cpufeature.h -+++ b/arch/x86/include/asm/cpufeature.h -@@ -28,6 +28,7 @@ enum cpuid_leafs - CPUID_8000_000A_EDX, - CPUID_7_ECX, - CPUID_8000_0007_EBX, -+ CPUID_7_EDX, - }; - - #ifdef CONFIG_X86_FEATURE_NAMES -@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ -+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ - REQUIRED_MASK_CHECK || \ -- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) -+ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) - - #define DISABLED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ -@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ -+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ - DISABLED_MASK_CHECK || \ -- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) -+ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) - - #define cpu_has(c, bit) \ - (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 8537a21..9d4a422 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -12,7 +12,7 @@ - /* - * Defines x86 CPU feature bits - */ --#define NCAPINTS 18 /* N 32-bit words worth of info */ -+#define NCAPINTS 19 /* N 32-bit words worth of info */ - #define NBUGINTS 1 /* N 32-bit bug flags */ - - /* -@@ -197,9 +197,7 @@ - #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ - #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ - --#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ --#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ --#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ -+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ - #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ -@@ -295,6 +293,10 @@ - #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ - #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ - -+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -+ - /* - * BUG word(s) - */ -diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h -index 85599ad..8b45e08 100644 ---- a/arch/x86/include/asm/disabled-features.h -+++ b/arch/x86/include/asm/disabled-features.h -@@ -57,6 +57,7 @@ - #define DISABLED_MASK15 0 - #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) - #define DISABLED_MASK17 0 --#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) -+#define DISABLED_MASK18 0 -+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - - #endif /* _ASM_X86_DISABLED_FEATURES_H */ -diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h -index fac9a5c..6847d85 100644 ---- a/arch/x86/include/asm/required-features.h -+++ b/arch/x86/include/asm/required-features.h -@@ -100,6 +100,7 @@ - #define REQUIRED_MASK15 0 - #define REQUIRED_MASK16 0 - #define REQUIRED_MASK17 0 --#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) -+#define REQUIRED_MASK18 0 -+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - - #endif /* _ASM_X86_REQUIRED_FEATURES_H */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index d198ae0..4267273 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) - cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_7_0_EBX] = ebx; - c->x86_capability[CPUID_7_ECX] = ecx; -+ c->x86_capability[CPUID_7_EDX] = edx; - } - - /* Extended state features: level 0x0000000d */ -diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c -index b0dd9ae..afbb525 100644 ---- a/arch/x86/kernel/cpu/scattered.c -+++ b/arch/x86/kernel/cpu/scattered.c -@@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) - const struct cpuid_bit *cb; - - static const struct cpuid_bit cpuid_bits[] = { -- { 
X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, -- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch deleted file mode 100644 index d8206d02..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch +++ /dev/null @@ -1,60 +0,0 @@ -From ed22188fb6b2b43b2af7b1f6714d3befb6fe7965 Mon Sep 17 00:00:00 2001 -From: Tony Luck <tony.luck@intel.com> -Date: Fri, 25 May 2018 14:42:09 -0700 -Subject: [PATCH 08/10] x86/mce: Check for alternate indication of machine - check recovery on Skylake - -commit 4c5717da1d021cf368eabb3cb1adcaead56c0d1e upstream. - -Currently we just check the "CAPID0" register to see whether the CPU -can recover from machine checks. - -But there are also some special SKUs which do not have all advanced -RAS features, but do enable machine check recovery for use with NVDIMMs. - -Add a check for any of bits {8:5} in the "CAPID5" register (each -reports some NVDIMM mode available, if any of them are set, then -the system supports memory machine check recovery). - -Signed-off-by: Tony Luck <tony.luck@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com> -Cc: Ashok Raj <ashok.raj@intel.com> -Cc: stable@vger.kernel.org # 4.9 -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Borislav Petkov <bp@suse.de> -Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/quirks.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c -index 0bee04d..b57100a 100644 ---- a/arch/x86/kernel/quirks.c -+++ b/arch/x86/kernel/quirks.c -@@ -643,12 +643,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) - /* Skylake */ - static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) - { -- u32 capid0; -+ u32 capid0, capid5; - - pci_read_config_dword(pdev, 0x84, &capid0); -+ pci_read_config_dword(pdev, 0x98, &capid5); - -- if ((capid0 & 0xc0) == 0xc0) -+ /* -+ * CAPID0{7:6} indicate whether this is an advanced RAS SKU -+ * CAPID5{8:5} indicate that various NVDIMM usage modes are -+ * enabled, so memory machine check recovery is also enabled. 
-+ */ -+ if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) - static_branch_inc(&mcsafe_key); -+ - } - DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); - DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch deleted file mode 100644 index 1b5231fc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 7737fc421365d9f2fd328b19fdccf005092d4ec1 Mon Sep 17 00:00:00 2001 -From: Tiantian Feng <fengtiantian@huawei.com> -Date: Wed, 19 Apr 2017 18:18:39 +0200 -Subject: [PATCH 08/93] x86/reboot: Turn off KVM when halting a CPU - -[ Upstream commit fba4f472b33aa81ca1836f57d005455261e9126f ] - -A CPU in VMX root mode will ignore INIT signals and will fail to bring -up the APs after reboot. Therefore, on a panic we disable VMX on all -CPUs before rebooting or triggering kdump. - -Do this when halting the machine as well, in case a firmware-level reboot -does not perform a cold reset for all processors. Without doing this, -rebooting the host may hang. - -Signed-off-by: Tiantian Feng <fengtiantian@huawei.com> -Signed-off-by: Xishi Qiu <qiuxishi@huawei.com> -[ Rewritten commit message. ] -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: kvm@vger.kernel.org -Link: http://lkml.kernel.org/r/20170419161839.30550-1-pbonzini@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/smp.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c -index c00cb64..420f2dc 100644 ---- a/arch/x86/kernel/smp.c -+++ b/arch/x86/kernel/smp.c -@@ -33,6 +33,7 @@ - #include <asm/mce.h> - #include <asm/trace/irq_vectors.h> - #include <asm/kexec.h> -+#include <asm/virtext.h> - - /* - * Some notes on x86 processor bugs affecting SMP operation: -@@ -162,6 +163,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) - if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) - return NMI_HANDLED; - -+ cpu_emergency_vmxoff(); - stop_this_cpu(NULL); - - return NMI_HANDLED; -@@ -174,6 +176,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) - asmlinkage __visible void smp_reboot_interrupt(void) - { - ipi_entering_ack_irq(); -+ cpu_emergency_vmxoff(); - stop_this_cpu(NULL); - irq_exit(); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch deleted file mode 100644 index 0f35decd..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 03a686fb1ba599b2ed6b0bb256fa364f629ed2c7 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Mon, 26 Feb 2018 09:35:01 -0500 -Subject: [PATCH 08/14] x86/spectre_v2: Don't check microcode 
versions when - running under hypervisors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 36268223c1e9981d6cfc33aff8520b3bde4b8114 upstream. - -As: - - 1) It's known that hypervisors lie about the environment anyhow (host - mismatch) - - 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid - "correct" value, it all gets to be very murky when migration happens - (do you provide the "new" microcode of the machine?). - -And in reality the cloud vendors are the ones that should make sure that -the microcode that is running is correct and we should just sing lalalala -and trust them. - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Wanpeng Li <kernellwp@gmail.com> -Cc: kvm <kvm@vger.kernel.org> -Cc: Krčmář <rkrcmar@redhat.com> -Cc: Borislav Petkov <bp@alien8.de> -CC: "H. Peter Anvin" <hpa@zytor.com> -CC: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/intel.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 6ed206b..7680425 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -103,6 +103,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) - { - int i; - -+ /* -+ * We know that the hypervisor lie to us on the microcode version so -+ * we may as well hope that it is running the correct version. -+ */ -+ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) -+ return false; -+ - for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { - if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_stepping == spectre_bad_microcodes[i].stepping) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch deleted file mode 100644 index 071eccd3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch +++ /dev/null @@ -1,61 +0,0 @@ -From a4337b660fe26046e81471186dc393ca77371b83 Mon Sep 17 00:00:00 2001 -From: Nikita Leshenko <nikita.leshchenko@oracle.com> -Date: Sun, 5 Nov 2017 15:52:33 +0200 -Subject: [PATCH 09/33] KVM: x86: ioapic: Preserve read-only values in the - redirection table -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ] - -According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are -read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb -("ioapic: keep RO bits for IOAPIC entry"). 
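The change below amounts to a read-modify-write that re-asserts the read-only fields after the guest's write lands. A simplified standalone sketch of that pattern; the bitfield layout follows the 82093AA redirection-entry format and is illustrative, not the exact kvm_ioapic_redirect_entry definition:

#include <stdint.h>

/* Simplified redirection entry, per the 82093AA datasheet layout. */
union redir_entry {
	uint64_t bits;
	struct {
		uint64_t vector          : 8;
		uint64_t delivery_mode   : 3;
		uint64_t dest_mode       : 1;
		uint64_t delivery_status : 1;	/* read-only */
		uint64_t polarity        : 1;
		uint64_t remote_irr      : 1;	/* read-only */
		uint64_t trig_mode       : 1;
		uint64_t mask            : 1;
		uint64_t reserved        : 39;
		uint64_t dest_id         : 8;
	} fields;
};

/* Apply a guest write to one 32-bit half, keeping the RO fields. */
static void redir_write(union redir_entry *e, int high, uint32_t val)
{
	unsigned int old_remote_irr = e->fields.remote_irr;
	unsigned int old_delivery_status = e->fields.delivery_status;

	if (high) {
		e->bits &= 0xffffffffULL;
		e->bits |= (uint64_t)val << 32;
	} else {
		e->bits &= ~0xffffffffULL;
		e->bits |= val;
	}
	e->fields.remote_irr = old_remote_irr;
	e->fields.delivery_status = old_delivery_status;
}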
- -Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Liran Alon <liran.alon@oracle.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> -Reviewed-by: Steve Rutherford <srutherford@google.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/ioapic.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c -index 4b573c8..5f810bb 100644 ---- a/arch/x86/kvm/ioapic.c -+++ b/arch/x86/kvm/ioapic.c -@@ -278,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) - { - unsigned index; - bool mask_before, mask_after; -+ int old_remote_irr, old_delivery_status; - union kvm_ioapic_redirect_entry *e; - - switch (ioapic->ioregsel) { -@@ -300,6 +301,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) - return; - e = &ioapic->redirtbl[index]; - mask_before = e->fields.mask; -+ /* Preserve read-only fields */ -+ old_remote_irr = e->fields.remote_irr; -+ old_delivery_status = e->fields.delivery_status; - if (ioapic->ioregsel & 1) { - e->bits &= 0xffffffff; - e->bits |= (u64) val << 32; -@@ -307,6 +311,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) - e->bits &= ~0xffffffffULL; - e->bits |= (u32) val; - } -+ e->fields.remote_irr = old_remote_irr; -+ e->fields.delivery_status = old_delivery_status; - - /* - * Some OSes (Linux, Xen) assume that Remote IRR bit will --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch deleted file mode 100644 index 19dfa3a4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch +++ /dev/null @@ -1,263 +0,0 @@ -From d901d344ca4172a49bab9852e993e5a2c47a7fde Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Mon, 19 Feb 2018 10:50:56 +0000 -Subject: [PATCH 09/14] Revert "x86/retpoline: Simplify vmexit_fill_RSB()" - -commit d1c99108af3c5992640aa2afa7d2e88c3775c06e upstream. - -This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting -the RSB filling out of line and calling it, we waste one RSB slot for -returning from the function itself, which means one fewer actual function -call we can make if we're doing the Skylake abomination of call-depth -counting. - -It also changed the number of RSB stuffings we do on vmexit from 32, -which was correct, to 16. Let's just stop with the bikeshedding; it -didn't actually *fix* anything anyway. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: arjan.van.de.ven@intel.com -Cc: bp@alien8.de -Cc: dave.hansen@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Cc: rkrcmar@redhat.com -Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_32.S | 3 +- - arch/x86/entry/entry_64.S | 3 +- - arch/x86/include/asm/asm-prototypes.h | 3 -- - arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++---- - arch/x86/lib/Makefile | 1 - - arch/x86/lib/retpoline.S | 56 ---------------------------- - 6 files changed, 65 insertions(+), 71 deletions(-) - -diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index f5434b4..a76dc73 100644 ---- a/arch/x86/entry/entry_32.S -+++ b/arch/x86/entry/entry_32.S -@@ -237,8 +237,7 @@ ENTRY(__switch_to_asm) - * exist, overwrite the RSB with entries which capture - * speculative execution to prevent attack. - */ -- /* Clobbers %ebx */ -- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW - #endif - - /* restore callee-saved registers */ -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index e9120d4..caf79e3 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -331,8 +331,7 @@ ENTRY(__switch_to_asm) - * exist, overwrite the RSB with entries which capture - * speculative execution to prevent attack. - */ -- /* Clobbers %rbx */ -- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW - #endif - - /* restore callee-saved registers */ -diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h -index 1666542..5a25ada 100644 ---- a/arch/x86/include/asm/asm-prototypes.h -+++ b/arch/x86/include/asm/asm-prototypes.h -@@ -37,7 +37,4 @@ INDIRECT_THUNK(dx) - INDIRECT_THUNK(si) - INDIRECT_THUNK(di) - INDIRECT_THUNK(bp) --asmlinkage void __fill_rsb(void); --asmlinkage void __clear_rsb(void); -- - #endif /* CONFIG_RETPOLINE */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 81a1be3..dace2de 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -8,6 +8,50 @@ - #include <asm/cpufeatures.h> - #include <asm/msr-index.h> - -+/* -+ * Fill the CPU return stack buffer. -+ * -+ * Each entry in the RSB, if used for a speculative 'ret', contains an -+ * infinite 'pause; lfence; jmp' loop to capture speculative execution. -+ * -+ * This is required in various cases for retpoline and IBRS-based -+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to -+ * eliminate potentially bogus entries from the RSB, and sometimes -+ * purely to ensure that it doesn't get empty, which on some CPUs would -+ * allow predictions from other (unwanted!) sources to be used. -+ * -+ * We define a CPP macro such that it can be used from both .S files and -+ * inline assembly. It's possible to do a .macro and then include that -+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. 
-+ */ -+ -+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -+#define RSB_FILL_LOOPS 16 /* To avoid underflow */ -+ -+/* -+ * Google experimented with loop-unrolling and this turned out to be -+ * the optimal version — two calls, each with their own speculation -+ * trap should their return address end up getting used, in a loop. -+ */ -+#define __FILL_RETURN_BUFFER(reg, nr, sp) \ -+ mov $(nr/2), reg; \ -+771: \ -+ call 772f; \ -+773: /* speculation trap */ \ -+ pause; \ -+ lfence; \ -+ jmp 773b; \ -+772: \ -+ call 774f; \ -+775: /* speculation trap */ \ -+ pause; \ -+ lfence; \ -+ jmp 775b; \ -+774: \ -+ dec reg; \ -+ jnz 771b; \ -+ add $(BITS_PER_LONG/8) * nr, sp; -+ - #ifdef __ASSEMBLY__ - - /* -@@ -78,10 +122,17 @@ - #endif - .endm - --/* This clobbers the BX register */ --.macro FILL_RETURN_BUFFER nr:req ftr:req -+ /* -+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP -+ * monstrosity above, manually. -+ */ -+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req - #ifdef CONFIG_RETPOLINE -- ALTERNATIVE "", "call __clear_rsb", \ftr -+ ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE "jmp .Lskip_rsb_\@", \ -+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ -+ \ftr -+.Lskip_rsb_\@: - #endif - .endm - -@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[]; - static inline void vmexit_fill_RSB(void) - { - #ifdef CONFIG_RETPOLINE -- alternative_input("", -- "call __fill_rsb", -- X86_FEATURE_RETPOLINE, -- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); -+ unsigned long loops; -+ -+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE("jmp 910f", -+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), -+ X86_FEATURE_RETPOLINE) -+ "910:" -+ : "=r" (loops), ASM_CALL_CONSTRAINT -+ : : "memory" ); - #endif - } - -diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile -index 4ad7c4d..6bf1898 100644 ---- a/arch/x86/lib/Makefile -+++ b/arch/x86/lib/Makefile -@@ -26,7 +26,6 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o - lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o - lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o - lib-$(CONFIG_RETPOLINE) += retpoline.o --OBJECT_FILES_NON_STANDARD_retpoline.o :=y - - obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o - -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -index 480edc3..c909961 100644 ---- a/arch/x86/lib/retpoline.S -+++ b/arch/x86/lib/retpoline.S -@@ -7,7 +7,6 @@ - #include <asm/alternative-asm.h> - #include <asm/export.h> - #include <asm/nospec-branch.h> --#include <asm/bitsperlong.h> - - .macro THUNK reg - .section .text.__x86.indirect_thunk -@@ -47,58 +46,3 @@ GENERATE_THUNK(r13) - GENERATE_THUNK(r14) - GENERATE_THUNK(r15) - #endif -- --/* -- * Fill the CPU return stack buffer. -- * -- * Each entry in the RSB, if used for a speculative 'ret', contains an -- * infinite 'pause; lfence; jmp' loop to capture speculative execution. -- * -- * This is required in various cases for retpoline and IBRS-based -- * mitigations for the Spectre variant 2 vulnerability. Sometimes to -- * eliminate potentially bogus entries from the RSB, and sometimes -- * purely to ensure that it doesn't get empty, which on some CPUs would -- * allow predictions from other (unwanted!) sources to be used. -- * -- * Google experimented with loop-unrolling and this turned out to be -- * the optimal version - two calls, each with their own speculation -- * trap should their return address end up getting used, in a loop. 
-- */ --.macro STUFF_RSB nr:req sp:req -- mov $(\nr / 2), %_ASM_BX -- .align 16 --771: -- call 772f --773: /* speculation trap */ -- pause -- lfence -- jmp 773b -- .align 16 --772: -- call 774f --775: /* speculation trap */ -- pause -- lfence -- jmp 775b -- .align 16 --774: -- dec %_ASM_BX -- jnz 771b -- add $((BITS_PER_LONG/8) * \nr), \sp --.endm -- --#define RSB_FILL_LOOPS 16 /* To avoid underflow */ -- --ENTRY(__fill_rsb) -- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP -- ret --END(__fill_rsb) --EXPORT_SYMBOL_GPL(__fill_rsb) -- --#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -- --ENTRY(__clear_rsb) -- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP -- ret --END(__clear_rsb) --EXPORT_SYMBOL_GPL(__clear_rsb) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch deleted file mode 100644 index 206cd97f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-kaiser-KAISER-depends-on-SMP.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 876bf15aa8a6a2355ed9f880b5f52f1287e44b39 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Wed, 13 Sep 2017 14:03:10 -0700 -Subject: [PATCH 009/103] kaiser: KAISER depends on SMP - -It is absurd that KAISER should depend on SMP, but apparently nobody -has tried a UP build before: which breaks on implicit declaration of -function 'per_cpu_offset' in arch/x86/mm/kaiser.c. - -Now, you would expect that to be trivially fixed up; but looking at -the System.map when that block is #ifdef'ed out of kaiser_init(), -I see that in a UP build __per_cpu_user_mapped_end is precisely at -__per_cpu_user_mapped_start, and the items carefully gathered into -that section for user-mapping on SMP, dispersed elsewhere on UP. - -So, some other kind of section assignment will be needed on UP, -but implementing that is not a priority: just make KAISER depend -on SMP for now. - -Also inserted a blank line before the option, tidied up the -brief Kconfig help message, and added an "If unsure, Y". - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - security/Kconfig | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/security/Kconfig b/security/Kconfig -index 334d2e8..dc78671 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -30,14 +30,16 @@ config SECURITY - model will be used. - - If you are unsure how to answer this question, answer N. -+ - config KAISER - bool "Remove the kernel mapping in user mode" - default y -- depends on X86_64 -- depends on !PARAVIRT -+ depends on X86_64 && SMP && !PARAVIRT - help -- This enforces a strict kernel and user space isolation in order to close -- hardware side channels on kernel address information. -+ This enforces a strict kernel and user space isolation, in order -+ to close hardware side channels on kernel address information. -+ -+ If you are unsure how to answer this question, answer Y. 
- - config KAISER_REAL_SWITCH - bool "KAISER: actually switch page tables" --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch deleted file mode 100644 index 1e9973e7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 29fa51519ae0978980c8fc154eba5b244ad7980f Mon Sep 17 00:00:00 2001 -From: Baoquan He <bhe@redhat.com> -Date: Thu, 27 Apr 2017 15:42:20 +0800 -Subject: [PATCH 09/93] x86/KASLR: Fix kexec kernel boot crash when KASLR - randomization fails - -[ Upstream commit da63b6b20077469bd6bd96e07991ce145fc4fbc4 ] - -Dave found that a kdump kernel with KASLR enabled will reset to the BIOS -immediately if physical randomization failed to find a new position for -the kernel. A kernel with the 'nokaslr' option works in this case. - -The reason is that KASLR will install a new page table for the identity -mapping, while it missed building it for the original kernel location -if KASLR physical randomization fails. - -This only happens in the kexec/kdump kernel, because the identity mapping -has been built for kexec/kdump in the 1st kernel for the whole memory by -calling init_pgtable(). Here if physical randomizaiton fails, it won't build -the identity mapping for the original area of the kernel but change to a -new page table '_pgtable'. Then the kernel will triple fault immediately -caused by no identity mappings. - -The normal kernel won't see this bug, because it comes here via startup_32() -and CR3 will be set to _pgtable already. In startup_32() the identity -mapping is built for the 0~4G area. In KASLR we just append to the existing -area instead of entirely overwriting it for on-demand identity mapping -building. So the identity mapping for the original area of kernel is still -there. - -To fix it we just switch to the new identity mapping page table when physical -KASLR succeeds. Otherwise we keep the old page table unchanged just like -"nokaslr" does. - -Signed-off-by: Baoquan He <bhe@redhat.com> -Signed-off-by: Dave Young <dyoung@redhat.com> -Acked-by: Kees Cook <keescook@chromium.org> -Cc: Borislav Petkov <bp@suse.de> -Cc: Dave Jiang <dave.jiang@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Garnier <thgarnie@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Yinghai Lu <yinghai@kernel.org> -Link: http://lkml.kernel.org/r/1493278940-5885-1-git-send-email-bhe@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/boot/compressed/kaslr.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c -index a66854d..af42b4d 100644 ---- a/arch/x86/boot/compressed/kaslr.c -+++ b/arch/x86/boot/compressed/kaslr.c -@@ -463,10 +463,17 @@ void choose_random_location(unsigned long input, - add_identity_map(random_addr, output_size); - *output = random_addr; - } -+ -+ /* -+ * This loads the identity mapping page table. 
-+ * This should only be done if a new physical address -+ * is found for the kernel, otherwise we should keep -+ * the old page table to make it be like the "nokaslr" -+ * case. -+ */ -+ finalize_identity_maps(); - } - -- /* This actually loads the identity pagetable on x86_64. */ -- finalize_identity_maps(); - - /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ - if (IS_ENABLED(CONFIG_X86_64)) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch deleted file mode 100644 index 1de4e886..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch +++ /dev/null @@ -1,51 +0,0 @@ -From a56ed550fd79c3bab8aa9d0f136086314dc377f5 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:10 +0000 -Subject: [PATCH 09/42] x86/cpufeatures: Add Intel feature bits for Speculation - Control - -(cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61) - -Add three feature bits exposed by new microcode on Intel CPUs for -speculation control. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 9d4a422..1f03888 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -296,6 +296,9 @@ - /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ - #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -+#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ -+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ - - /* - * BUG word(s) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch deleted file mode 100644 index 76fa3b70..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 1357825b6905bcf665161dc41b764a83b21954e9 Mon Sep 17 00:00:00 2001 -From: Tony Luck <tony.luck@intel.com> -Date: Fri, 22 Jun 2018 11:54:23 +0200 -Subject: [PATCH 09/10] x86/mce: Fix incorrect "Machine check from unknown - source" 
message - -commit 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 upstream. - -Some injection testing resulted in the following console log: - - mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134 - mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]} - mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88 - mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043 - mce: [Hardware Error]: Run the above through 'mcelog --ascii' - Kernel panic - not syncing: Machine check from unknown source - -This confused everybody because the first line quite clearly shows -that we found a logged error in "Bank 1", while the last line says -"unknown source". - -The problem is that the Linux code doesn't do the right thing -for a local machine check that results in a fatal error. - -It turns out that we know very early in the handler whether the -machine check is fatal. The call to mce_no_way_out() has checked -all the banks for the CPU that took the local machine check. If -it says we must crash, we can do so right away with the right -messages. - -We do scan all the banks again. This means that we might initially -not see a problem, but during the second scan find something fatal. -If this happens we print a slightly different message (so I can -see if it actually every happens). - -[ bp: Remove unneeded severity assignment. ] - -Signed-off-by: Tony Luck <tony.luck@intel.com> -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Ashok Raj <ashok.raj@intel.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com> -Cc: linux-edac <linux-edac@vger.kernel.org> -Cc: stable@vger.kernel.org # 4.2 -Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++-------- - 1 file changed, 18 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c -index 72bcd08..4711e1c 100644 ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -1169,13 +1169,18 @@ void do_machine_check(struct pt_regs *regs, long error_code) - lmce = m.mcgstatus & MCG_STATUS_LMCES; - - /* -+ * Local machine check may already know that we have to panic. -+ * Broadcast machine check begins rendezvous in mce_start() - * Go through all banks in exclusion of the other CPUs. This way we - * don't report duplicated events on shared banks because the first one -- * to see it will clear it. If this is a Local MCE, then no need to -- * perform rendezvous. -+ * to see it will clear it. - */ -- if (!lmce) -+ if (lmce) { -+ if (no_way_out) -+ mce_panic("Fatal local machine check", &m, msg); -+ } else { - order = mce_start(&no_way_out); -+ } - - for (i = 0; i < cfg->banks; i++) { - __clear_bit(i, toclear); -@@ -1251,12 +1256,17 @@ void do_machine_check(struct pt_regs *regs, long error_code) - no_way_out = worst >= MCE_PANIC_SEVERITY; - } else { - /* -- * Local MCE skipped calling mce_reign() -- * If we found a fatal error, we need to panic here. -+ * If there was a fatal machine check we should have -+ * already called mce_panic earlier in this function. -+ * Since we re-read the banks, we might have found -+ * something new. Check again to see if we found a -+ * fatal error. 
We call "mce_severity()" again to -+ * make sure we have the right "msg". - */ -- if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) -- mce_panic("Machine check from unknown source", -- NULL, NULL); -+ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { -+ mce_severity(&m, cfg->tolerant, &msg, true); -+ mce_panic("Local fatal machine check!", &m, msg); -+ } - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch deleted file mode 100644 index b3f35a95..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-spectre-Fix-an-error-message.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 6893aed64644e59c2aec9a347e6a324233b81dd7 Mon Sep 17 00:00:00 2001 -From: Dan Carpenter <dan.carpenter@oracle.com> -Date: Wed, 14 Feb 2018 10:14:17 +0300 -Subject: [PATCH 09/12] x86/spectre: Fix an error message - -commit 9de29eac8d2189424d81c0d840cd0469aa3d41c8 upstream. - -If i == ARRAY_SIZE(mitigation_options) then we accidentally print -garbage from one space beyond the end of the mitigation_options[] array. - -Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@suse.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: KarimAllah Ahmed <karahmed@amazon.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: kernel-janitors@vger.kernel.org -Fixes: 9005c6834c0f ("x86/spectre: Simplify spectre_v2 command line parsing") -Link: http://lkml.kernel.org/r/20180214071416.GA26677@mwanda -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index b83e0c9..baddc9e 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -173,7 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - } - - if (i >= ARRAY_SIZE(mitigation_options)) { -- pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); -+ pr_err("unknown option (%s). 
Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch deleted file mode 100644 index 7ab25b0b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch +++ /dev/null @@ -1,103 +0,0 @@ -From fc18f773d54edfedf8875473d8e69753265a3dfd Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Mon, 20 Nov 2017 14:52:21 -0800 -Subject: [PATCH 10/33] KVM: VMX: Fix rflags cache during vCPU reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ] - -Reported by syzkaller: - - *** Guest State *** - CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 - CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 - CR3 = 0x000000002081e000 - RSP = 0x000000000000fffa RIP = 0x0000000000000000 - RFLAGS=0x00023000 DR7 = 0x00000000000000 - ^^^^^^^^^^ - ------------[ cut here ]------------ - WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] - CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 - RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] - RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 - Call Trace: - kvm_vcpu_ioctl+0x479/0x880 [kvm] - do_vfs_ioctl+0x142/0x9a0 - SyS_ioctl+0x74/0x80 - entry_SYSCALL_64_fastpath+0x23/0x9a - -The failed vmentry is triggered by the following beautified testcase: - - #include <unistd.h> - #include <sys/syscall.h> - #include <string.h> - #include <stdint.h> - #include <linux/kvm.h> - #include <fcntl.h> - #include <sys/ioctl.h> - - long r[5]; - int main() - { - struct kvm_debugregs dr = { 0 }; - - r[2] = open("/dev/kvm", O_RDONLY); - r[3] = ioctl(r[2], KVM_CREATE_VM, 0); - r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); - struct kvm_guest_debug debug = { - .control = 0xf0403, - .arch = { - .debugreg[6] = 0x2, - .debugreg[7] = 0x2 - } - }; - ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); - ioctl(r[4], KVM_RUN, 0); - } - -which testcase tries to setup the processor specific debug -registers and configure vCPU for handling guest debug events through -KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set -rflags in order to set TF bit if single step is needed. All regs' caches -are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU -reset. However, the cache of rflags is not reset during vCPU reset. The -function vmx_get_rflags() returns an unreset rflags cache value since -the cache is marked avail, it is 0 after boot. Vmentry fails if the -rflags reserved bit 1 is 0. - -This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and -its cache to 0x2 during vCPU reset. 
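In the same spirit as the reproducer above, a hedged way to observe the fixed behaviour from userspace is to read the vCPU registers right after creation and check the architecturally fixed bit 1 of RFLAGS (X86_EFLAGS_FIXED, value 0x2), which vmentry requires to be set. Error handling is omitted and access to /dev/kvm is assumed:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDONLY);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	struct kvm_regs regs;

	ioctl(vcpu, KVM_GET_REGS, &regs);
	printf("rflags after vCPU reset: 0x%llx (bit 1 %s)\n",
	       (unsigned long long)regs.rflags,
	       (regs.rflags & 0x2) ? "set" : "clear");
	return 0;
}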
- -Reported-by: Dmitry Vyukov <dvyukov@google.com> -Tested-by: Dmitry Vyukov <dvyukov@google.com> -Reviewed-by: David Hildenbrand <david@redhat.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Nadav Amit <nadav.amit@gmail.com> -Cc: Dmitry Vyukov <dvyukov@google.com> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 8e5001d..98f6545 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -5171,7 +5171,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - -- vmcs_writel(GUEST_RFLAGS, 0x02); -+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - - vmcs_writel(GUEST_GDTR_BASE, 0); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch deleted file mode 100644 index 0021537f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 74fc29fe722da8a939d8fa59e6ba835296c9bc56 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Thu, 21 Sep 2017 20:39:56 -0700 -Subject: [PATCH 010/103] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER - -pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi() -adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly -when not CONFIG_KAISER. - -Although the minimal change is to add an #ifdef CONFIG_KAISER around -the addq line, that looks cluttered, and I prefer how the first call -to do_nmi() handled it: prepare args in %rdi and %rsi before getting -into the CONFIG_KAISER block, since it does not touch them at all. - -And while we're here, place the "#ifdef CONFIG_KAISER" that follows -each, to enclose the "Unconditionally restore CR3" comment: matching -how the "Unconditionally use kernel CR3" comment above is enclosed. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index d84e3a7..57f7993 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1321,12 +1321,13 @@ ENTRY(nmi) - movq %rax, %cr3 - #endif - call do_nmi -+ -+#ifdef CONFIG_KAISER - /* - * Unconditionally restore CR3. I know we return to - * kernel code that needs user CR3, but do we ever return - * to "user mode" where we need the kernel CR3? 
- */ --#ifdef CONFIG_KAISER - popq %rax - mov %rax, %cr3 - #endif -@@ -1550,6 +1551,8 @@ end_repeat_nmi: - SWAPGS - xorl %ebx, %ebx - 1: -+ movq %rsp, %rdi -+ movq $-1, %rsi - #ifdef CONFIG_KAISER - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ -@@ -1562,16 +1565,14 @@ end_repeat_nmi: - #endif - - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ -- movq %rsp, %rdi -- addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */ -- movq $-1, %rsi - call do_nmi -+ -+#ifdef CONFIG_KAISER - /* - * Unconditionally restore CR3. We might be returning to - * kernel code that needs user CR3, like just just before - * a sysret. - */ --#ifdef CONFIG_KAISER - popq %rax - mov %rax, %cr3 - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch deleted file mode 100644 index aef1109b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 694ba89c4cb4e43ae4cb418ea46b1415f6d31ce7 Mon Sep 17 00:00:00 2001 -From: Linus Torvalds <torvalds@linux-foundation.org> -Date: Tue, 20 Mar 2018 12:16:59 -0700 -Subject: [PATCH 10/93] kvm/x86: fix icebp instruction handling - -commit 32d43cd391bacb5f0814c2624399a5dad3501d09 upstream. - -The undocumented 'icebp' instruction (aka 'int1') works pretty much like -'int3' in the absense of in-circuit probing equipment (except, -obviously, that it raises #DB instead of raising #BP), and is used by -some validation test-suites as such. - -But Andy Lutomirski noticed that his test suite acted differently in kvm -than on bare hardware. - -The reason is that kvm used an inexact test for the icebp instruction: -it just assumed that an all-zero VM exit qualification value meant that -the VM exit was due to icebp. - -That is not unlike the guess that do_debug() does for the actual -exception handling case, but it's purely a heuristic, not an absolute -rule. do_debug() does it because it wants to ascribe _some_ reasons to -the #DB that happened, and an empty %dr6 value means that 'icebp' is the -most likely casue and we have no better information. - -But kvm can just do it right, because unlike the do_debug() case, kvm -actually sees the real reason for the #DB in the VM-exit interruption -information field. - -So instead of relying on an inexact heuristic, just use the actual VM -exit information that says "it was 'icebp'". - -Right now the 'icebp' instruction isn't technically documented by Intel, -but that will hopefully change. The special "privileged software -exception" information _is_ actually mentioned in the Intel SDM, even -though the cause of it isn't enumerated. 
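The decisive detail is that VMX reports the event type in the exit interruption-information field, so the check can be exact rather than a guess from an empty %dr6. A minimal userspace sketch of that classification, reusing the bit layout of the constants this patch adds (the surrounding scaffolding and test values are invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_INTR_TYPE_MASK	(7u << 8)
#define INTR_INFO_VALID_MASK		(1u << 31)
#define INTR_TYPE_HARD_EXCEPTION	(3u << 8)	/* e.g. a hardware #DB */
#define INTR_TYPE_PRIV_SW_EXCEPTION	(5u << 8)	/* icebp / int1 */

static bool is_icebp(uint32_t intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
}

int main(void)
{
	/* vector 1 (#DB) in the low byte; only the type bits differ */
	uint32_t icebp_exit = INTR_INFO_VALID_MASK | INTR_TYPE_PRIV_SW_EXCEPTION | 1;
	uint32_t hw_db_exit = INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | 1;

	printf("icebp exit -> %d, hardware #DB exit -> %d\n",
	       is_icebp(icebp_exit), is_icebp(hw_db_exit));
	return 0;
}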
- -Reported-by: Andy Lutomirski <luto@kernel.org> -Tested-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/vmx.h | 1 + - arch/x86/kvm/vmx.c | 9 ++++++++- - 2 files changed, 9 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h -index 6899cf1..9cbfbef 100644 ---- a/arch/x86/include/asm/vmx.h -+++ b/arch/x86/include/asm/vmx.h -@@ -309,6 +309,7 @@ enum vmcs_field { - #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ - #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ - #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ -+#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ - #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ - - /* GUEST_INTERRUPTIBILITY_INFO flags. */ -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 3c3558b..27f505d 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -1053,6 +1053,13 @@ static inline bool is_machine_check(u32 intr_info) - (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); - } - -+/* Undocumented: icebp/int1 */ -+static inline bool is_icebp(u32 intr_info) -+{ -+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) -+ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); -+} -+ - static inline bool cpu_has_vmx_msr_bitmap(void) - { - return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; -@@ -5708,7 +5715,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= dr6 | DR6_RTM; -- if (!(dr6 & ~DR6_RESERVED)) /* icebp */ -+ if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); - - kvm_queue_exception(vcpu, DB_VECTOR); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch deleted file mode 100644 index 68e82a01..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 5d671cb212c75a4adebb52863b5e9d370c8c23c1 Mon Sep 17 00:00:00 2001 -From: "Gustavo A. R. Silva" <garsilva@embeddedor.com> -Date: Tue, 13 Feb 2018 13:22:08 -0600 -Subject: [PATCH 10/12] x86/cpu: Change type of x86_cache_size variable to - unsigned int - -commit 24dbc6000f4b9b0ef5a9daecb161f1907733765a upstream. - -Currently, x86_cache_size is of type int, which makes no sense as we -will never have a valid cache size equal or less than 0. So instead of -initializing this variable to -1, it can perfectly be initialized to 0 -and use it as an unsigned variable instead. - -Suggested-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Gustavo A. R. 
Silva <garsilva@embeddedor.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Addresses-Coverity-ID: 1464429 -Link: http://lkml.kernel.org/r/20180213192208.GA26414@embeddedor.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/processor.h | 2 +- - arch/x86/kernel/cpu/common.c | 2 +- - arch/x86/kernel/cpu/proc.c | 4 ++-- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index df29212..d51e679 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -113,7 +113,7 @@ struct cpuinfo_x86 { - char x86_vendor_id[16]; - char x86_model_id[64]; - /* in KB - valid for CPUS which support this call: */ -- int x86_cache_size; -+ unsigned int x86_cache_size; - int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ - int x86_cache_max_rmid; /* max index */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 96b2c83..301bbd1 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -1144,7 +1144,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) - int i; - - c->loops_per_jiffy = loops_per_jiffy; -- c->x86_cache_size = -1; -+ c->x86_cache_size = 0; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_stepping = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ -diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c -index 9e817f2..c4f772d 100644 ---- a/arch/x86/kernel/cpu/proc.c -+++ b/arch/x86/kernel/cpu/proc.c -@@ -87,8 +87,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) - } - - /* Cache size */ -- if (c->x86_cache_size >= 0) -- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); -+ if (c->x86_cache_size) -+ seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); - - show_cpuinfo_core(m, c, cpu); - show_cpuinfo_misc(m, c); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch deleted file mode 100644 index 9417a4ec..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 3a855b66f0fb7388b32ed33a536b4f68cd09afc3 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:11 +0000 -Subject: [PATCH 10/42] x86/cpufeatures: Add AMD feature bits for Speculation - Control - -(cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7) - -AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. 
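For readers decoding the bit positions below: word 13 corresponds to CPUID leaf 0x80000008 EBX, and an encoding like 13*32+12 is simply bit 12 of that word. A small sketch of the arithmetic (the FEATURE macro is illustrative only; the kernel stores these in fixed-width capability arrays rather than decoding them like this):

#include <stdio.h>

#define FEATURE(word, bit)		((word) * 32 + (bit))
#define X86_FEATURE_AMD_PRED_CMD	FEATURE(13, 12)
#define X86_FEATURE_AMD_SPEC_CTRL	FEATURE(13, 14)
#define X86_FEATURE_AMD_STIBP		FEATURE(13, 15)

int main(void)
{
	int feats[] = { X86_FEATURE_AMD_PRED_CMD,
			X86_FEATURE_AMD_SPEC_CTRL,
			X86_FEATURE_AMD_STIBP };

	for (int i = 0; i < 3; i++)
		printf("feature %3d -> word %d, bit %2d of CPUID 0x80000008 EBX\n",
		       feats[i], feats[i] / 32, feats[i] % 32);
	return 0;
}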
-See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 1f03888..c4d03e7 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -258,6 +258,9 @@ - /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ - #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ - #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -+#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -+#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ - - /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ - #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch deleted file mode 100644 index d00a4886..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 754013b3067881c493df74f91ad34099c3a32c61 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Fri, 22 Jun 2018 11:54:28 +0200 -Subject: [PATCH 10/10] x86/mce: Do not overwrite MCi_STATUS in - mce_no_way_out() - -commit 1f74c8a64798e2c488f86efc97e308b85fb7d7aa upstream. - -mce_no_way_out() does a quick check during #MC to see whether some of -the MCEs logged would require the kernel to panic immediately. And it -passes a struct mce where MCi_STATUS gets written. - -However, after having saved a valid status value, the next iteration -of the loop which goes over the MCA banks on the CPU, overwrites the -valid status value because we're using struct mce as storage instead of -a temporary variable. - -Which leads to MCE records with an empty status value: - - mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000 - mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10} - -In order to prevent the loss of the status register value, return -immediately when severity is a panic one so that we can panic -immediately with the first fatal MCE logged. This is also the intention -of this function and not to noodle over the banks while a fatal MCE is -already logged. - -Tony: read the rest of the MCA bank to populate the struct mce fully. 
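The bug is a general scan-into-shared-record hazard: if the loop keeps iterating after a fatal hit, the next bank overwrites the very status that justified the panic. A userspace sketch of the corrected control flow (the bank encodings and the severity rule are invented for illustration; only the early-return shape matches the patch):

#include <stdio.h>

#define MCI_STATUS_VAL		(1ULL << 63)
#define MCE_PANIC_SEVERITY	3

struct mce { unsigned long long status; int bank; };

/* invented rule: low byte >= 0x80 means fatal */
static int severity(unsigned long long status)
{
	return (status & 0xff) >= 0x80 ? MCE_PANIC_SEVERITY : 0;
}

static int no_way_out(struct mce *m, const unsigned long long *banks, int n)
{
	for (int i = 0; i < n; i++) {
		m->status = banks[i];
		if (!(m->status & MCI_STATUS_VAL))
			continue;
		if (severity(m->status) >= MCE_PANIC_SEVERITY) {
			m->bank = i;
			return 1;	/* stop here: keep the fatal status */
		}
	}
	return 0;
}

int main(void)
{
	unsigned long long banks[] = {
		MCI_STATUS_VAL | 0x90,	/* fatal */
		MCI_STATUS_VAL | 0x01,	/* benign: would clobber m->status if scanning continued */
	};
	struct mce m = { 0, 0 };

	if (no_way_out(&m, banks, 2))
		printf("panic: bank %d, status %#llx\n", m.bank, m.status);
	return 0;
}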
- -Suggested-by: Tony Luck <tony.luck@intel.com> -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: <stable@vger.kernel.org> -Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++-------- - 1 file changed, 10 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c -index 4711e1c..bf6013d 100644 ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -779,23 +779,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll); - static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, - struct pt_regs *regs) - { -- int i, ret = 0; - char *tmp; -+ int i; - - for (i = 0; i < mca_cfg.banks; i++) { - m->status = mce_rdmsrl(msr_ops.status(i)); -- if (m->status & MCI_STATUS_VAL) { -- __set_bit(i, validp); -- if (quirk_no_way_out) -- quirk_no_way_out(i, m, regs); -- } -+ if (!(m->status & MCI_STATUS_VAL)) -+ continue; -+ -+ __set_bit(i, validp); -+ if (quirk_no_way_out) -+ quirk_no_way_out(i, m, regs); - - if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { -+ mce_read_aux(m, i); - *msg = tmp; -- ret = 1; -+ return 1; - } - } -- return ret; -+ return 0; - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch deleted file mode 100644 index d5bd585e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-speculation-Use-IBRS-if-available-before-calling.patch +++ /dev/null @@ -1,232 +0,0 @@ -From d65c0b72013dac24f4e2d0b031ed8bc6b71bfcca Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Mon, 19 Feb 2018 10:50:54 +0000 -Subject: [PATCH 10/14] x86/speculation: Use IBRS if available before calling - into firmware - -commit dd84441a797150dcc49298ec95c459a8891d8bb1 upstream. - -Retpoline means the kernel is safe because it has no indirect branches. -But firmware isn't, so use IBRS for firmware calls if it's available. - -Block preemption while IBRS is set, although in practice the call sites -already had to be doing that. - -Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware -code, from an NMI handler. I don't want to touch that with a bargepole. 
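Structurally the patch is a bracket around every firmware entry point: raise IBRS, call out, drop IBRS, with preemption disabled across the window. A plain-C sketch of that shape, with the MSR write reduced to a stub (the real code uses alternative_msr_write() patched in only when X86_FEATURE_USE_IBRS_FW is set, and preempt_disable()/preempt_enable() around the window):

#include <stdio.h>

#define MSR_IA32_SPEC_CTRL	0x48
#define SPEC_CTRL_IBRS		(1 << 0)

/* stub for the MSR write; the kernel patches this in via alternatives */
static void wrmsr_stub(unsigned int msr, unsigned long val)
{
	printf("wrmsr %#x <- %#lx\n", msr, val);
}

static void firmware_call(void)
{
	printf("  ...running firmware with branch prediction restricted...\n");
}

int main(void)
{
	/* firmware_restrict_branch_speculation_start(); also disables preemption */
	wrmsr_stub(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);

	firmware_call();

	/* firmware_restrict_branch_speculation_end(); re-enables preemption */
	wrmsr_stub(MSR_IA32_SPEC_CTRL, 0);
	return 0;
}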
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: arjan.van.de.ven@intel.com -Cc: bp@alien8.de -Cc: dave.hansen@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Cc: rkrcmar@redhat.com -Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/apm.h | 6 ++++++ - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/efi.h | 17 ++++++++++++++-- - arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- - arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- - 5 files changed, 63 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h -index 93eebc63..46e40ae 100644 ---- a/arch/x86/include/asm/apm.h -+++ b/arch/x86/include/asm/apm.h -@@ -6,6 +6,8 @@ - #ifndef _ASM_X86_MACH_DEFAULT_APM_H - #define _ASM_X86_MACH_DEFAULT_APM_H - -+#include <asm/nospec-branch.h> -+ - #ifdef APM_ZERO_SEGS - # define APM_DO_ZERO_SEGS \ - "pushl %%ds\n\t" \ -@@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, - * N.B. We do NOT need a cld after the BIOS call - * because we always save and restore the flags. - */ -+ firmware_restrict_branch_speculation_start(); - __asm__ __volatile__(APM_DO_ZERO_SEGS - "pushl %%edi\n\t" - "pushl %%ebp\n\t" -@@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, - "=S" (*esi) - : "a" (func), "b" (ebx_in), "c" (ecx_in) - : "memory", "cc"); -+ firmware_restrict_branch_speculation_end(); - } - - static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, -@@ -55,6 +59,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, - * N.B. We do NOT need a cld after the BIOS call - * because we always save and restore the flags. 
- */ -+ firmware_restrict_branch_speculation_start(); - __asm__ __volatile__(APM_DO_ZERO_SEGS - "pushl %%edi\n\t" - "pushl %%ebp\n\t" -@@ -67,6 +72,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, - "=S" (si) - : "a" (func), "b" (ebx_in), "c" (ecx_in) - : "memory", "cc"); -+ firmware_restrict_branch_speculation_end(); - return error; - } - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 8eb23f5..ed7a1d2 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -203,6 +203,7 @@ - #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h -index 389d700..9df22bb 100644 ---- a/arch/x86/include/asm/efi.h -+++ b/arch/x86/include/asm/efi.h -@@ -5,6 +5,7 @@ - #include <asm/pgtable.h> - #include <asm/processor-flags.h> - #include <asm/tlb.h> -+#include <asm/nospec-branch.h> - - /* - * We map the EFI regions needed for runtime services non-contiguously, -@@ -35,8 +36,18 @@ - - extern unsigned long asmlinkage efi_call_phys(void *, ...); - --#define arch_efi_call_virt_setup() kernel_fpu_begin() --#define arch_efi_call_virt_teardown() kernel_fpu_end() -+#define arch_efi_call_virt_setup() \ -+({ \ -+ kernel_fpu_begin(); \ -+ firmware_restrict_branch_speculation_start(); \ -+}) -+ -+#define arch_efi_call_virt_teardown() \ -+({ \ -+ firmware_restrict_branch_speculation_end(); \ -+ kernel_fpu_end(); \ -+}) -+ - - /* - * Wrap all the virtual calls in a way that forces the parameters on the stack. -@@ -72,6 +83,7 @@ struct efi_scratch { - efi_sync_low_kernel_mappings(); \ - preempt_disable(); \ - __kernel_fpu_begin(); \ -+ firmware_restrict_branch_speculation_start(); \ - \ - if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = read_cr3(); \ -@@ -90,6 +102,7 @@ struct efi_scratch { - __flush_tlb_all(); \ - } \ - \ -+ firmware_restrict_branch_speculation_end(); \ - __kernel_fpu_end(); \ - preempt_enable(); \ - }) -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index dace2de..031840a 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) - #endif - } - -+#define alternative_msr_write(_msr, _val, _feature) \ -+ asm volatile(ALTERNATIVE("", \ -+ "movl %[msr], %%ecx\n\t" \ -+ "movl %[val], %%eax\n\t" \ -+ "movl $0, %%edx\n\t" \ -+ "wrmsr", \ -+ _feature) \ -+ : : [msr] "i" (_msr), [val] "i" (_val) \ -+ : "eax", "ecx", "edx", "memory") -+ - static inline void indirect_branch_prediction_barrier(void) - { -- asm volatile(ALTERNATIVE("", -- "movl %[msr], %%ecx\n\t" -- "movl %[val], %%eax\n\t" -- "movl $0, %%edx\n\t" -- "wrmsr", -- X86_FEATURE_USE_IBPB) -- : : [msr] "i" (MSR_IA32_PRED_CMD), -- [val] "i" (PRED_CMD_IBPB) -- : "eax", "ecx", "edx", "memory"); -+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, -+ X86_FEATURE_USE_IBPB); -+} -+ -+/* -+ * With retpoline, we must use IBRS to restrict branch prediction -+ * before calling into firmware. 
-+ */ -+static inline void firmware_restrict_branch_speculation_start(void) -+{ -+ preempt_disable(); -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, -+ X86_FEATURE_USE_IBRS_FW); -+} -+ -+static inline void firmware_restrict_branch_speculation_end(void) -+{ -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, -+ X86_FEATURE_USE_IBRS_FW); -+ preempt_enable(); - } - - #endif /* __ASSEMBLY__ */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index baddc9e..b8b0b6e 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -299,6 +299,15 @@ static void __init spectre_v2_select_mitigation(void) - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } -+ -+ /* -+ * Retpoline means the kernel is safe because it has no indirect -+ * branches. But firmware isn't, so use IBRS to protect that. -+ */ -+ if (boot_cpu_has(X86_FEATURE_IBRS)) { -+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); -+ pr_info("Enabling Restricted Speculation for firmware calls\n"); -+ } - } - - #undef pr_fmt -@@ -325,8 +334,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - -- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", -+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); - } - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch deleted file mode 100644 index 4e1d906b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch +++ /dev/null @@ -1,82 +0,0 @@ -From adbb63b59bd2792df649335e7d3c28be2fbbe1c2 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Thu, 25 Jan 2018 10:58:13 +0100 -Subject: [PATCH 11/33] KVM: x86: Make indirect calls in emulator speculation - safe - -(cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab) - -Replace the indirect calls with CALL_NOSPEC. 
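The emulator's fastops are reached through a function pointer held in a register, which is exactly the indirect-call shape retpolines protect. A plain-C sketch of the dispatch being wrapped (no retpoline here, just the indirection; in the kernel the call site is inline asm using CALL_NOSPEC with a [thunk_target] operand rather than a C call):

#include <stdio.h>

typedef int (*fastop_t)(int, int);

static int op_add(int a, int b) { return a + b; }
static int op_and(int a, int b) { return a & b; }

/* In the kernel this is asm("... " CALL_NOSPEC ...) so that, under
 * CONFIG_RETPOLINE, the indirect call is routed through a thunk. */
static int dispatch(fastop_t fop, int a, int b)
{
	return fop(a, b);
}

int main(void)
{
	printf("add: %d, and: %d\n", dispatch(op_add, 6, 3), dispatch(op_and, 6, 3));
	return 0;
}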
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Ashok Raj <ashok.raj@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: rga@amazon.de -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Jason Baron <jbaron@akamai.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org -[dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it] -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/emulate.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c -index 9984daf..6faac71 100644 ---- a/arch/x86/kvm/emulate.c -+++ b/arch/x86/kvm/emulate.c -@@ -25,6 +25,7 @@ - #include <asm/kvm_emulate.h> - #include <linux/stringify.h> - #include <asm/debugreg.h> -+#include <asm/nospec-branch.h> - - #include "x86.h" - #include "tss.h" -@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) - void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); - - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; -- asm("push %[flags]; popf; call *%[fastop]" -- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); -+ asm("push %[flags]; popf; " CALL_NOSPEC -+ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); - return rc; - } - -@@ -5287,15 +5288,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, - - static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) - { -- register void *__sp asm(_ASM_SP); - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - - if (!(ctxt->d & ByteOp)) - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - -- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" -+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), -- [fastop]"+S"(fop), "+r"(__sp) -+ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT - : "c"(ctxt->src2.val)); - - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch deleted file mode 100644 index bf2556b8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1909a1513f6d5b9170e40c4fee98bf2cd57b5b55 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Wed, 7 Mar 2018 22:10:01 +0100 -Subject: [PATCH 11/93] bpf, x64: increase number of passes - -commit 6007b080d2e2adb7af22bf29165f0594ea12b34c upstream. 
- -In Cilium some of the main programs we run today are hitting 9 passes -on x64's JIT compiler, and we've had cases already where we surpassed -the limit where the JIT then punts the program to the interpreter -instead, leading to insertion failures due to CONFIG_BPF_JIT_ALWAYS_ON -or insertion failures due to the prog array owner being JITed but the -program to insert not (both must have the same JITed/non-JITed property). - -One concrete case the program image shrunk from 12,767 bytes down to -10,288 bytes where the image converged after 16 steps. I've measured -that this took 340us in the JIT until it converges on my i7-6600U. Thus, -increase the original limit we had from day one where the JIT covered -cBPF only back then before we run into the case (as similar with the -complexity limit) where we trip over this and hit program rejections. -Also add a cond_resched() into the compilation loop, the JIT process -runs without any locks and may sleep anyway. - -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Acked-by: Alexei Starovoitov <ast@kernel.org> -Reviewed-by: Eric Dumazet <edumazet@google.com> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/net/bpf_jit_comp.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c -index 1f7ed2e..cd97645 100644 ---- a/arch/x86/net/bpf_jit_comp.c -+++ b/arch/x86/net/bpf_jit_comp.c -@@ -1135,7 +1135,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) - * may converge on the last pass. In such case do one more - * pass to emit the final image - */ -- for (pass = 0; pass < 10 || image; pass++) { -+ for (pass = 0; pass < 20 || image; pass++) { - proglen = do_jit(prog, addrs, image, oldproglen, &ctx); - if (proglen <= 0) { - image = NULL; -@@ -1162,6 +1162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) - } - } - oldproglen = proglen; -+ cond_resched(); - } - - if (bpf_jit_enable > 1) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch deleted file mode 100644 index b1a35070..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-kaiser-fix-perf-crashes.patch +++ /dev/null @@ -1,152 +0,0 @@ -From b070484be405393d801b7b9dcd0027875d9fd873 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Wed, 23 Aug 2017 14:21:14 -0700 -Subject: [PATCH 011/103] kaiser: fix perf crashes - -Avoid perf crashes: place debug_store in the user-mapped per-cpu area -instead of allocating, and use page allocator plus kaiser_add_mapping() -to keep the BTS and PEBS buffers user-mapped (that is, present in the -user mapping, though visible only to kernel and hardware). The PEBS -fixup buffer does not need this treatment. - -The need for a user-mapped struct debug_store showed up before doing -any conscious perf testing: in a couple of kernel paging oopses on -Westmere, implicating the debug_store offset of the per-cpu area. 
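The allocation strategy reduces to: take whole zeroed pages so the buffer can be entered into the user-visible (shadow) mapping, and undo the mapping before freeing. A userspace approximation with the mapping calls stubbed out (aligned_alloc stands in for the page allocator; kaiser_add_mapping()/kaiser_remove_mapping() are only mimicked by printfs):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096UL

/* stub: the real kaiser_add_mapping() can fail and must be checked */
static int add_user_mapping(void *addr, size_t size)
{
	printf("shadow-map %zu bytes at %p\n", size, addr);
	return 0;
}

static void *dsalloc(size_t size)
{
	size_t rounded = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	void *buf = aligned_alloc(PAGE_SIZE, rounded);	/* page-allocator stand-in */

	if (!buf)
		return NULL;
	memset(buf, 0, rounded);
	if (add_user_mapping(buf, size) < 0) {
		free(buf);
		return NULL;
	}
	return buf;
}

static void dsfree(void *buf, size_t size)
{
	if (!buf)
		return;
	printf("shadow-unmap %zu bytes at %p\n", size, buf);	/* kaiser_remove_mapping() */
	free(buf);
}

int main(void)
{
	void *bts = dsalloc(64 * 1024);

	dsfree(bts, 64 * 1024);
	return 0;
}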
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/intel/ds.c | 57 ++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 45 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c -index be20239..c2e4ae2 100644 ---- a/arch/x86/events/intel/ds.c -+++ b/arch/x86/events/intel/ds.c -@@ -2,11 +2,15 @@ - #include <linux/types.h> - #include <linux/slab.h> - -+#include <asm/kaiser.h> - #include <asm/perf_event.h> - #include <asm/insn.h> - - #include "../perf_event.h" - -+static -+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store); -+ - /* The size of a BTS record in bytes: */ - #define BTS_RECORD_SIZE 24 - -@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu) - - static DEFINE_PER_CPU(void *, insn_buffer); - -+static void *dsalloc(size_t size, gfp_t flags, int node) -+{ -+#ifdef CONFIG_KAISER -+ unsigned int order = get_order(size); -+ struct page *page; -+ unsigned long addr; -+ -+ page = __alloc_pages_node(node, flags | __GFP_ZERO, order); -+ if (!page) -+ return NULL; -+ addr = (unsigned long)page_address(page); -+ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) { -+ __free_pages(page, order); -+ addr = 0; -+ } -+ return (void *)addr; -+#else -+ return kmalloc_node(size, flags | __GFP_ZERO, node); -+#endif -+} -+ -+static void dsfree(const void *buffer, size_t size) -+{ -+#ifdef CONFIG_KAISER -+ if (!buffer) -+ return; -+ kaiser_remove_mapping((unsigned long)buffer, size); -+ free_pages((unsigned long)buffer, get_order(size)); -+#else -+ kfree(buffer); -+#endif -+} -+ - static int alloc_pebs_buffer(int cpu) - { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; -@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu) - if (!x86_pmu.pebs) - return 0; - -- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); -+ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); - if (unlikely(!buffer)) - return -ENOMEM; - -@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu) - if (x86_pmu.intel_cap.pebs_format < 2) { - ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); - if (!ibuffer) { -- kfree(buffer); -+ dsfree(buffer, x86_pmu.pebs_buffer_size); - return -ENOMEM; - } - per_cpu(insn_buffer, cpu) = ibuffer; -@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu) - kfree(per_cpu(insn_buffer, cpu)); - per_cpu(insn_buffer, cpu) = NULL; - -- kfree((void *)(unsigned long)ds->pebs_buffer_base); -+ dsfree((void *)(unsigned long)ds->pebs_buffer_base, -+ x86_pmu.pebs_buffer_size); - ds->pebs_buffer_base = 0; - } - -@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu) - if (!x86_pmu.bts) - return 0; - -- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); -+ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); - if (unlikely(!buffer)) { - WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); - return -ENOMEM; -@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu) - if (!ds || !x86_pmu.bts) - return; - -- kfree((void *)(unsigned long)ds->bts_buffer_base); -+ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE); - ds->bts_buffer_base = 0; - } - - static int alloc_ds_buffer(int cpu) - { -- int node = cpu_to_node(cpu); -- struct debug_store *ds; -- -- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); -- if (unlikely(!ds)) -- return -ENOMEM; -+ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu); - -+ memset(ds, 
0, sizeof(*ds)); - per_cpu(cpu_hw_events, cpu).ds = ds; - - return 0; -@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu) - return; - - per_cpu(cpu_hw_events, cpu).ds = NULL; -- kfree(ds); - } - - void release_ds_buffers(void) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch deleted file mode 100644 index 00297c34..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch +++ /dev/null @@ -1,133 +0,0 @@ -From df2f7e0d21ca37bdbdf3fc5b6fa42a9b0bc6fbd6 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Mon, 19 Feb 2018 11:13:28 +0100 -Subject: [PATCH 11/12] x86/microcode/AMD: Change load_microcode_amd()'s param - to bool to fix preemptibility bug - -commit dac6ca243c4c49a9ca7507d3d66140ebfac8b04b upstream. - -With CONFIG_DEBUG_PREEMPT enabled, I get: - - BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 - caller is debug_smp_processor_id - CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc2+ #2 - Call Trace: - dump_stack - check_preemption_disabled - debug_smp_processor_id - save_microcode_in_initrd_amd - ? microcode_init - save_microcode_in_initrd - ... - -because, well, it says it above, we're using smp_processor_id() in -preemptible code. - -But passing the CPU number is not really needed. It is only used to -determine whether we're on the BSP, and, if so, to save the microcode -patch for early loading. - - [ We don't absolutely need to do it on the BSP but we do that - customarily there. ] - -Instead, convert that function parameter to a boolean which denotes -whether the patch should be saved or not, thereby avoiding the use of -smp_processor_id() in preemptible code. 
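The refactor in miniature: the caller, which can cheaply and safely tell whether it is on the BSP, computes a boolean once and passes it down, so the callee never needs smp_processor_id() in preemptible context. A sketch under those assumptions (function names shortened and bodies stubbed; only the parameter change matches the patch):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static void save_patch_for_early_load(void)
{
	printf("saving matching patch for early load\n");
}

static int load_microcode(bool save, const unsigned char *data, size_t size)
{
	/* ...parse and apply "data"... (elided) */
	(void)data;
	(void)size;
	if (save)
		save_patch_for_early_load();
	return 0;
}

int main(void)
{
	/* caller side: the BSP test is a plain comparison, no smp_processor_id() */
	bool bsp = true;	/* c->cpu_index == boot_cpu_data.cpu_index */
	const unsigned char blob[16] = { 0 };

	return load_microcode(bsp, blob, sizeof(blob));
}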
- -Signed-off-by: Borislav Petkov <bp@suse.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20170528200414.31305-1-bp@alien8.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -[arnd: rebased to 4.9, after running into warning: - arch/x86/kernel/cpu/microcode/amd.c:881:30: self-comparison always evaluates to true] -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/microcode_amd.h | 1 - - arch/x86/kernel/cpu/microcode/amd.c | 17 +++++++++++------ - 2 files changed, 11 insertions(+), 7 deletions(-) - -diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h -index 15eb754..98ccbd1 100644 ---- a/arch/x86/include/asm/microcode_amd.h -+++ b/arch/x86/include/asm/microcode_amd.h -@@ -59,7 +59,6 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, - - extern int __apply_microcode_amd(struct microcode_amd *mc_amd); - extern int apply_microcode_amd(int cpu); --extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); - - #define PATCH_MAX_SIZE PAGE_SIZE - -diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c -index 017bda1..aaab28a 100644 ---- a/arch/x86/kernel/cpu/microcode/amd.c -+++ b/arch/x86/kernel/cpu/microcode/amd.c -@@ -135,6 +135,9 @@ static size_t compute_container_size(u8 *data, u32 total_size) - return size; - } - -+static enum ucode_state -+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); -+ - /* - * Early load occurs before we can vmalloc(). So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a -@@ -451,7 +454,7 @@ int __init save_microcode_in_initrd_amd(void) - eax = cpuid_eax(0x00000001); - eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - -- ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); -+ ret = load_microcode_amd(true, eax, container, container_size); - if (ret != UCODE_OK) - retval = -EINVAL; - -@@ -860,7 +863,8 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, - return UCODE_OK; - } - --enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) -+static enum ucode_state -+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) - { - enum ucode_state ret; - -@@ -874,8 +878,8 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s - - #ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ -- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { -- struct ucode_patch *p = find_patch(cpu); -+ if (save) { -+ struct ucode_patch *p = find_patch(0); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), -@@ -907,11 +911,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, - { - char fw_name[36] = "amd-ucode/microcode_amd.bin"; - struct cpuinfo_x86 *c = &cpu_data(cpu); -+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index; - enum ucode_state ret = UCODE_NFOUND; - const struct firmware *fw; - - /* reload ucode container only on the boot cpu */ -- if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) -+ if (!refresh_fw || !bsp) - return UCODE_OK; - - if (c->x86 >= 0x15) -@@ -928,7 +933,7 @@ static enum ucode_state request_microcode_amd(int cpu, 
struct device *device, - goto fw_release; - } - -- ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); -+ ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); - - fw_release: - release_firmware(fw); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch deleted file mode 100644 index 311c2e85..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-msr-Add-definitions-for-new-speculation-control-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b733a28baec38d991f253a8587a94e9b2948a7d0 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:12 +0000 -Subject: [PATCH 11/42] x86/msr: Add definitions for new speculation control - MSRs - -(cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410) - -Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. - -See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 4eeaa36..0e4da8e 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -37,6 +37,13 @@ - #define EFER_FFXSR (1<<_EFER_FFXSR) - - /* Intel MSRs. 
Some also available on other CPUs */ -+#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -+#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -+#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -+ -+#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -+#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ -+ - #define MSR_IA32_PERFCTR0 0x000000c1 - #define MSR_IA32_PERFCTR1 0x000000c2 - #define MSR_FSB_FREQ 0x000000cd -@@ -50,6 +57,11 @@ - #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - - #define MSR_MTRRcap 0x000000fe -+ -+#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -+#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -+#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -+ - #define MSR_IA32_BBL_CR_CTL 0x00000119 - #define MSR_IA32_BBL_CR_CTL3 0x0000011e - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch deleted file mode 100644 index 6caed4a9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 3de13a223fa7e5d0dc5bb20d87be73f686768daf Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Mon, 19 Feb 2018 10:50:57 +0000 -Subject: [PATCH 11/14] x86/retpoline: Support retpoline builds with Clang - -commit 87358710c1fb4f1bf96bbe2349975ff9953fc9b2 upstream. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Reviewed-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: arjan.van.de.ven@intel.com -Cc: bp@alien8.de -Cc: dave.hansen@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Cc: rkrcmar@redhat.com -Link: http://lkml.kernel.org/r/1519037457-7643-5-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/Makefile | 5 ++++- - include/linux/compiler-clang.h | 5 +++++ - include/linux/compiler-gcc.h | 4 ++++ - include/linux/init.h | 8 ++++---- - 4 files changed, 17 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index cd22cb8..b609961 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -184,7 +184,10 @@ KBUILD_AFLAGS += $(mflags-y) - - # Avoid indirect branches in kernel to deal with Spectre - ifdef CONFIG_RETPOLINE -- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -+ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register -+ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk -+ -+ RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - endif -diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h -index de17999..01225b0 100644 ---- a/include/linux/compiler-clang.h -+++ b/include/linux/compiler-clang.h -@@ -15,3 +15,8 @@ - * with any version that can compile the kernel - */ - #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -+ -+/* Clang doesn't have a way to turn it off per-function, yet. 
*/ -+#ifdef __noretpoline -+#undef __noretpoline -+#endif -diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h -index 928e5ca..362a1e17 100644 ---- a/include/linux/compiler-gcc.h -+++ b/include/linux/compiler-gcc.h -@@ -88,6 +88,10 @@ - #define __weak __attribute__((weak)) - #define __alias(symbol) __attribute__((alias(#symbol))) - -+#ifdef RETPOLINE -+#define __noretpoline __attribute__((indirect_branch("keep"))) -+#endif -+ - /* - * it doesn't make sense on ARM (currently the only user of __naked) - * to trace naked functions because then mcount is called without -diff --git a/include/linux/init.h b/include/linux/init.h -index 8e346d1..683508f 100644 ---- a/include/linux/init.h -+++ b/include/linux/init.h -@@ -5,10 +5,10 @@ - #include <linux/types.h> - - /* Built-in __init functions needn't be compiled with retpoline */ --#if defined(RETPOLINE) && !defined(MODULE) --#define __noretpoline __attribute__((indirect_branch("keep"))) -+#if defined(__noretpoline) && !defined(MODULE) -+#define __noinitretpoline __noretpoline - #else --#define __noretpoline -+#define __noinitretpoline - #endif - - /* These macros are used to mark some functions or -@@ -46,7 +46,7 @@ - - /* These are for everybody (although not all archs will actually - discard it in modules) */ --#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline -+#define __init __section(.init.text) __cold notrace __latent_entropy __noinitretpoline - #define __initdata __section(.init.data) - #define __initconst __section(.init.rodata) - #define __exitdata __section(.exit.data) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch deleted file mode 100644 index ba052d9e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 9eee1ba493f5899d7c3793818db16deaf084df21 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Thu, 25 Jan 2018 10:58:14 +0100 -Subject: [PATCH 12/33] KVM: VMX: Make indirect call speculation safe - -(cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699) - -Replace indirect call with CALL_NOSPEC. 
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Ashok Raj <ashok.raj@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: rga@amazon.de -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Jason Baron <jbaron@akamai.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 98f6545..6f3ed0e 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -8659,14 +8659,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) - #endif - "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" -- "call *%[entry]\n\t" -+ CALL_NOSPEC - : - #ifdef CONFIG_X86_64 - [sp]"=&r"(tmp), - #endif - "+r"(__sp) - : -- [entry]"r"(entry), -+ THUNK_TARGET(entry), - [ss]"i"(__KERNEL_DS), - [cs]"i"(__KERNEL_CS) - ); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch deleted file mode 100644 index 74d00005..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 43eb304091f01c302dfec0f98b29072a0022fdf0 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 18:48:02 -0700 -Subject: [PATCH 012/103] kaiser: ENOMEM if kaiser_pagetable_walk() NULL - -kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed. -And avoid its might_sleep() when atomic (though atomic at present unused). 
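The pattern: a page-table walk that allocates intermediate levels on demand can return NULL, and the mapping loop must check for that and propagate -ENOMEM instead of dereferencing. A userspace sketch of the corrected loop (the walk is modeled as a fallible allocation; the flags and the per-iteration cleanup are illustrative and have no kernel analogue):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

typedef unsigned long pte_t;

/* stand-in for kaiser_pagetable_walk(): allocates intermediate tables on
 * demand, so it can legitimately return NULL under memory pressure */
static pte_t *pagetable_walk(unsigned long address)
{
	(void)address;
	return calloc(1, sizeof(pte_t));
}

static int add_user_map(unsigned long start, unsigned long size)
{
	int ret = 0;

	for (unsigned long addr = start; addr < start + size; addr += 4096) {
		pte_t *pte = pagetable_walk(addr);

		if (!pte) {
			ret = -ENOMEM;	/* the check this patch adds */
			break;
		}
		*pte = addr | 0x1;	/* "set_pte" with illustrative flags */
		free(pte);		/* userspace cleanup only */
	}
	return ret;
}

int main(void)
{
	printf("add_user_map -> %d\n", add_user_map(0x1000, 0x4000));
	return 0;
}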
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 8d6061c..ba6fc2c 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); - gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - -- might_sleep(); - if (is_atomic) { - gfp &= ~GFP_KERNEL; - gfp |= __GFP_HIGH | __GFP_ATOMIC; -- } -+ } else -+ might_sleep(); - - if (pgd_none(*pgd)) { - WARN_ONCE(1, "All shadow pgds should have been populated"); -@@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, - unsigned long end_addr = PAGE_ALIGN(start_addr + size); - unsigned long target_address; - -- for (;address < end_addr; address += PAGE_SIZE) { -+ for (; address < end_addr; address += PAGE_SIZE) { - target_address = get_pa_from_mapping(address); - if (target_address == -1) { - ret = -EIO; - break; - } - pte = kaiser_pagetable_walk(address, false); -+ if (!pte) { -+ ret = -ENOMEM; -+ break; -+ } - if (pte_none(*pte)) { - set_pte(pte, __pte(flags | target_address)); - } else { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch deleted file mode 100644 index f8e4bda9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch +++ /dev/null @@ -1,79 +0,0 @@ -From c8c45aa51a96245b04ac18e6f3475d66bc90d4e3 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Fri, 23 Feb 2018 14:06:21 -0800 -Subject: [PATCH 12/12] x86/entry/64: Clear extra registers beyond syscall - arguments, to reduce speculation attack surface - -commit 8e1eb3fa009aa7c0b944b3c8b26b07de0efb3200 upstream. - -At entry userspace may have (maliciously) populated the extra registers -outside the syscall calling convention with arbitrary values that could -be useful in a speculative execution (Spectre style) attack. - -Clear these registers to minimize the kernel's attack surface. - -Note, this only clears the extra registers and not the unused -registers for syscalls less than 6 arguments, since those registers are -likely to be clobbered well before their values could be put to use -under speculation. - -Note, Linus found that the XOR instructions can be executed with -minimized cost if interleaved with the PUSH instructions, and Ingo's -analysis found that R10 and R11 should be included in the register -clearing beyond the typical 'extra' syscall calling convention -registers. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Reported-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Cc: <stable@vger.kernel.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: H. 
Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/151787988577.7847.16733592218894189003.stgit@dwillia2-desk3.amr.corp.intel.com -[ Made small improvements to the changelog and the code comments. ] -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index c915eeb..e9120d4 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -176,13 +176,26 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) - pushq %r8 /* pt_regs->r8 */ - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ -+ /* -+ * Clear extra registers that a speculation attack might -+ * otherwise want to exploit. Interleave XOR with PUSH -+ * for better uop scheduling: -+ */ -+ xorq %r10, %r10 /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ -+ xorq %r11, %r11 /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ -+ xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp */ -+ xorl %ebp, %ebp /* nospec rbp */ - pushq %r12 /* pt_regs->r12 */ -+ xorq %r12, %r12 /* nospec r12 */ - pushq %r13 /* pt_regs->r13 */ -+ xorq %r13, %r13 /* nospec r13 */ - pushq %r14 /* pt_regs->r14 */ -+ xorq %r14, %r14 /* nospec r14 */ - pushq %r15 /* pt_regs->r15 */ -+ xorq %r15, %r15 /* nospec r15 */ - - /* IRQs are off. */ - movq %rsp, %rdi --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch deleted file mode 100644 index bdb55fda..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 280488ceca9427dd91e5ee449d90f8cf16d8e65c Mon Sep 17 00:00:00 2001 -From: Matthias Kaehlcke <mka@chromium.org> -Date: Mon, 1 May 2017 15:47:41 -0700 -Subject: [PATCH 12/93] x86/mm/kaslr: Use the _ASM_MUL macro for multiplication - to work around Clang incompatibility - -[ Upstream commit 121843eb02a6e2fa30aefab64bfe183c97230c75 ] - -The constraint "rm" allows the compiler to put mix_const into memory. -When the input operand is a memory location then MUL needs an operand -size suffix, since Clang can't infer the multiplication width from the -operand. - -Add and use the _ASM_MUL macro which determines the operand size and -resolves to the NUL instruction with the corresponding suffix. 
- -This fixes the following error when building with clang: - - CC arch/x86/lib/kaslr.o - /tmp/kaslr-dfe1ad.s: Assembler messages: - /tmp/kaslr-dfe1ad.s:182: Error: no instruction mnemonic suffix given and no register operands; can't size instruction - -Signed-off-by: Matthias Kaehlcke <mka@chromium.org> -Cc: Grant Grundler <grundler@chromium.org> -Cc: Greg Hackmann <ghackmann@google.com> -Cc: Kees Cook <keescook@chromium.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Michael Davidson <md@google.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20170501224741.133938-1-mka@chromium.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/asm.h | 1 + - arch/x86/lib/kaslr.c | 3 ++- - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h -index 7bb29a4..08684b3 100644 ---- a/arch/x86/include/asm/asm.h -+++ b/arch/x86/include/asm/asm.h -@@ -34,6 +34,7 @@ - #define _ASM_ADD __ASM_SIZE(add) - #define _ASM_SUB __ASM_SIZE(sub) - #define _ASM_XADD __ASM_SIZE(xadd) -+#define _ASM_MUL __ASM_SIZE(mul) - - #define _ASM_AX __ASM_REG(ax) - #define _ASM_BX __ASM_REG(bx) -diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c -index 121f59c..0c7fe44 100644 ---- a/arch/x86/lib/kaslr.c -+++ b/arch/x86/lib/kaslr.c -@@ -5,6 +5,7 @@ - * kernel starts. This file is included in the compressed kernel and - * normally linked in the regular. - */ -+#include <asm/asm.h> - #include <asm/kaslr.h> - #include <asm/msr.h> - #include <asm/archrandom.h> -@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose) - } - - /* Circular multiply for better bit diffusion */ -- asm("mul %3" -+ asm(_ASM_MUL "%3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch deleted file mode 100644 index b1f180c1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 50f378f14484a86ee783e0e4da697e32295c6694 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:13 +0000 -Subject: [PATCH 12/42] x86/pti: Do not enable PTI on CPUs which are not - vulnerable to Meltdown - -(cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621) - -Also, for CPUs which don't speculate at all, don't report that they're -vulnerable to the Spectre variants either. - -Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it -for now, even though that could be done with a simple comparison, on the -assumption that we'll have more to add. - -Based on suggestions from Dave Hansen and Alan Cox. 
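The mechanism is a table-driven whitelist: describe (vendor, family) tuples once, then ask x86_match_cpu() whether the boot CPU appears before forcing bug bits. A simplified sketch of that matching (the vendor enum and the -1 wildcard convention here are invented; the kernel's struct x86_cpu_id also carries model and feature fields):

#include <stdbool.h>
#include <stdio.h>

enum vendor { VENDOR_INTEL, VENDOR_AMD, VENDOR_ANY };

struct cpu_id { enum vendor vendor; int family; };	/* family -1 = any */

static const struct cpu_id cpu_no_meltdown[] = {
	{ VENDOR_AMD, -1 },
};

static bool matches(const struct cpu_id *tbl, int n, enum vendor v, int fam)
{
	for (int i = 0; i < n; i++)
		if ((tbl[i].vendor == v || tbl[i].vendor == VENDOR_ANY) &&
		    (tbl[i].family == -1 || tbl[i].family == fam))
			return true;
	return false;
}

int main(void)
{
	printf("AMD fam 23 in cpu_no_meltdown: %d\n",
	       matches(cpu_no_meltdown, 1, VENDOR_AMD, 23));
	printf("Intel fam 6 in cpu_no_meltdown: %d\n",
	       matches(cpu_no_meltdown, 1, VENDOR_INTEL, 6));
	return 0;
}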
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Borislav Petkov <bp@suse.de> -Acked-by: Dave Hansen <dave.hansen@intel.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 48 +++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 43 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 4267273..cfa026f 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -44,6 +44,8 @@ - #include <asm/pat.h> - #include <asm/microcode.h> - #include <asm/microcode_intel.h> -+#include <asm/intel-family.h> -+#include <asm/cpu_device_id.h> - - #ifdef CONFIG_X86_LOCAL_APIC - #include <asm/uv/uv.h> -@@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) - #endif - } - -+static const __initdata struct x86_cpu_id cpu_no_speculation[] = { -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, -+ { X86_VENDOR_CENTAUR, 5 }, -+ { X86_VENDOR_INTEL, 5 }, -+ { X86_VENDOR_NSC, 5 }, -+ { X86_VENDOR_ANY, 4 }, -+ {} -+}; -+ -+static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { -+ { X86_VENDOR_AMD }, -+ {} -+}; -+ -+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) -+{ -+ u64 ia32_cap = 0; -+ -+ if (x86_match_cpu(cpu_no_meltdown)) -+ return false; -+ -+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) -+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); -+ -+ /* Rogue Data Cache Load? No! */ -+ if (ia32_cap & ARCH_CAP_RDCL_NO) -+ return false; -+ -+ return true; -+} -+ - /* - * Do minimum CPU detection early. 
- * Fields really needed: vendor, cpuid_level, family, model, mask, -@@ -884,11 +921,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - - setup_force_cpu_cap(X86_FEATURE_ALWAYS); - -- if (c->x86_vendor != X86_VENDOR_AMD) -- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); -- -- setup_force_cpu_bug(X86_BUG_SPECTRE_V1); -- setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -+ if (!x86_match_cpu(cpu_no_speculation)) { -+ if (cpu_vulnerable_to_meltdown(c)) -+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -+ } - - fpu__init_system(c); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch deleted file mode 100644 index 62777941..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 05395f5046a3ff9280cde5804ff4505bbd42b115 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Wed, 17 Jan 2018 22:34:34 +0100 -Subject: [PATCH 12/14] x86/speculation, objtool: Annotate indirect calls/jumps - for objtool - -commit 9e0e3c5130e949c389caabc8033e9799b129e429 upstream. - -Annotate the indirect calls/jumps in the CALL_NOSPEC/JUMP_NOSPEC -alternatives. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++++++++++++---- - 1 file changed, 23 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 031840a..29e8f30 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -68,6 +68,18 @@ - .endm - - /* -+ * This should be used immediately before an indirect jump/call. It tells -+ * objtool the subsequent indirect jump/call is vouched safe for retpoline -+ * builds. -+ */ -+.macro ANNOTATE_RETPOLINE_SAFE -+ .Lannotate_\@: -+ .pushsection .discard.retpoline_safe -+ _ASM_PTR .Lannotate_\@ -+ .popsection -+.endm -+ -+/* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. 
-@@ -103,9 +115,9 @@ - .macro JMP_NOSPEC reg:req - #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE -- ALTERNATIVE_2 __stringify(jmp *\reg), \ -+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ -- __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD -+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD - #else - jmp *\reg - #endif -@@ -114,9 +126,9 @@ - .macro CALL_NOSPEC reg:req - #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE -- ALTERNATIVE_2 __stringify(call *\reg), \ -+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ -- __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD -+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD - #else - call *\reg - #endif -@@ -144,6 +156,12 @@ - ".long 999b - .\n\t" \ - ".popsection\n\t" - -+#define ANNOTATE_RETPOLINE_SAFE \ -+ "999:\n\t" \ -+ ".pushsection .discard.retpoline_safe\n\t" \ -+ _ASM_PTR " 999b\n\t" \ -+ ".popsection\n\t" -+ - #if defined(CONFIG_X86_64) && defined(RETPOLINE) - - /* -@@ -153,6 +171,7 @@ - # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE( \ -+ ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch deleted file mode 100644 index 4331a9f4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch +++ /dev/null @@ -1,93 +0,0 @@ -From b541de5f53d608796a946a42f5c3251e4dd07522 Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Sat, 20 May 2017 20:32:32 -0700 -Subject: [PATCH 13/93] KVM: X86: Fix preempt the preemption timer cancel -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 5acc1ca4fb15f00bfa3d4046e35ca381bc25d580 ] - -Preemption can occur during cancel preemption timer, and there will be -inconsistent status in lapic, vmx and vmcs field. - - CPU0 CPU1 - - preemption timer vmexit - handle_preemption_timer(vCPU0) - kvm_lapic_expired_hv_timer - vmx_cancel_hv_timer - vmx->hv_deadline_tsc = -1 - vmcs_clear_bits - /* hv_timer_in_use still true */ - sched_out - sched_in - kvm_arch_vcpu_load - vmx_set_hv_timer - write vmx->hv_deadline_tsc - vmcs_set_bits - /* back in kvm_lapic_expired_hv_timer */ - hv_timer_in_use = false - ... - vmx_vcpu_run - vmx_arm_hv_run - write preemption timer deadline - spurious preemption timer vmexit - handle_preemption_timer(vCPU0) - kvm_lapic_expired_hv_timer - WARN_ON(!apic->lapic_timer.hv_timer_in_use); - -This can be reproduced sporadically during boot of L2 on a -preemptible L1, causing a splat on L1. - - WARNING: CPU: 3 PID: 1952 at arch/x86/kvm/lapic.c:1529 kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] - CPU: 3 PID: 1952 Comm: qemu-system-x86 Not tainted 4.12.0-rc1+ #24 RIP: 0010:kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] - Call Trace: - handle_preemption_timer+0xe/0x20 [kvm_intel] - vmx_handle_exit+0xc9/0x15f0 [kvm_intel] - ? lock_acquire+0xdb/0x250 - ? lock_acquire+0xdb/0x250 - ? 
kvm_arch_vcpu_ioctl_run+0xdf3/0x1ce0 [kvm] - kvm_arch_vcpu_ioctl_run+0xe55/0x1ce0 [kvm] - kvm_vcpu_ioctl+0x384/0x7b0 [kvm] - ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] - ? __fget+0xf3/0x210 - do_vfs_ioctl+0xa4/0x700 - ? __fget+0x114/0x210 - SyS_ioctl+0x79/0x90 - do_syscall_64+0x8f/0x750 - ? trace_hardirqs_on_thunk+0x1a/0x1c - entry_SYSCALL64_slow_path+0x25/0x25 - -This patch fixes it by disabling preemption while cancelling -preemption timer. This way cancel_hv_timer is atomic with -respect to kvm_arch_vcpu_load. - -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/lapic.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c -index 3f05c04..650ff4a 100644 ---- a/arch/x86/kvm/lapic.c -+++ b/arch/x86/kvm/lapic.c -@@ -1358,8 +1358,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); - - static void cancel_hv_tscdeadline(struct kvm_lapic *apic) - { -+ preempt_disable(); - kvm_x86_ops->cancel_hv_timer(apic->vcpu); - apic->lapic_timer.hv_timer_in_use = false; -+ preempt_enable(); - } - - void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch deleted file mode 100644 index 61cff38e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 7bb8f481c84ef1755e442700593f0ef10857c108 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 19:18:07 -0700 -Subject: [PATCH 013/103] kaiser: tidied up asm/kaiser.h somewhat - -Mainly deleting a surfeit of blank lines, and reflowing header comment. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kaiser.h | 32 +++++++++++++------------------- - 1 file changed, 13 insertions(+), 19 deletions(-) - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 0703f48..7394ba9 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -1,15 +1,17 @@ - #ifndef _ASM_X86_KAISER_H - #define _ASM_X86_KAISER_H -- --/* This file includes the definitions for the KAISER feature. -- * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. -- * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, -- * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, -- * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. -- * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. -+/* -+ * This file includes the definitions for the KAISER feature. -+ * KAISER is a counter measure against x86_64 side channel attacks on -+ * the kernel virtual memory. It has a shadow pgd for every process: the -+ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole -+ * user memory. 
Within a kernel context switch, or when an interrupt is handled, -+ * the pgd is switched to the normal one. When the system switches to user mode, -+ * the shadow pgd is enabled. By this, the virtual memory caches are freed, -+ * and the user may not attack the whole kernel memory. - * -- * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions -- * of the user space, or the stacks. -+ * A minimalistic kernel mapping holds the parts needed to be mapped in user -+ * mode, such as the entry/exit functions of the user space, or the stacks. - */ - #ifdef __ASSEMBLY__ - #ifdef CONFIG_KAISER -@@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .endm - -- - .macro SWITCH_USER_CR3_NO_STACK -- - movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) - _SWITCH_TO_USER_CR3 %rax - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax -- - .endm - - #else /* CONFIG_KAISER */ -@@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - - #else /* __ASSEMBLY__ */ - -- - #ifdef CONFIG_KAISER - /* - * Upon kernel/user mode switch, it may happen that the address -@@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - * stored. To change the address space, another register is - * needed. A register therefore has to be stored/restored. - */ -- - DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - - /** -@@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - */ - extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); - -- - /** - * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping - * @addr: the start address of the range -@@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned l - extern void kaiser_remove_mapping(unsigned long start, unsigned long size); - - /** -- * kaiser_initialize_mapping - Initalize the shadow mapping -+ * kaiser_init - Initialize the shadow mapping - * - * Most parts of the shadow mapping can be mapped upon boot - * time. Only per-process things like the thread stacks - * or a new LDT have to be mapped at runtime. These boot- -- * time mappings are permanent and nevertunmapped. -+ * time mappings are permanent and never unmapped. - */ - extern void kaiser_init(void); - -@@ -117,6 +113,4 @@ extern void kaiser_init(void); - - #endif /* __ASSEMBLY */ - -- -- - #endif /* _ASM_X86_KAISER_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch deleted file mode 100644 index 7fa185ec..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 8642e6bac57983a63f16725873f6df03a16c5e14 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Tue, 16 Jan 2018 10:38:09 +0100 -Subject: [PATCH 13/14] x86/boot, objtool: Annotate indirect jump in - secondary_startup_64() - -commit bd89004f6305cbf7352238f61da093207ee518d6 upstream. - -The objtool retpoline validation found this indirect jump. Seeing how -it's on CPU bringup before we run userspace it should be safe, annotate -it. 
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/head_64.S | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index 67cd7c1..9d72cf5 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -22,6 +22,7 @@ - #include <asm/nops.h> - #include "../entry/calling.h" - #include <asm/export.h> -+#include <asm/nospec-branch.h> - - #ifdef CONFIG_PARAVIRT - #include <asm/asm-offsets.h> -@@ -200,6 +201,7 @@ ENTRY(secondary_startup_64) - - /* Ensure I am executing from virtual addresses */ - movq $1f, %rax -+ ANNOTATE_RETPOLINE_SAFE - jmp *%rax - 1: - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch deleted file mode 100644 index 7377d2cd..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch +++ /dev/null @@ -1,173 +0,0 @@ -From ba3461b1d9bf51d9719e001f3095a2f4b9b7031d Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:14 +0000 -Subject: [PATCH 13/42] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early - Spectre v2 microcodes - -(cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035) - -This doesn't refuse to load the affected microcodes; it just refuses to -use the Spectre v2 mitigation features if they're detected, by clearing -the appropriate feature bits. - -The AMD CPUID bits are handled here too, because hypervisors *may* have -been exposing those bits even on Intel chips, for fine-grained control -of what's available. - -It is non-trivial to use x86_match_cpu() for this table because that -doesn't handle steppings. And the approach taken in commit bd9240a18 -almost made me lose my lunch. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/intel-family.h | 7 ++-- - arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++++++++++ - 2 files changed, 71 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h -index 34a46dc..75b748a 100644 ---- a/arch/x86/include/asm/intel-family.h -+++ b/arch/x86/include/asm/intel-family.h -@@ -12,6 +12,7 @@ - */ - - #define INTEL_FAM6_CORE_YONAH 0x0E -+ - #define INTEL_FAM6_CORE2_MEROM 0x0F - #define INTEL_FAM6_CORE2_MEROM_L 0x16 - #define INTEL_FAM6_CORE2_PENRYN 0x17 -@@ -21,6 +22,7 @@ - #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ - #define INTEL_FAM6_NEHALEM_EP 0x1A - #define INTEL_FAM6_NEHALEM_EX 0x2E -+ - #define INTEL_FAM6_WESTMERE 0x25 - #define INTEL_FAM6_WESTMERE_EP 0x2C - #define INTEL_FAM6_WESTMERE_EX 0x2F -@@ -36,9 +38,9 @@ - #define INTEL_FAM6_HASWELL_GT3E 0x46 - - #define INTEL_FAM6_BROADWELL_CORE 0x3D --#define INTEL_FAM6_BROADWELL_XEON_D 0x56 - #define INTEL_FAM6_BROADWELL_GT3E 0x47 - #define INTEL_FAM6_BROADWELL_X 0x4F -+#define INTEL_FAM6_BROADWELL_XEON_D 0x56 - - #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E - #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E -@@ -57,9 +59,10 @@ - #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ - #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ - #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ --#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ -+#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ - #define INTEL_FAM6_ATOM_GOLDMONT 0x5C - #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ -+#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A - - /* Xeon Phi */ - -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index fcd484d..4d23d78 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) - } - } - -+/* -+ * Early microcode releases for the Spectre v2 mitigation were broken. 
-+ * Information taken from; -+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf -+ * - https://kb.vmware.com/s/article/52345 -+ * - Microcode revisions observed in the wild -+ * - Release note from 20180108 microcode release -+ */ -+struct sku_microcode { -+ u8 model; -+ u8 stepping; -+ u32 microcode; -+}; -+static const struct sku_microcode spectre_bad_microcodes[] = { -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, -+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, -+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, -+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, -+ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, -+ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, -+ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, -+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, -+ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, -+ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, -+ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, -+ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, -+ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, -+ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, -+ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, -+ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, -+ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, -+ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, -+ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, -+ /* Updated in the 20180108 release; blacklist until we know otherwise */ -+ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, -+ /* Observed in the wild */ -+ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, -+ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, -+}; -+ -+static bool bad_spectre_microcode(struct cpuinfo_x86 *c) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { -+ if (c->x86_model == spectre_bad_microcodes[i].model && -+ c->x86_mask == spectre_bad_microcodes[i].stepping) -+ return (c->microcode <= spectre_bad_microcodes[i].microcode); -+ } -+ return false; -+} -+ - static void early_init_intel(struct cpuinfo_x86 *c) - { - u64 misc_enable; -@@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } - -+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || -+ cpu_has(c, X86_FEATURE_STIBP) || -+ cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || -+ cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || -+ cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { -+ pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); -+ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); -+ clear_cpu_cap(c, X86_FEATURE_STIBP); -+ clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); -+ clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); -+ clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); -+ } -+ - /* - * Atom erratum AAE44/AAF40/AAG38/AAH41: - * --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch deleted file mode 100644 index 8b58f32e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 7a1d0c7758b49b1f107157db33df0aae1c10cf26 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Wed, 31 Jan 2018 17:47:03 -0800 -Subject: [PATCH 13/33] x86/kvm: Update spectre-v1 mitigation - -(cherry picked from commit 085331dfc6bbe3501fb936e657331ca943827600) - -Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup" -added a raw 'asm("lfence");' 
to prevent a bounds check bypass of -'vmcs_field_to_offset_table'. - -The lfence can be avoided in this path by using the array_index_nospec() -helper designed for these types of fixes. - -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Andrew Honig <ahonig@google.com> -Cc: kvm@vger.kernel.org -Cc: Jim Mattson <jmattson@google.com> -Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 20 +++++++++----------- - 1 file changed, 9 insertions(+), 11 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 6f3ed0e..af90bc4 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -33,6 +33,7 @@ - #include <linux/slab.h> - #include <linux/tboot.h> - #include <linux/hrtimer.h> -+#include <linux/nospec.h> - #include "kvm_cache_regs.h" - #include "x86.h" - -@@ -856,21 +857,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { - - static inline short vmcs_field_to_offset(unsigned long field) - { -- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); -+ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); -+ unsigned short offset; - -- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) -+ BUILD_BUG_ON(size > SHRT_MAX); -+ if (field >= size) - return -ENOENT; - -- /* -- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a -- * generic mechanism. -- */ -- asm("lfence"); -- -- if (vmcs_field_to_offset_table[field] == 0) -+ field = array_index_nospec(field, size); -+ offset = vmcs_field_to_offset_table[field]; -+ if (offset == 0) - return -ENOENT; -- -- return vmcs_field_to_offset_table[field]; -+ return offset; - } - - static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch deleted file mode 100644 index 43b1f38e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 2c5329f428b85d1167abdd3206bdac08a02ae082 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de> -Date: Sat, 20 May 2017 13:22:56 +0200 -Subject: [PATCH 14/93] KVM: nVMX: Fix handling of lmsw instruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit e1d39b17e044e8ae819827810d87d809ba5f58c0 ] - -The decision whether or not to exit from L2 to L1 on an lmsw instruction is -based on bogus values: instead of using the information encoded within the -exit qualification, it uses the data also used for the mov-to-cr -instruction, which boils down to using whatever is in %eax at that point. - -Use the correct values instead. - -Without this fix, an L1 may not get notified when a 32-bit Linux L2 -switches its secondary CPUs to protected mode; the L1 is only notified on -the next modification of CR0. This short time window poses a problem, when -there is some other reason to exit to L1 in between. Then, L2 will be -resumed in real mode and chaos ensues. - -Signed-off-by: Jan H. 
Schönherr <jschoenh@amazon.de> -Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 27f505d..8d842d9 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -7910,11 +7910,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, - { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int cr = exit_qualification & 15; -- int reg = (exit_qualification >> 8) & 15; -- unsigned long val = kvm_register_readl(vcpu, reg); -+ int reg; -+ unsigned long val; - - switch ((exit_qualification >> 4) & 3) { - case 0: /* mov to cr */ -+ reg = (exit_qualification >> 8) & 15; -+ val = kvm_register_readl(vcpu, reg); - switch (cr) { - case 0: - if (vmcs12->cr0_guest_host_mask & -@@ -7969,6 +7971,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, - * lmsw can change bits 1..3 of cr0, and only set bit 0 of - * cr0. Other attempted changes are ignored, with no exit. - */ -+ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; - if (vmcs12->cr0_guest_host_mask & 0xe & - (val ^ vmcs12->cr0_read_shadow)) - return true; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch deleted file mode 100644 index 38a23282..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 6b359ffcb519698f93eadc2706d06805ce933086 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand <david@redhat.com> -Date: Wed, 25 Jan 2017 11:58:57 +0100 -Subject: [PATCH 14/33] KVM: nVMX: kmap() can't fail - -commit 42cf014d38d8822cce63703a467e00f65d000952 upstream. - -kmap() can't fail, therefore it will always return a valid pointer. Let's -just get rid of the unnecessary checks. 
- -Signed-off-by: David Hildenbrand <david@redhat.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index af90bc4..17fcbaf 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -4742,10 +4742,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - return 0; - - vapic_page = kmap(vmx->nested.virtual_apic_page); -- if (!vapic_page) { -- WARN_ON(1); -- return -ENOMEM; -- } - __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); - kunmap(vmx->nested.virtual_apic_page); - -@@ -9562,11 +9558,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - return false; - } - msr_bitmap_l1 = (unsigned long *)kmap(page); -- if (!msr_bitmap_l1) { -- nested_release_page_clean(page); -- WARN_ON(1); -- return false; -- } - - memset(msr_bitmap_l0, 0xff, PAGE_SIZE); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch deleted file mode 100644 index 4827bd5a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c20c1df0acf8c3b295e2a3e6e24febdd56f13816 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 19:23:08 -0700 -Subject: [PATCH 014/103] kaiser: tidied up kaiser_add/remove_mapping slightly - -Yes, unmap_pud_range_nofree()'s declaration ought to be in a -header file really, but I'm not sure we want to use it anyway: -so for now just declare it inside kaiser_remove_mapping(). -And there doesn't seem to be such a thing as unmap_p4d_range(), -even in a 5-level paging tree. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index ba6fc2c..7a7e850 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -285,8 +285,7 @@ void __init kaiser_init(void) - __PAGE_KERNEL); - } - --extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end); --// add a mapping to the shadow-mapping, and synchronize the mappings -+/* Add a mapping to the shadow mapping, and synchronize the mappings */ - int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) - { - return kaiser_add_user_map((const void *)addr, size, flags); -@@ -294,15 +293,13 @@ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long fla - - void kaiser_remove_mapping(unsigned long start, unsigned long size) - { -+ extern void unmap_pud_range_nofree(pgd_t *pgd, -+ unsigned long start, unsigned long end); - unsigned long end = start + size; - unsigned long addr; - - for (addr = start; addr < end; addr += PGDIR_SIZE) { - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); -- /* -- * unmap_p4d_range() handles > P4D_SIZE unmaps, -- * so no need to trim 'end'. 
-- */ - unmap_pud_range_nofree(pgd, addr, end); - } - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch deleted file mode 100644 index ed57dfd2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 8d91a1887b4fccf06f4077529dc167a52590b348 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 25 Jan 2018 16:14:15 +0000 -Subject: [PATCH 14/42] x86/speculation: Add basic IBPB (Indirect Branch - Prediction Barrier) support - -(cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d) - -Expose indirect_branch_prediction_barrier() for use in subsequent patches. - -[ tglx: Add IBPB status to spectre_v2 sysfs file ] - -Co-developed-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: ak@linux.intel.com -Cc: ashok.raj@intel.com -Cc: dave.hansen@intel.com -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 ++ - arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ - arch/x86/kernel/cpu/bugs.c | 10 +++++++++- - 3 files changed, 24 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index c4d03e7..3901545 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -202,6 +202,8 @@ - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ - #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - -+#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ -+ - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ - #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 4ad4108..34e384c 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) - #endif - } - -+static inline void indirect_branch_prediction_barrier(void) -+{ -+ asm volatile(ALTERNATIVE("", -+ "movl %[msr], %%ecx\n\t" -+ "movl %[val], %%eax\n\t" -+ "movl $0, %%edx\n\t" -+ "wrmsr", -+ X86_FEATURE_IBPB) -+ : : [msr] "i" (MSR_IA32_PRED_CMD), -+ [val] "i" (PRED_CMD_IBPB) -+ : "eax", "ecx", "edx", "memory"); -+} -+ - #endif /* __ASSEMBLY__ */ - #endif /* __NOSPEC_BRANCH_H__ */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 4cea7d4..1c4b39d 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -262,6 +262,13 @@ static void __init spectre_v2_select_mitigation(void) - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); - } -+ -+ /* Initialize Indirect Branch Prediction Barrier if supported */ -+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || -+ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { -+ setup_force_cpu_cap(X86_FEATURE_IBPB); -+ pr_info("Enabling Indirect Branch Prediction Barrier\n"); -+ } - } - - #undef pr_fmt -@@ -291,7 +298,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - -- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", - spectre_v2_bad_module ? " - vulnerable module loaded" : ""); - } - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch deleted file mode 100644 index 29fb0352..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-x86-speculation-Move-firmware_restrict_branch_specul.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 9c1c34861d012ab32557236c23a303e70bef627e Mon Sep 17 00:00:00 2001 -From: Ingo Molnar <mingo@kernel.org> -Date: Wed, 21 Feb 2018 09:20:37 +0100 -Subject: [PATCH 14/14] x86/speculation: Move - firmware_restrict_branch_speculation_*() from C to CPP - -commit d72f4e29e6d84b7ec02ae93088aa459ac70e733b upstream. - -firmware_restrict_branch_speculation_*() recently started using -preempt_enable()/disable(), but those are relatively high level -primitives and cause build failures on some 32-bit builds. - -Since we want to keep <asm/nospec-branch.h> low level, convert -them to macros to avoid header hell... 
- -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: arjan.van.de.ven@intel.com -Cc: bp@alien8.de -Cc: dave.hansen@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: pbonzini@redhat.com -Cc: rkrcmar@redhat.com -Cc: linux-kernel@vger.kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------ - 1 file changed, 14 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 29e8f30..d0dabea 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -257,20 +257,22 @@ static inline void indirect_branch_prediction_barrier(void) - /* - * With retpoline, we must use IBRS to restrict branch prediction - * before calling into firmware. -+ * -+ * (Implemented as CPP macros due to header hell.) - */ --static inline void firmware_restrict_branch_speculation_start(void) --{ -- preempt_disable(); -- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, -- X86_FEATURE_USE_IBRS_FW); --} -+#define firmware_restrict_branch_speculation_start() \ -+do { \ -+ preempt_disable(); \ -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ -+ X86_FEATURE_USE_IBRS_FW); \ -+} while (0) - --static inline void firmware_restrict_branch_speculation_end(void) --{ -- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, -- X86_FEATURE_USE_IBRS_FW); -- preempt_enable(); --} -+#define firmware_restrict_branch_speculation_end() \ -+do { \ -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ -+ X86_FEATURE_USE_IBRS_FW); \ -+ preempt_enable(); \ -+} while (0) - - #endif /* __ASSEMBLY__ */ - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch deleted file mode 100644 index 913e3fe5..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 348032cf73954af79ac077ae0c13d6faa99294af Mon Sep 17 00:00:00 2001 -From: Roman Pen <roman.penyaev@profitbricks.com> -Date: Thu, 1 Jun 2017 10:55:03 +0200 -Subject: [PATCH 15/93] KVM: SVM: do not zero out segment attributes if segment - is unusable or not present -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit d9c1b5431d5f0e07575db785a022bce91051ac1d ] - -This is a fix for the problem [1], where VMCB.CPL was set to 0 and interrupt -was taken on userspace stack. The root cause lies in the specific AMD CPU -behaviour which manifests itself as unusable segment attributes on SYSRET. -The corresponding work around for the kernel is the following: - -61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") - -In other turn virtualization side treated unusable segment incorrectly and -restored CPL from SS attributes, which were zeroed out few lines above. - -In current patch it is assured only that P bit is cleared in VMCB.save state -and segment attributes are not zeroed out if segment is not presented or is -unusable, therefore CPL can be safely restored from DPL field. 
- -This is only one part of the fix, since QEMU side should be fixed accordingly -not to zero out attributes on its side. Corresponding patch will follow. - -[1] Message id: CAJrWOzD6Xq==b-zYCDdFLgSRMPM-NkNuTSDFEtX=7MreT45i7Q@mail.gmail.com - -Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com> -Signed-off-by: Mikhail Sennikovskii <mikhail.sennikovskii@profitbricks.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim KrÄmář <rkrcmar@redhat.com> -Cc: kvm@vger.kernel.org -Cc: linux-kernel@vger.kernel.org -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 24 +++++++++++------------- - 1 file changed, 11 insertions(+), 13 deletions(-) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 2d96e30..8551a54 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -1876,6 +1876,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, - */ - if (var->unusable) - var->db = 0; -+ /* This is symmetric with svm_set_segment() */ - var->dpl = to_svm(vcpu)->vmcb->save.cpl; - break; - } -@@ -2021,18 +2022,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, - s->base = var->base; - s->limit = var->limit; - s->selector = var->selector; -- if (var->unusable) -- s->attrib = 0; -- else { -- s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); -- s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; -- s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; -- s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; -- s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; -- s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; -- s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; -- s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; -- } -+ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); -+ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; -+ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; -+ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; -+ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; -+ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; -+ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; -+ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - - /* - * This is always accurate, except if SYSRET returned to a segment -@@ -2041,7 +2038,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, - * would entail passing the CPL to userspace and back. 
- */ - if (seg == VCPU_SREG_SS) -- svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; -+ /* This is symmetric with svm_get_segment() */ -+ svm->vmcb->save.cpl = (var->dpl & 3); - - mark_dirty(svm->vmcb, VMCB_SEG); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch deleted file mode 100644 index 806b1ac0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b53c02711255aa79e4e1a9974ca24610c4fbd7d7 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand <david@redhat.com> -Date: Wed, 25 Jan 2017 11:58:58 +0100 -Subject: [PATCH 15/33] KVM: nVMX: vmx_complete_nested_posted_interrupt() can't - fail - -(cherry picked from commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f) - -vmx_complete_nested_posted_interrupt() can't fail, let's turn it into -a void function. - -Signed-off-by: David Hildenbrand <david@redhat.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 17fcbaf..13dc454 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -4722,7 +4722,7 @@ static bool vmx_get_enable_apicv(void) - return enable_apicv; - } - --static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) -+static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - { - struct vcpu_vmx *vmx = to_vmx(vcpu); - int max_irr; -@@ -4733,13 +4733,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) -- return 0; -+ return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); - - if (max_irr == 256) -- return 0; -+ return; - - vapic_page = kmap(vmx->nested.virtual_apic_page); - __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); -@@ -4752,7 +4752,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - vmcs_write16(GUEST_INTR_STATUS, status); - } - } -- return 0; - } - - static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) -@@ -10440,7 +10439,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) - return 0; - } - -- return vmx_complete_nested_posted_interrupt(vcpu); -+ vmx_complete_nested_posted_interrupt(vcpu); -+ return 0; - } - - static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch deleted file mode 100644 index 373ea47c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-kaiser-align-addition-to-x86-mm-Makefile.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 7f3dc5773f4a6a737cda30183ea2650016426dee Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 19:51:10 -0700 -Subject: [PATCH 015/103] kaiser: align addition to x86/mm/Makefile - -Use tab not space so they line up properly, kaslr.o also. 
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile -index 682c162..c505569 100644 ---- a/arch/x86/mm/Makefile -+++ b/arch/x86/mm/Makefile -@@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o - - obj-$(CONFIG_X86_INTEL_MPX) += mpx.o - obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o --obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o --obj-$(CONFIG_KAISER) += kaiser.o -+obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o -+obj-$(CONFIG_KAISER) += kaiser.o --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch deleted file mode 100644 index e3c3192e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-x86-nospec-Fix-header-guards-names.patch +++ /dev/null @@ -1,56 +0,0 @@ -From d4cebbf42a124247c55852e555cea3e84b09e892 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Fri, 26 Jan 2018 13:11:37 +0100 -Subject: [PATCH 15/42] x86/nospec: Fix header guards names - -(cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9) - -... to adhere to the _ASM_X86_ naming scheme. - -No functional change. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: riel@redhat.com -Cc: ak@linux.intel.com -Cc: peterz@infradead.org -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: jikos@kernel.org -Cc: luto@amacapital.net -Cc: dave.hansen@intel.com -Cc: torvalds@linux-foundation.org -Cc: keescook@google.com -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Cc: pjt@google.com -Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 34e384c..865192a 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -1,7 +1,7 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - --#ifndef __NOSPEC_BRANCH_H__ --#define __NOSPEC_BRANCH_H__ -+#ifndef _ASM_X86_NOSPEC_BRANCH_H_ -+#define _ASM_X86_NOSPEC_BRANCH_H_ - - #include <asm/alternative.h> - #include <asm/alternative-asm.h> -@@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) - } - - #endif /* __ASSEMBLY__ */ --#endif /* __NOSPEC_BRANCH_H__ */ -+#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch deleted file mode 100644 index cf8424c9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch +++ /dev/null @@ -1,42 +0,0 @@ -From d79905a595224c714dc8da5df054653c3b958250 Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Thu, 1 Jun 2017 12:44:46 -0700 -Subject: [PATCH 16/93] KVM: nVMX: Update vmcs12->guest_linear_address on - nested VM-exit -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit d281e13b0bfe745a21061a194e386a949784393f ] - -The guest-linear address field is set for VM exits due to attempts to -execute LMSW with a memory operand and VM exits due to attempts to -execute INS or OUTS for which the relevant segment is usable, -regardless of whether or not EPT is in use. - -Fixes: 119a9c01a5922 ("KVM: nVMX: pass valid guest linear-address to the L1") -Signed-off-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 8d842d9..273313f 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -10621,8 +10621,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); - } - -- if (nested_cpu_has_ept(vmcs12)) -- vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); -+ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); - - if (nested_cpu_has_vid(vmcs12)) - vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch deleted file mode 100644 index e7f44b1b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 50fefe1aabf115927dbe944d4607d3696ed2773e Mon Sep 17 00:00:00 2001 -From: David Matlack <dmatlack@google.com> -Date: Tue, 1 Aug 2017 14:00:40 -0700 -Subject: [PATCH 16/33] KVM: nVMX: mark vmcs12 pages dirty on L2 exit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -(cherry picked from commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570) - -The host physical addresses of L1's Virtual APIC Page and Posted -Interrupt descriptor are loaded into the VMCS02. The CPU may write -to these pages via their host physical address while L2 is running, -bypassing address-translation-based dirty tracking (e.g. EPT write -protection). Mark them dirty on every exit from L2 to prevent them -from getting out of sync with dirty tracking. - -Also mark the virtual APIC page and the posted interrupt descriptor -dirty when KVM is virtualizing posted interrupt processing. 
- -Signed-off-by: David Matlack <dmatlack@google.com> -Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 43 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 13dc454..2e88fd1 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -4722,6 +4722,28 @@ static bool vmx_get_enable_apicv(void) - return enable_apicv; - } - -+static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) -+{ -+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -+ gfn_t gfn; -+ -+ /* -+ * Don't need to mark the APIC access page dirty; it is never -+ * written to by the CPU during APIC virtualization. -+ */ -+ -+ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { -+ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; -+ kvm_vcpu_mark_page_dirty(vcpu, gfn); -+ } -+ -+ if (nested_cpu_has_posted_intr(vmcs12)) { -+ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; -+ kvm_vcpu_mark_page_dirty(vcpu, gfn); -+ } -+} -+ -+ - static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - { - struct vcpu_vmx *vmx = to_vmx(vcpu); -@@ -4729,18 +4751,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - void *vapic_page; - u16 status; - -- if (vmx->nested.pi_desc && -- vmx->nested.pi_pending) { -- vmx->nested.pi_pending = false; -- if (!pi_test_and_clear_on(vmx->nested.pi_desc)) -- return; -- -- max_irr = find_last_bit( -- (unsigned long *)vmx->nested.pi_desc->pir, 256); -+ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) -+ return; - -- if (max_irr == 256) -- return; -+ vmx->nested.pi_pending = false; -+ if (!pi_test_and_clear_on(vmx->nested.pi_desc)) -+ return; - -+ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); -+ if (max_irr != 256) { - vapic_page = kmap(vmx->nested.virtual_apic_page); - __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); - kunmap(vmx->nested.virtual_apic_page); -@@ -4752,6 +4771,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) - vmcs_write16(GUEST_INTR_STATUS, status); - } - } -+ -+ nested_mark_vmcs12_pages_dirty(vcpu); - } - - static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) -@@ -8009,6 +8030,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - KVM_ISA_VMX); - -+ /* -+ * The host physical addresses of some pages of guest memory -+ * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU -+ * may write to these pages via their host physical address while -+ * L2 is running, bypassing any address-translation-based dirty -+ * tracking (e.g. EPT write protection). -+ * -+ * Mark them dirty on every exit from L2 to prevent them from -+ * getting out of sync with dirty tracking. 
-+ */ -+ nested_mark_vmcs12_pages_dirty(vcpu); -+ - if (vmx->nested.nested_run_pending) - return false; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch deleted file mode 100644 index d42f36bb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-kaiser-cleanups-while-trying-for-gold-link.patch +++ /dev/null @@ -1,141 +0,0 @@ -From a63051533f5b1a7dd6ff897afebf2f4034f38e83 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Mon, 21 Aug 2017 20:11:43 -0700 -Subject: [PATCH 016/103] kaiser: cleanups while trying for gold link - -While trying to get our gold link to work, four cleanups: -matched the gdt_page declaration to its definition; -in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes; -lined up the backslashes according to convention in percpu-defs.h; -deleted the unused irq_stack_pointer addition to irq_stack_union. - -Sad to report that aligning backslashes does not appear to help gold -align to 8192: but while these did not help, they are worth keeping. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/desc.h | 2 +- - arch/x86/include/asm/processor.h | 5 ----- - include/asm-generic/vmlinux.lds.h | 18 ++++++++---------- - include/linux/percpu-defs.h | 22 +++++++++++----------- - 4 files changed, 20 insertions(+), 27 deletions(-) - -diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h -index 12080d8..2ed5a2b 100644 ---- a/arch/x86/include/asm/desc.h -+++ b/arch/x86/include/asm/desc.h -@@ -43,7 +43,7 @@ struct gdt_page { - struct desc_struct gdt[GDT_ENTRIES]; - } __attribute__((aligned(PAGE_SIZE))); - --DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); -+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page); - - static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) - { -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 3d4784e2..8cb52ee 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -335,11 +335,6 @@ union irq_stack_union { - char gs_base[40]; - unsigned long stack_canary; - }; -- -- struct { -- char irq_stack_pointer[64]; -- char unused[IRQ_STACK_SIZE - 64]; -- }; - }; - - DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index 0b16b5d..174f5c8 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -764,16 +764,14 @@ - */ - #define PERCPU_INPUT(cacheline) \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ -- \ -- VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ -- *(.data..percpu..first) \ -- . = ALIGN(cacheline); \ -- *(.data..percpu..user_mapped) \ -- *(.data..percpu..user_mapped..shared_aligned) \ -- . = ALIGN(PAGE_SIZE); \ -- *(.data..percpu..user_mapped..page_aligned) \ -- VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ -- \ -+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ -+ *(.data..percpu..first) \ -+ . = ALIGN(cacheline); \ -+ *(.data..percpu..user_mapped) \ -+ *(.data..percpu..user_mapped..shared_aligned) \ -+ . = ALIGN(PAGE_SIZE); \ -+ *(.data..percpu..user_mapped..page_aligned) \ -+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ - . 
= ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ -diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h -index 8ea945f..cfe13cb 100644 ---- a/include/linux/percpu-defs.h -+++ b/include/linux/percpu-defs.h -@@ -121,10 +121,10 @@ - #define DEFINE_PER_CPU(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, "") - --#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ -+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) - --#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ -+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) - - /* -@@ -156,11 +156,11 @@ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - --#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ -+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - --#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ -+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -@@ -185,18 +185,18 @@ - /* - * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. - */ --#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -- DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -- __aligned(PAGE_SIZE) -+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -+ __aligned(PAGE_SIZE) - --#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -- DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -- __aligned(PAGE_SIZE) -+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ -+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ -+ __aligned(PAGE_SIZE) - - /* - * Declaration/definition used for per-CPU variables that must be read mostly. - */ --#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ -+#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") - - #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch deleted file mode 100644 index c7571ac4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 50014cf904736f358e41d1fb1337d10f92b40aa7 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Fri, 26 Jan 2018 13:11:39 +0100 -Subject: [PATCH 16/42] x86/bugs: Drop one "mitigation" from dmesg - -(cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db) - -Make - -[ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline - -into - -[ 0.031118] Spectre V2: Mitigation: Full generic retpoline - -to reduce the mitigation mitigations strings. 
- -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: riel@redhat.com -Cc: ak@linux.intel.com -Cc: peterz@infradead.org -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: jikos@kernel.org -Cc: luto@amacapital.net -Cc: dave.hansen@intel.com -Cc: torvalds@linux-foundation.org -Cc: keescook@google.com -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: tim.c.chen@linux.intel.com -Cc: pjt@google.com -Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 1c4b39d..674ad46 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -90,7 +90,7 @@ static const char *spectre_v2_strings[] = { - }; - - #undef pr_fmt --#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt -+#define pr_fmt(fmt) "Spectre V2 : " fmt - - static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; - static bool spectre_v2_bad_module; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch deleted file mode 100644 index 96687e49..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 8e52c41b7072930e5951b324964f31ef6991f3af Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Mon, 27 Nov 2017 17:22:25 -0600 -Subject: [PATCH 17/33] KVM: nVMX: Eliminate vmcs02 pool -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -(cherry picked from commit de3a0021a60635de96aa92713c1a31a96747d72c) - -The potential performance advantages of a vmcs02 pool have never been -realized. To simplify the code, eliminate the pool. Instead, a single -vmcs02 is allocated per VCPU when the VCPU enters VMX operation. - -Cc: stable@vger.kernel.org # prereq for Spectre mitigation -Signed-off-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Mark Kanda <mark.kanda@oracle.com> -Reviewed-by: Ameya More <ameya.more@oracle.com> -Reviewed-by: David Hildenbrand <david@redhat.com> -Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 146 +++++++++-------------------------------------------- - 1 file changed, 23 insertions(+), 123 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 2e88fd1..099f221 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -174,7 +174,6 @@ module_param(ple_window_max, int, S_IRUGO); - extern const ulong vmx_return; - - #define NR_AUTOLOAD_MSRS 8 --#define VMCS02_POOL_SIZE 1 - - struct vmcs { - u32 revision_id; -@@ -208,7 +207,7 @@ struct shared_msr_entry { - * stored in guest memory specified by VMPTRLD, but is opaque to the guest, - * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. - * More than one of these structures may exist, if L1 runs multiple L2 guests. 
-- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the -+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the - * underlying hardware which will be used to run L2. - * This structure is packed to ensure that its layout is identical across - * machines (necessary for live migration). -@@ -387,13 +386,6 @@ struct __packed vmcs12 { - */ - #define VMCS12_SIZE 0x1000 - --/* Used to remember the last vmcs02 used for some recently used vmcs12s */ --struct vmcs02_list { -- struct list_head list; -- gpa_t vmptr; -- struct loaded_vmcs vmcs02; --}; -- - /* - * The nested_vmx structure is part of vcpu_vmx, and holds information we need - * for correct emulation of VMX (i.e., nested VMX) on this vcpu. -@@ -420,15 +412,15 @@ struct nested_vmx { - */ - bool sync_shadow_vmcs; - -- /* vmcs02_list cache of VMCSs recently used to run L2 guests */ -- struct list_head vmcs02_pool; -- int vmcs02_num; - bool change_vmcs01_virtual_x2apic_mode; - /* L2 must run next, and mustn't decide to exit to L1. */ - bool nested_run_pending; -+ -+ struct loaded_vmcs vmcs02; -+ - /* -- * Guest pages referred to in vmcs02 with host-physical pointers, so -- * we must keep them pinned while L2 runs. -+ * Guest pages referred to in the vmcs02 with host-physical -+ * pointers, so we must keep them pinned while L2 runs. - */ - struct page *apic_access_page; - struct page *virtual_apic_page; -@@ -6657,94 +6649,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) - } - - /* -- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. -- * We could reuse a single VMCS for all the L2 guests, but we also want the -- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this -- * allows keeping them loaded on the processor, and in the future will allow -- * optimizations where prepare_vmcs02 doesn't need to set all the fields on -- * every entry if they never change. -- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE -- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. -- * -- * The following functions allocate and free a vmcs02 in this pool. -- */ -- --/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ --static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) --{ -- struct vmcs02_list *item; -- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) -- if (item->vmptr == vmx->nested.current_vmptr) { -- list_move(&item->list, &vmx->nested.vmcs02_pool); -- return &item->vmcs02; -- } -- -- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { -- /* Recycle the least recently used VMCS. 
*/ -- item = list_last_entry(&vmx->nested.vmcs02_pool, -- struct vmcs02_list, list); -- item->vmptr = vmx->nested.current_vmptr; -- list_move(&item->list, &vmx->nested.vmcs02_pool); -- return &item->vmcs02; -- } -- -- /* Create a new VMCS */ -- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); -- if (!item) -- return NULL; -- item->vmcs02.vmcs = alloc_vmcs(); -- item->vmcs02.shadow_vmcs = NULL; -- if (!item->vmcs02.vmcs) { -- kfree(item); -- return NULL; -- } -- loaded_vmcs_init(&item->vmcs02); -- item->vmptr = vmx->nested.current_vmptr; -- list_add(&(item->list), &(vmx->nested.vmcs02_pool)); -- vmx->nested.vmcs02_num++; -- return &item->vmcs02; --} -- --/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ --static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) --{ -- struct vmcs02_list *item; -- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) -- if (item->vmptr == vmptr) { -- free_loaded_vmcs(&item->vmcs02); -- list_del(&item->list); -- kfree(item); -- vmx->nested.vmcs02_num--; -- return; -- } --} -- --/* -- * Free all VMCSs saved for this vcpu, except the one pointed by -- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs -- * must be &vmx->vmcs01. -- */ --static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) --{ -- struct vmcs02_list *item, *n; -- -- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); -- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { -- /* -- * Something will leak if the above WARN triggers. Better than -- * a use-after-free. -- */ -- if (vmx->loaded_vmcs == &item->vmcs02) -- continue; -- -- free_loaded_vmcs(&item->vmcs02); -- list_del(&item->list); -- kfree(item); -- vmx->nested.vmcs02_num--; -- } --} -- --/* - * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), - * set the success or error code of an emulated VMX instruction, as specified - * by Vol 2B, VMX Instruction Reference, "Conventions". 
-@@ -7051,6 +6955,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - return 1; - } - -+ vmx->nested.vmcs02.vmcs = alloc_vmcs(); -+ vmx->nested.vmcs02.shadow_vmcs = NULL; -+ if (!vmx->nested.vmcs02.vmcs) -+ goto out_vmcs02; -+ loaded_vmcs_init(&vmx->nested.vmcs02); -+ - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); -@@ -7073,9 +6983,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - vmx->vmcs01.shadow_vmcs = shadow_vmcs; - } - -- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); -- vmx->nested.vmcs02_num = 0; -- - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED); - vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; -@@ -7093,6 +7000,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - free_page((unsigned long)vmx->nested.msr_bitmap); - - out_msr_bitmap: -+ free_loaded_vmcs(&vmx->nested.vmcs02); -+ -+out_vmcs02: - return -ENOMEM; - } - -@@ -7178,7 +7088,7 @@ static void free_nested(struct vcpu_vmx *vmx) - vmx->vmcs01.shadow_vmcs = NULL; - } - kfree(vmx->nested.cached_vmcs12); -- /* Unpin physical memory we referred to in current vmcs02 */ -+ /* Unpin physical memory we referred to in the vmcs02 */ - if (vmx->nested.apic_access_page) { - nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = NULL; -@@ -7194,7 +7104,7 @@ static void free_nested(struct vcpu_vmx *vmx) - vmx->nested.pi_desc = NULL; - } - -- nested_free_all_saved_vmcss(vmx); -+ free_loaded_vmcs(&vmx->nested.vmcs02); - } - - /* Emulate the VMXOFF instruction */ -@@ -7242,8 +7152,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) - kunmap(page); - nested_release_page(page); - -- nested_free_vmcs02(vmx, vmptr); -- - skip_emulated_instruction(vcpu); - nested_vmx_succeed(vcpu); - return 1; -@@ -8032,10 +7940,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) - - /* - * The host physical addresses of some pages of guest memory -- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU -- * may write to these pages via their host physical address while -- * L2 is running, bypassing any address-translation-based dirty -- * tracking (e.g. EPT write protection). -+ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC -+ * Page). The CPU may write to these pages via their host -+ * physical address while L2 is running, bypassing any -+ * address-translation-based dirty tracking (e.g. EPT write -+ * protection). - * - * Mark them dirty on every exit from L2 to prevent them from - * getting out of sync with dirty tracking. -@@ -10170,7 +10079,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) - struct vmcs12 *vmcs12; - struct vcpu_vmx *vmx = to_vmx(vcpu); - int cpu; -- struct loaded_vmcs *vmcs02; - bool ia32e; - u32 msr_entry_idx; - -@@ -10310,17 +10218,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) - * the nested entry. 
- */ - -- vmcs02 = nested_get_current_vmcs02(vmx); -- if (!vmcs02) -- return -ENOMEM; -- - enter_guest_mode(vcpu); - - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) - vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - - cpu = get_cpu(); -- vmx->loaded_vmcs = vmcs02; -+ vmx->loaded_vmcs = &vmx->nested.vmcs02; - vmx_vcpu_put(vcpu); - vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; -@@ -10833,10 +10737,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, - vm_exit_controls_reset_shadow(vmx); - vmx_segment_cache_clear(vmx); - -- /* if no vmcs02 cache requested, remove the one we used */ -- if (VMCS02_POOL_SIZE == 0) -- nested_free_vmcs02(vmx, vmx->nested.current_vmptr); -- - load_vmcs12_host_state(vcpu, vmcs12); - - /* Update any VMCS fields that might have changed while L2 ran */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch deleted file mode 100644 index f43ed637..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch +++ /dev/null @@ -1,70 +0,0 @@ -From ed14e28d25f96ab356ced2a7e9af56fac6483f4d Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sat, 9 Sep 2017 17:31:18 -0700 -Subject: [PATCH 017/103] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET - -There's a 0x1000 in various places, which looks better with a name. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 4 ++-- - arch/x86/include/asm/kaiser.h | 7 +++++-- - 2 files changed, 7 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 57f7993..3c8fc97 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1316,7 +1316,7 @@ ENTRY(nmi) - movq %cr3, %rax - pushq %rax - #ifdef CONFIG_KAISER_REAL_SWITCH -- andq $(~0x1000), %rax -+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax - #endif - movq %rax, %cr3 - #endif -@@ -1559,7 +1559,7 @@ end_repeat_nmi: - movq %cr3, %rax - pushq %rax - #ifdef CONFIG_KAISER_REAL_SWITCH -- andq $(~0x1000), %rax -+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax - #endif - movq %rax, %cr3 - #endif -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 7394ba9..051acf6 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -13,13 +13,16 @@ - * A minimalistic kernel mapping holds the parts needed to be mapped in user - * mode, such as the entry/exit functions of the user space, or the stacks. 
- */ -+ -+#define KAISER_SHADOW_PGD_OFFSET 0x1000 -+ - #ifdef __ASSEMBLY__ - #ifdef CONFIG_KAISER - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg - #ifdef CONFIG_KAISER_REAL_SWITCH --andq $(~0x1000), \reg -+andq $(~KAISER_SHADOW_PGD_OFFSET), \reg - #endif - movq \reg, %cr3 - .endm -@@ -27,7 +30,7 @@ movq \reg, %cr3 - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg - #ifdef CONFIG_KAISER_REAL_SWITCH --orq $(0x1000), \reg -+orq $(KAISER_SHADOW_PGD_OFFSET), \reg - #endif - movq \reg, %cr3 - .endm --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch deleted file mode 100644 index cb6045b1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 1007b2c9e70fe3aaffda12b809da0f3b53642777 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Fri, 20 Apr 2018 14:06:29 +0200 -Subject: [PATCH 17/93] perf/x86: Fix possible Spectre-v1 indexing for - hw_perf_event cache_* - -commit ef9ee4ad38445a30909c48998624861716f2a994 upstream. - -> arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids[cache_type]' (local cap) -> arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids' (local cap) -> arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs[cache_type]' (local cap) -> arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs' (local cap) - -Userspace controls @config which contains 3 (byte) fields used for a 3 -dimensional array deref. 
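
This is the canonical Spectre-v1 clamp: keep the existing bounds check, then additionally clamp the index with array_index_nospec() from <linux/nospec.h> so the load stays in bounds even while a mispredicted bounds check is being speculated past. A minimal sketch of the pattern with placeholder names (idx, table, TABLE_SIZE are illustrative, not from the patch):

#include <linux/nospec.h>	/* array_index_nospec() */

static int lookup(unsigned long idx)
{
	if (idx >= TABLE_SIZE)
		return -EINVAL;
	/*
	 * The branch above can be speculated past; the clamp below
	 * holds regardless: it evaluates to idx when idx < TABLE_SIZE
	 * and to 0 otherwise, without a conditional branch.
	 */
	idx = array_index_nospec(idx, TABLE_SIZE);
	return table[idx];
}
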
- -Reported-by: Dan Carpenter <dan.carpenter@oracle.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: <stable@kernel.org> -Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> -Cc: Arnaldo Carvalho de Melo <acme@redhat.com> -Cc: Jiri Olsa <jolsa@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Stephane Eranian <eranian@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Vince Weaver <vincent.weaver@maine.edu> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/core.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c -index 38623e2..6b955e3 100644 ---- a/arch/x86/events/core.c -+++ b/arch/x86/events/core.c -@@ -303,17 +303,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) - - config = attr->config; - -- cache_type = (config >> 0) & 0xff; -+ cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; -+ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; -+ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; -+ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch deleted file mode 100644 index f5232d18..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 0af038c29f5df7028f229d2d4bf8ee7163db4cdd Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 27 Jan 2018 15:45:14 +0100 -Subject: [PATCH 17/42] x86/cpu/bugs: Make retpoline module warning conditional -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -(cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02) - -If sysfs is disabled and RETPOLINE not defined: - -arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used -[-Wunused-variable] - static bool spectre_v2_bad_module; - -Hide it. 
- -Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") -Reported-by: Borislav Petkov <bp@alien8.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 674ad46..efe55c5 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -93,9 +93,10 @@ static const char *spectre_v2_strings[] = { - #define pr_fmt(fmt) "Spectre V2 : " fmt - - static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; --static bool spectre_v2_bad_module; - - #ifdef RETPOLINE -+static bool spectre_v2_bad_module; -+ - bool retpoline_module_ok(bool has_retpoline) - { - if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) -@@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpoline) - spectre_v2_bad_module = true; - return false; - } -+ -+static inline const char *spectre_v2_module_string(void) -+{ -+ return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; -+} -+#else -+static inline const char *spectre_v2_module_string(void) { return ""; } - #endif - - static void __init spec2_print_if_insecure(const char *reason) -@@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - return sprintf(buf, "Not affected\n"); - - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", -- spectre_v2_bad_module ? " - vulnerable module loaded" : ""); -+ boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", -+ spectre_v2_module_string()); - } - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch deleted file mode 100644 index a22f91a8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 80f4f0e9de9cce1047ac0aac305aca7310e37313 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Thu, 11 Jan 2018 12:16:15 +0100 -Subject: [PATCH 18/33] KVM: VMX: introduce alloc_loaded_vmcs - -(cherry picked from commit f21f165ef922c2146cc5bdc620f542953c41714b) - -Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also -allocate an MSR bitmap there. 
- -Cc: stable@vger.kernel.org # prereq for Spectre mitigation -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++--------------- - 1 file changed, 23 insertions(+), 15 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 099f221..6814355 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -3514,11 +3514,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) - return vmcs; - } - --static struct vmcs *alloc_vmcs(void) --{ -- return alloc_vmcs_cpu(raw_smp_processor_id()); --} -- - static void free_vmcs(struct vmcs *vmcs) - { - free_pages((unsigned long)vmcs, vmcs_config.order); -@@ -3537,6 +3532,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) - WARN_ON(loaded_vmcs->shadow_vmcs != NULL); - } - -+static struct vmcs *alloc_vmcs(void) -+{ -+ return alloc_vmcs_cpu(raw_smp_processor_id()); -+} -+ -+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) -+{ -+ loaded_vmcs->vmcs = alloc_vmcs(); -+ if (!loaded_vmcs->vmcs) -+ return -ENOMEM; -+ -+ loaded_vmcs->shadow_vmcs = NULL; -+ loaded_vmcs_init(loaded_vmcs); -+ return 0; -+} -+ - static void free_kvm_area(void) - { - int cpu; -@@ -6916,6 +6927,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - struct vmcs *shadow_vmcs; - const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED - | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; -+ int r; - - /* The Intel VMX Instruction Reference lists a bunch of bits that - * are prerequisite to running VMXON, most notably cr4.VMXE must be -@@ -6955,11 +6967,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - return 1; - } - -- vmx->nested.vmcs02.vmcs = alloc_vmcs(); -- vmx->nested.vmcs02.shadow_vmcs = NULL; -- if (!vmx->nested.vmcs02.vmcs) -+ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); -+ if (r < 0) - goto out_vmcs02; -- loaded_vmcs_init(&vmx->nested.vmcs02); - - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = -@@ -9090,17 +9100,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) - if (!vmx->guest_msrs) - goto free_pml; - -- vmx->loaded_vmcs = &vmx->vmcs01; -- vmx->loaded_vmcs->vmcs = alloc_vmcs(); -- vmx->loaded_vmcs->shadow_vmcs = NULL; -- if (!vmx->loaded_vmcs->vmcs) -- goto free_msrs; - if (!vmm_exclusive) - kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); -- loaded_vmcs_init(vmx->loaded_vmcs); -+ err = alloc_loaded_vmcs(&vmx->vmcs01); - if (!vmm_exclusive) - kvm_cpu_vmxoff(); -+ if (err < 0) -+ goto free_msrs; - -+ vmx->loaded_vmcs = &vmx->vmcs01; - cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch deleted file mode 100644 index 945d478d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 319109fa5b31997c1bfa7a8384fdb5c3f20b3c6a Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 3 Sep 2017 18:30:43 -0700 -Subject: [PATCH 018/103] kaiser: delete KAISER_REAL_SWITCH option - -We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be -left over from early development, and now just obscures tricky parts -of the code. 
Delete it before adding PCIDs, or nokaiser boot option. - -(Or if there is some good reason to keep the option, then it needs -a help text - and a "depends on KAISER", so that all those without -KAISER are not asked the question. But we'd much rather delete it.) - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 4 ---- - arch/x86/include/asm/kaiser.h | 4 ---- - security/Kconfig | 4 ---- - 3 files changed, 12 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 3c8fc97..df33f10 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1315,9 +1315,7 @@ ENTRY(nmi) - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax --#ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~KAISER_SHADOW_PGD_OFFSET), %rax --#endif - movq %rax, %cr3 - #endif - call do_nmi -@@ -1558,9 +1556,7 @@ end_repeat_nmi: - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax --#ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~KAISER_SHADOW_PGD_OFFSET), %rax --#endif - movq %rax, %cr3 - #endif - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 051acf6..e0fc45e 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -21,17 +21,13 @@ - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg --#ifdef CONFIG_KAISER_REAL_SWITCH - andq $(~KAISER_SHADOW_PGD_OFFSET), \reg --#endif - movq \reg, %cr3 - .endm - - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg --#ifdef CONFIG_KAISER_REAL_SWITCH - orq $(KAISER_SHADOW_PGD_OFFSET), \reg --#endif - movq \reg, %cr3 - .endm - -diff --git a/security/Kconfig b/security/Kconfig -index dc78671..d8ae933 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -41,10 +41,6 @@ config KAISER - - If you are unsure how to answer this question, answer Y. - --config KAISER_REAL_SWITCH -- bool "KAISER: actually switch page tables" -- default y -- - config SECURITYFS - bool "Enable the securityfs filesystem" - help --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch deleted file mode 100644 index 40bc2cae..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 8708c762c727c3c4a8fb6c75fc1d5585f89ece90 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Fri, 20 Apr 2018 14:25:48 +0200 -Subject: [PATCH 18/93] perf/x86/cstate: Fix possible Spectre-v1 indexing for - pkg_msr - -commit a5f81290ce475489fa2551c01a07470c1a4c932e upstream. - -> arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) - -Userspace controls @attr, sanitize cfg (attr->config) before using it -to index an array. 
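
Same clamp pattern as the core.c fix earlier in this series, applied here to the cfg value taken from attr->config. For reference, the semantics of array_index_nospec() can be approximated in branch-free C roughly as below; this is a sketch only, since the real helper in include/linux/nospec.h is built on arch-specific code precisely so the compiler cannot turn it back into a branch:

/* Returns idx when idx < sz and 0 otherwise, with no conditional
 * branch that speculation could bypass. */
static inline unsigned long index_nospec_sketch(unsigned long idx,
						unsigned long sz)
{
	unsigned long mask = 0UL - (idx < sz);	/* ~0UL in range, else 0 */
	return idx & mask;
}
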
- -Reported-by: Dan Carpenter <dan.carpenter@oracle.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: <stable@kernel.org> -Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> -Cc: Arnaldo Carvalho de Melo <acme@redhat.com> -Cc: Jiri Olsa <jolsa@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Stephane Eranian <eranian@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Vince Weaver <vincent.weaver@maine.edu> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/intel/cstate.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c -index fec8a46..c6a04c0 100644 ---- a/arch/x86/events/intel/cstate.c -+++ b/arch/x86/events/intel/cstate.c -@@ -90,6 +90,7 @@ - #include <linux/module.h> - #include <linux/slab.h> - #include <linux/perf_event.h> -+#include <linux/nospec.h> - #include <asm/cpu_device_id.h> - #include <asm/intel-family.h> - #include "../perf_event.h" -@@ -300,6 +301,7 @@ static int cstate_pmu_event_init(struct perf_event *event) - } else if (event->pmu == &cstate_pkg_pmu) { - if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) - return -EINVAL; -+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); - if (!pkg_msr[cfg].attr) - return -EINVAL; - event->hw.event_base = pkg_msr[cfg].msr; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch deleted file mode 100644 index 09e6e0ce..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 9d680bb2dea42b419a94a55a4b65afb1b785b307 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sat, 27 Jan 2018 16:24:32 +0000 -Subject: [PATCH 18/42] x86/cpufeatures: Clean up Spectre v2 related CPUID - flags - -(cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2) - -We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", -"ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them -as the user-visible bits. - -When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB -capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP -bit is set, set the AMD STIBP that's used for the generic hardware -capability. - -Hide the rest from /proc/cpuinfo by putting "" in the comments. Including -RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are -patches to make the sysfs vulnerabilities information non-readable by -non-root, and the same should apply to all information about which -mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. - -The feature bit for whether IBPB is actually used, which is needed for -ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. 
- -Originally-by: Borislav Petkov <bp@suse.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: ak@linux.intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 18 +++++++++--------- - arch/x86/include/asm/nospec-branch.h | 2 +- - arch/x86/kernel/cpu/bugs.c | 7 +++---- - arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++++---------- - 4 files changed, 34 insertions(+), 24 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 3901545..8eb23f5 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -194,15 +194,15 @@ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ - #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ - --#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ --#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ - --#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ -+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ - #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - --#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ -+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -@@ -260,9 +260,9 @@ - /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ - #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ - #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ --#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ --#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ --#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ -+#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -+#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -+#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ - - /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ - #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -@@ -301,8 +301,8 @@ - /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ - #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ --#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ --#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ -+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ - - /* -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 865192a..19ecb54 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", -- X86_FEATURE_IBPB) -+ X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index efe55c5..3a06718 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -272,9 +272,8 @@ static void __init spectre_v2_select_mitigation(void) - } - - /* Initialize Indirect Branch Prediction Barrier if supported */ -- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || -- boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { -- setup_force_cpu_cap(X86_FEATURE_IBPB); -+ if (boot_cpu_has(X86_FEATURE_IBPB)) { -+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); - } - } -@@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - return sprintf(buf, "Not affected\n"); - - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", -+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? 
", IBPB" : "", - spectre_v2_module_string()); - } - #endif -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 4d23d78..2e257f8 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -140,17 +140,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } - -- if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || -- cpu_has(c, X86_FEATURE_STIBP) || -- cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || -- cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || -- cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { -- pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); -- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); -+ /* -+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, -+ * and they also have a different bit for STIBP support. Also, -+ * a hypervisor might have set the individual AMD bits even on -+ * Intel CPUs, for finer-grained selection of what's available. -+ */ -+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { -+ set_cpu_cap(c, X86_FEATURE_IBRS); -+ set_cpu_cap(c, X86_FEATURE_IBPB); -+ } -+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) -+ set_cpu_cap(c, X86_FEATURE_STIBP); -+ -+ /* Now if any of them are set, check the blacklist and clear the lot */ -+ if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || -+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { -+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); -+ clear_cpu_cap(c, X86_FEATURE_IBRS); -+ clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); -- clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); -- clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); -- clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); -+ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); -+ clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch deleted file mode 100644 index 0a8db555..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch +++ /dev/null @@ -1,585 +0,0 @@ -From cc42f184dfdfed46c394274020b84a1641f24714 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Tue, 16 Jan 2018 16:51:18 +0100 -Subject: [PATCH 19/33] KVM: VMX: make MSR bitmaps per-VCPU - -(cherry picked from commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6) - -Place the MSR bitmap in struct loaded_vmcs, and update it in place -every time the x2apic or APICv state can change. This is rare and -the loop can handle 64 MSRs per iteration, in a similar fashion as -nested_vmx_prepare_msr_bitmap. - -This prepares for choosing, on a per-VM basis, whether to intercept -the SPEC_CTRL and PRED_CMD MSRs. 
- -Cc: stable@vger.kernel.org # prereq for Spectre mitigation -Suggested-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 315 +++++++++++++++++++---------------------------------- - 1 file changed, 114 insertions(+), 201 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 6814355..c6a7563 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -110,6 +110,14 @@ static u64 __read_mostly host_xss; - static bool __read_mostly enable_pml = 1; - module_param_named(pml, enable_pml, bool, S_IRUGO); - -+#define MSR_TYPE_R 1 -+#define MSR_TYPE_W 2 -+#define MSR_TYPE_RW 3 -+ -+#define MSR_BITMAP_MODE_X2APIC 1 -+#define MSR_BITMAP_MODE_X2APIC_APICV 2 -+#define MSR_BITMAP_MODE_LM 4 -+ - #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL - - /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ -@@ -191,6 +199,7 @@ struct loaded_vmcs { - struct vmcs *shadow_vmcs; - int cpu; - int launched; -+ unsigned long *msr_bitmap; - struct list_head loaded_vmcss_on_cpu_link; - }; - -@@ -429,8 +438,6 @@ struct nested_vmx { - bool pi_pending; - u16 posted_intr_nv; - -- unsigned long *msr_bitmap; -- - struct hrtimer preemption_timer; - bool preemption_timer_expired; - -@@ -531,6 +538,7 @@ struct vcpu_vmx { - unsigned long host_rsp; - u8 fail; - bool nmi_known_unmasked; -+ u8 msr_bitmap_mode; - u32 exit_intr_info; - u32 idt_vectoring_info; - ulong rflags; -@@ -902,6 +910,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); - static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); - static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); - static int alloc_identity_pagetable(struct kvm *kvm); -+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); - - static DEFINE_PER_CPU(struct vmcs *, vmxarea); - static DEFINE_PER_CPU(struct vmcs *, current_vmcs); -@@ -921,12 +930,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); - - static unsigned long *vmx_io_bitmap_a; - static unsigned long *vmx_io_bitmap_b; --static unsigned long *vmx_msr_bitmap_legacy; --static unsigned long *vmx_msr_bitmap_longmode; --static unsigned long *vmx_msr_bitmap_legacy_x2apic; --static unsigned long *vmx_msr_bitmap_longmode_x2apic; --static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; --static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; - static unsigned long *vmx_vmread_bitmap; - static unsigned long *vmx_vmwrite_bitmap; - -@@ -2517,36 +2520,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) - vmx->guest_msrs[from] = tmp; - } - --static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) --{ -- unsigned long *msr_bitmap; -- -- if (is_guest_mode(vcpu)) -- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; -- else if (cpu_has_secondary_exec_ctrls() && -- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & -- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { -- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { -- if (is_long_mode(vcpu)) -- msr_bitmap = vmx_msr_bitmap_longmode_x2apic; -- else -- msr_bitmap = vmx_msr_bitmap_legacy_x2apic; -- } else { -- if (is_long_mode(vcpu)) -- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; -- else -- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; -- } -- } else { -- if (is_long_mode(vcpu)) -- msr_bitmap = vmx_msr_bitmap_longmode; -- else -- msr_bitmap = vmx_msr_bitmap_legacy; -- } -- -- 
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); --} -- - /* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy -@@ -2587,7 +2560,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) - vmx->save_nmsrs = save_nmsrs; - - if (cpu_has_vmx_msr_bitmap()) -- vmx_set_msr_bitmap(&vmx->vcpu); -+ vmx_update_msr_bitmap(&vmx->vcpu); - } - - /* -@@ -3529,6 +3502,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) - loaded_vmcs_clear(loaded_vmcs); - free_vmcs(loaded_vmcs->vmcs); - loaded_vmcs->vmcs = NULL; -+ if (loaded_vmcs->msr_bitmap) -+ free_page((unsigned long)loaded_vmcs->msr_bitmap); - WARN_ON(loaded_vmcs->shadow_vmcs != NULL); - } - -@@ -3545,7 +3520,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) - - loaded_vmcs->shadow_vmcs = NULL; - loaded_vmcs_init(loaded_vmcs); -+ -+ if (cpu_has_vmx_msr_bitmap()) { -+ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); -+ if (!loaded_vmcs->msr_bitmap) -+ goto out_vmcs; -+ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); -+ } - return 0; -+ -+out_vmcs: -+ free_loaded_vmcs(loaded_vmcs); -+ return -ENOMEM; - } - - static void free_kvm_area(void) -@@ -4548,10 +4534,8 @@ static void free_vpid(int vpid) - spin_unlock(&vmx_vpid_lock); - } - --#define MSR_TYPE_R 1 --#define MSR_TYPE_W 2 --static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, -- u32 msr, int type) -+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, -+ u32 msr, int type) - { - int f = sizeof(unsigned long); - -@@ -4585,8 +4569,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - } - } - --static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, -- u32 msr, int type) -+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, -+ u32 msr, int type) - { - int f = sizeof(unsigned long); - -@@ -4620,6 +4604,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - } - } - -+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, -+ u32 msr, int type, bool value) -+{ -+ if (value) -+ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); -+ else -+ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); -+} -+ - /* - * If a msr is allowed by L0, we should check whether it is allowed by L1. - * The corresponding bit will be cleared unless both of L0 and L1 allow it. 
-@@ -4666,58 +4659,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, - } - } - --static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) -+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) - { -- if (!longmode_only) -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, -- msr, MSR_TYPE_R | MSR_TYPE_W); -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, -- msr, MSR_TYPE_R | MSR_TYPE_W); --} -+ u8 mode = 0; - --static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) --{ -- if (apicv_active) { -- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, -- msr, MSR_TYPE_R); -- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, -- msr, MSR_TYPE_R); -- } else { -- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, -- msr, MSR_TYPE_R); -- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, -- msr, MSR_TYPE_R); -+ if (cpu_has_secondary_exec_ctrls() && -+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & -+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { -+ mode |= MSR_BITMAP_MODE_X2APIC; -+ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) -+ mode |= MSR_BITMAP_MODE_X2APIC_APICV; - } -+ -+ if (is_long_mode(vcpu)) -+ mode |= MSR_BITMAP_MODE_LM; -+ -+ return mode; - } - --static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) -+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) -+ -+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, -+ u8 mode) - { -- if (apicv_active) { -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, -- msr, MSR_TYPE_R); -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, -- msr, MSR_TYPE_R); -- } else { -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, -- msr, MSR_TYPE_R); -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, -- msr, MSR_TYPE_R); -+ int msr; -+ -+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { -+ unsigned word = msr / BITS_PER_LONG; -+ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; -+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; -+ } -+ -+ if (mode & MSR_BITMAP_MODE_X2APIC) { -+ /* -+ * TPR reads and writes can be virtualized even if virtual interrupt -+ * delivery is not in use. 
-+ */ -+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); -+ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { -+ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); -+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); -+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); -+ } - } - } - --static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) -+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) - { -- if (apicv_active) { -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, -- msr, MSR_TYPE_W); -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, -- msr, MSR_TYPE_W); -- } else { -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, -- msr, MSR_TYPE_W); -- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, -- msr, MSR_TYPE_W); -- } -+ struct vcpu_vmx *vmx = to_vmx(vcpu); -+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; -+ u8 mode = vmx_msr_bitmap_mode(vcpu); -+ u8 changed = mode ^ vmx->msr_bitmap_mode; -+ -+ if (!changed) -+ return; -+ -+ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, -+ !(mode & MSR_BITMAP_MODE_LM)); -+ -+ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) -+ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); -+ -+ vmx->msr_bitmap_mode = mode; - } - - static bool vmx_get_enable_apicv(void) -@@ -4953,7 +4956,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) - } - - if (cpu_has_vmx_msr_bitmap()) -- vmx_set_msr_bitmap(vcpu); -+ vmx_update_msr_bitmap(vcpu); - } - - static u32 vmx_exec_control(struct vcpu_vmx *vmx) -@@ -5042,7 +5045,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) - vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); - } - if (cpu_has_vmx_msr_bitmap()) -- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); -+ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); - - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ - -@@ -6371,7 +6374,7 @@ static void wakeup_handler(void) - - static __init int hardware_setup(void) - { -- int r = -ENOMEM, i, msr; -+ int r = -ENOMEM, i; - - rdmsrl_safe(MSR_EFER, &host_efer); - -@@ -6386,41 +6389,13 @@ static __init int hardware_setup(void) - if (!vmx_io_bitmap_b) - goto out; - -- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_legacy) -- goto out1; -- -- vmx_msr_bitmap_legacy_x2apic = -- (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_legacy_x2apic) -- goto out2; -- -- vmx_msr_bitmap_legacy_x2apic_apicv_inactive = -- (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) -- goto out3; -- -- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_longmode) -- goto out4; -- -- vmx_msr_bitmap_longmode_x2apic = -- (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_longmode_x2apic) -- goto out5; -- -- vmx_msr_bitmap_longmode_x2apic_apicv_inactive = -- (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) -- goto out6; -- - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmread_bitmap) -- goto out7; -+ goto out1; - - vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_vmwrite_bitmap) -- goto out8; -+ goto out2; - - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); - 
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); -@@ -6434,12 +6409,9 @@ static __init int hardware_setup(void) - - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - -- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); -- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); -- - if (setup_vmcs_config(&vmcs_config) < 0) { - r = -EIO; -- goto out9; -+ goto out3; - } - - if (boot_cpu_has(X86_FEATURE_NX)) -@@ -6494,48 +6466,8 @@ static __init int hardware_setup(void) - kvm_tsc_scaling_ratio_frac_bits = 48; - } - -- vmx_disable_intercept_for_msr(MSR_FS_BASE, false); -- vmx_disable_intercept_for_msr(MSR_GS_BASE, false); -- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); -- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); -- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); -- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); -- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); -- -- memcpy(vmx_msr_bitmap_legacy_x2apic, -- vmx_msr_bitmap_legacy, PAGE_SIZE); -- memcpy(vmx_msr_bitmap_longmode_x2apic, -- vmx_msr_bitmap_longmode, PAGE_SIZE); -- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, -- vmx_msr_bitmap_legacy, PAGE_SIZE); -- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, -- vmx_msr_bitmap_longmode, PAGE_SIZE); -- - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - -- /* -- * enable_apicv && kvm_vcpu_apicv_active() -- */ -- for (msr = 0x800; msr <= 0x8ff; msr++) -- vmx_disable_intercept_msr_read_x2apic(msr, true); -- -- /* TMCCT */ -- vmx_enable_intercept_msr_read_x2apic(0x839, true); -- /* TPR */ -- vmx_disable_intercept_msr_write_x2apic(0x808, true); -- /* EOI */ -- vmx_disable_intercept_msr_write_x2apic(0x80b, true); -- /* SELF-IPI */ -- vmx_disable_intercept_msr_write_x2apic(0x83f, true); -- -- /* -- * (enable_apicv && !kvm_vcpu_apicv_active()) || -- * !enable_apicv -- */ -- /* TPR */ -- vmx_disable_intercept_msr_read_x2apic(0x808, false); -- vmx_disable_intercept_msr_write_x2apic(0x808, false); -- - if (enable_ept) { - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, - (enable_ept_ad_bits) ? 
VMX_EPT_ACCESS_BIT : 0ull, -@@ -6581,22 +6513,10 @@ static __init int hardware_setup(void) - - return alloc_kvm_area(); - --out9: -- free_page((unsigned long)vmx_vmwrite_bitmap); --out8: -- free_page((unsigned long)vmx_vmread_bitmap); --out7: -- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); --out6: -- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); --out5: -- free_page((unsigned long)vmx_msr_bitmap_longmode); --out4: -- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); - out3: -- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); -+ free_page((unsigned long)vmx_vmwrite_bitmap); - out2: -- free_page((unsigned long)vmx_msr_bitmap_legacy); -+ free_page((unsigned long)vmx_vmread_bitmap); - out1: - free_page((unsigned long)vmx_io_bitmap_b); - out: -@@ -6607,12 +6527,6 @@ static __init int hardware_setup(void) - - static __exit void hardware_unsetup(void) - { -- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); -- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); -- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); -- free_page((unsigned long)vmx_msr_bitmap_legacy); -- free_page((unsigned long)vmx_msr_bitmap_longmode); - free_page((unsigned long)vmx_io_bitmap_b); - free_page((unsigned long)vmx_io_bitmap_a); - free_page((unsigned long)vmx_vmwrite_bitmap); -@@ -6971,13 +6885,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - if (r < 0) - goto out_vmcs02; - -- if (cpu_has_vmx_msr_bitmap()) { -- vmx->nested.msr_bitmap = -- (unsigned long *)__get_free_page(GFP_KERNEL); -- if (!vmx->nested.msr_bitmap) -- goto out_msr_bitmap; -- } -- - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); - if (!vmx->nested.cached_vmcs12) - goto out_cached_vmcs12; -@@ -7007,9 +6914,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) - kfree(vmx->nested.cached_vmcs12); - - out_cached_vmcs12: -- free_page((unsigned long)vmx->nested.msr_bitmap); -- --out_msr_bitmap: - free_loaded_vmcs(&vmx->nested.vmcs02); - - out_vmcs02: -@@ -7088,10 +6992,6 @@ static void free_nested(struct vcpu_vmx *vmx) - vmx->nested.vmxon = false; - free_vpid(vmx->nested.vpid02); - nested_release_vmcs12(vmx); -- if (vmx->nested.msr_bitmap) { -- free_page((unsigned long)vmx->nested.msr_bitmap); -- vmx->nested.msr_bitmap = NULL; -- } - if (enable_shadow_vmcs) { - vmcs_clear(vmx->vmcs01.shadow_vmcs); - free_vmcs(vmx->vmcs01.shadow_vmcs); -@@ -8450,7 +8350,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) - } - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - -- vmx_set_msr_bitmap(vcpu); -+ vmx_update_msr_bitmap(vcpu); - } - - static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) -@@ -9068,6 +8968,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) - { - int err; - struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); -+ unsigned long *msr_bitmap; - int cpu; - - if (!vmx) -@@ -9108,6 +9009,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) - if (err < 0) - goto free_msrs; - -+ msr_bitmap = vmx->vmcs01.msr_bitmap; -+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); -+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); -+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); -+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); -+ 
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); -+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); -+ vmx->msr_bitmap_mode = 0; -+ - vmx->loaded_vmcs = &vmx->vmcs01; - cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); -@@ -9495,7 +9405,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - int msr; - struct page *page; - unsigned long *msr_bitmap_l1; -- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; -+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; - - /* This shortcut is ok because we support only x2APIC MSRs so far. */ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) -@@ -10007,6 +9917,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) - if (kvm_has_tsc_control) - decache_tsc_multiplier(vmx); - -+ if (cpu_has_vmx_msr_bitmap()) -+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); -+ - if (enable_vpid) { - /* - * There is no direct mapping between vpid02 and vpid12, the -@@ -10694,7 +10607,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - - if (cpu_has_vmx_msr_bitmap()) -- vmx_set_msr_bitmap(vcpu); -+ vmx_update_msr_bitmap(vcpu); - - if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, - vmcs12->vm_exit_msr_load_count)) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch deleted file mode 100644 index 8b73b30f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch +++ /dev/null @@ -1,122 +0,0 @@ -From c5cd21271fbd17c27cb4dbfa0a70b9108529d184 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sat, 9 Sep 2017 21:27:32 -0700 -Subject: [PATCH 019/103] kaiser: vmstat show NR_KAISERTABLE as nr_overhead - -The kaiser update made an interesting choice, never to free any shadow -page tables. Contention on global spinlock was worrying, particularly -with it held across page table scans when freeing. Something had to be -done: I was going to add refcounting; but simply never to free them is -an appealing choice, minimizing contention without complicating the code -(the more a page table is found already, the less the spinlock is used). - -But leaking pages in this way is also a worry: can we get away with it? -At the very least, we need a count to show how bad it actually gets: -in principle, one might end up wasting about 1/256 of memory that way -(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512 -for when they are user-mapped from the vmalloc area on another occasion -(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed). - -Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the -shared pgd entries, and 1 for each intermediate page table added -thereafter for user-mapping - but leave out the 1 per mm, for its -shadow pgd, because that distracts from the monotonic increase. -Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled). - -In practice, it doesn't look so bad so far: more like 1/12000 after -nine hours of gtests below; and movable pageblock segregation should -tend to cluster the kaiser tables into a subset of the address space -(if not, they will be bad for compaction too). 
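A quick check of that 1/256 bound: each 4 KiB page-table page maps 2 MiB of virtual address space, an overhead of 4 KiB / 2 MiB = 1/512, and counting both the direct-map and the vmalloc user mappings doubles it to 1/256; the observed 1/12000 cited here is far below that worst case.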
But production may -tell a different story: keep an eye on this number, and bring back -lighter freeing if it gets out of control (maybe a shrinker). - -["nr_overhead" should of course say "nr_kaisertable", if it needs -to stay; but for the moment we are being coy, preferring that when -Joe Blow notices a new line in his /proc/vmstat, he does not get -too curious about what this "kaiser" stuff might be.] - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 16 +++++++++++----- - include/linux/mmzone.h | 3 ++- - mm/vmstat.c | 1 + - 3 files changed, 14 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 7a7e850..bd22ef5 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - if (!new_pmd_page) - return NULL; - spin_lock(&shadow_table_allocation_lock); -- if (pud_none(*pud)) -+ if (pud_none(*pud)) { - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); -- else -+ __inc_zone_page_state(virt_to_page((void *) -+ new_pmd_page), NR_KAISERTABLE); -+ } else - free_page(new_pmd_page); - spin_unlock(&shadow_table_allocation_lock); - } -@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - if (!new_pte_page) - return NULL; - spin_lock(&shadow_table_allocation_lock); -- if (pmd_none(*pmd)) -+ if (pmd_none(*pmd)) { - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); -- else -+ __inc_zone_page_state(virt_to_page((void *) -+ new_pte_page), NR_KAISERTABLE); -+ } else - free_page(new_pte_page); - spin_unlock(&shadow_table_allocation_lock); - } -@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void) - pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); - for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { - pgd_t new_pgd; -- pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE); -+ pud_t *pud = pud_alloc_one(&init_mm, -+ PAGE_OFFSET + i * PGDIR_SIZE); - if (!pud) { - WARN_ON(1); - break; - } -+ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE); - new_pgd = __pgd(_KERNPG_TABLE |__pa(pud)); - /* - * Make sure not to stomp on some other pgd entry. 
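The kaiser.c hunks above all follow one pattern: allocate the new page-table page without the spinlock held, then take shadow_table_allocation_lock and either install the page, bumping NR_KAISERTABLE, or free it because another CPU won the race. A condensed sketch of that pattern (illustrative only; the names and helpers are the ones visible in the hunk, and the function shape is simplified from the real kaiser_pagetable_walk()):

```c
static pmd_t *shadow_pmd_walk(pud_t *pud, unsigned long address)
{
	if (pud_none(*pud)) {
		/* Allocate outside the lock, where sleeping is allowed. */
		unsigned long new_pmd_page = get_zeroed_page(GFP_KERNEL);

		if (!new_pmd_page)
			return NULL;

		spin_lock(&shadow_table_allocation_lock);
		if (pud_none(*pud)) {
			/* We won the race: install and account the page. */
			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
			__inc_zone_page_state(virt_to_page((void *)new_pmd_page),
					      NR_KAISERTABLE);
		} else {
			/* Another CPU installed a pmd page first. */
			free_page(new_pmd_page);
		}
		spin_unlock(&shadow_table_allocation_lock);
	}
	/* Installed pages are never freed, so the counter only grows. */
	return pmd_offset(pud, address);
}
```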
-diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 7e273e2..0547d4f 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -124,8 +124,9 @@ enum zone_stat_item { - NR_SLAB_UNRECLAIMABLE, - NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK_KB, /* measured in KiB */ -- /* Second 128 byte cacheline */ -+ NR_KAISERTABLE, - NR_BOUNCE, -+ /* Second 128 byte cacheline */ - #if IS_ENABLED(CONFIG_ZSMALLOC) - NR_ZSPAGES, /* allocated in zsmalloc */ - #endif -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 604f26a..6a088df 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -932,6 +932,7 @@ const char * const vmstat_text[] = { - "nr_slab_unreclaimable", - "nr_page_table_pages", - "nr_kernel_stack", -+ "nr_overhead", - "nr_bounce", - #if IS_ENABLED(CONFIG_ZSMALLOC) - "nr_zspages", --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch deleted file mode 100644 index 876e4bd9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 2c1bc0d092e3885ee643c9d5755957a1297b5245 Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Fri, 20 Apr 2018 14:23:36 +0200 -Subject: [PATCH 19/93] perf/x86/msr: Fix possible Spectre-v1 indexing in the - MSR driver - -commit 06ce6e9b6d6c09d4129c6e24a1314a395d816c10 upstream. - -> arch/x86/events/msr.c:178 msr_event_init() warn: potential spectre issue 'msr' (local cap) - -Userspace controls @attr, sanitize cfg (attr->config) before using it -to index an array. - -Reported-by: Dan Carpenter <dan.carpenter@oracle.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: <stable@kernel.org> -Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> -Cc: Arnaldo Carvalho de Melo <acme@redhat.com> -Cc: Jiri Olsa <jolsa@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Stephane Eranian <eranian@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Vince Weaver <vincent.weaver@maine.edu> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/msr.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c -index 4bb3ec6..be0b196 100644 ---- a/arch/x86/events/msr.c -+++ b/arch/x86/events/msr.c -@@ -1,4 +1,5 @@ - #include <linux/perf_event.h> -+#include <linux/nospec.h> - #include <asm/intel-family.h> - - enum perf_msr_id { -@@ -136,9 +137,6 @@ static int msr_event_init(struct perf_event *event) - if (event->attr.type != event->pmu->type) - return -ENOENT; - -- if (cfg >= PERF_MSR_EVENT_MAX) -- return -EINVAL; -- - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || -@@ -149,6 +147,11 @@ static int msr_event_init(struct perf_event *event) - event->attr.sample_period) /* no sampling */ - return -EINVAL; - -+ if (cfg >= PERF_MSR_EVENT_MAX) -+ return -EINVAL; -+ -+ cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); -+ - if (!msr[cfg].attr) - return -EINVAL; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch 
b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch deleted file mode 100644 index 60269d5c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch +++ /dev/null @@ -1,261 +0,0 @@ -From 53b3bd3747acd3d6633feaa63a998f854d90551c Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@alien8.de> -Date: Sat, 27 Jan 2018 16:24:33 +0000 -Subject: [PATCH 19/42] x86/retpoline: Simplify vmexit_fill_RSB() - -(cherry picked from commit 1dde7415e99933bb7293d6b2843752cbdb43ec11) - -Simplify it to call an asm-function instead of pasting 41 insn bytes at -every call site. Also, add alignment to the macro as suggested here: - - https://support.google.com/faqs/answer/7625886 - -[dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: ak@linux.intel.com -Cc: dave.hansen@intel.com -Cc: karahmed@amazon.de -Cc: arjan@linux.intel.com -Cc: torvalds@linux-foundation.org -Cc: peterz@infradead.org -Cc: bp@alien8.de -Cc: pbonzini@redhat.com -Cc: tim.c.chen@linux.intel.com -Cc: gregkh@linux-foundation.org -Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_32.S | 3 +- - arch/x86/entry/entry_64.S | 3 +- - arch/x86/include/asm/asm-prototypes.h | 3 ++ - arch/x86/include/asm/nospec-branch.h | 70 ++++------------------------------- - arch/x86/lib/Makefile | 1 + - arch/x86/lib/retpoline.S | 56 ++++++++++++++++++++++++++++ - 6 files changed, 71 insertions(+), 65 deletions(-) - -diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index a76dc73..f5434b4 100644 ---- a/arch/x86/entry/entry_32.S -+++ b/arch/x86/entry/entry_32.S -@@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) - * exist, overwrite the RSB with entries which capture - * speculative execution to prevent attack. - */ -- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+ /* Clobbers %ebx */ -+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW - #endif - - /* restore callee-saved registers */ -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 16146eb..e422e15 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -435,7 +435,8 @@ ENTRY(__switch_to_asm) - * exist, overwrite the RSB with entries which capture - * speculative execution to prevent attack. 
- */ -- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+ /* Clobbers %rbx */ -+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW - #endif - - /* restore callee-saved registers */ -diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h -index 5a25ada..1666542 100644 ---- a/arch/x86/include/asm/asm-prototypes.h -+++ b/arch/x86/include/asm/asm-prototypes.h -@@ -37,4 +37,7 @@ INDIRECT_THUNK(dx) - INDIRECT_THUNK(si) - INDIRECT_THUNK(di) - INDIRECT_THUNK(bp) -+asmlinkage void __fill_rsb(void); -+asmlinkage void __clear_rsb(void); -+ - #endif /* CONFIG_RETPOLINE */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 19ecb54..df4ecec 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -7,50 +7,6 @@ - #include <asm/alternative-asm.h> - #include <asm/cpufeatures.h> - --/* -- * Fill the CPU return stack buffer. -- * -- * Each entry in the RSB, if used for a speculative 'ret', contains an -- * infinite 'pause; lfence; jmp' loop to capture speculative execution. -- * -- * This is required in various cases for retpoline and IBRS-based -- * mitigations for the Spectre variant 2 vulnerability. Sometimes to -- * eliminate potentially bogus entries from the RSB, and sometimes -- * purely to ensure that it doesn't get empty, which on some CPUs would -- * allow predictions from other (unwanted!) sources to be used. -- * -- * We define a CPP macro such that it can be used from both .S files and -- * inline assembly. It's possible to do a .macro and then include that -- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. -- */ -- --#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ --#define RSB_FILL_LOOPS 16 /* To avoid underflow */ -- --/* -- * Google experimented with loop-unrolling and this turned out to be -- * the optimal version — two calls, each with their own speculation -- * trap should their return address end up getting used, in a loop. -- */ --#define __FILL_RETURN_BUFFER(reg, nr, sp) \ -- mov $(nr/2), reg; \ --771: \ -- call 772f; \ --773: /* speculation trap */ \ -- pause; \ -- lfence; \ -- jmp 773b; \ --772: \ -- call 774f; \ --775: /* speculation trap */ \ -- pause; \ -- lfence; \ -- jmp 775b; \ --774: \ -- dec reg; \ -- jnz 771b; \ -- add $(BITS_PER_LONG/8) * nr, sp; -- - #ifdef __ASSEMBLY__ - - /* -@@ -121,17 +77,10 @@ - #endif - .endm - -- /* -- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP -- * monstrosity above, manually. 
-- */ --.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -+/* This clobbers the BX register */ -+.macro FILL_RETURN_BUFFER nr:req ftr:req - #ifdef CONFIG_RETPOLINE -- ANNOTATE_NOSPEC_ALTERNATIVE -- ALTERNATIVE "jmp .Lskip_rsb_\@", \ -- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ -- \ftr --.Lskip_rsb_\@: -+ ALTERNATIVE "", "call __clear_rsb", \ftr - #endif - .endm - -@@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; - static inline void vmexit_fill_RSB(void) - { - #ifdef CONFIG_RETPOLINE -- unsigned long loops; -- -- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE -- ALTERNATIVE("jmp 910f", -- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), -- X86_FEATURE_RETPOLINE) -- "910:" -- : "=r" (loops), ASM_CALL_CONSTRAINT -- : : "memory" ); -+ alternative_input("", -+ "call __fill_rsb", -+ X86_FEATURE_RETPOLINE, -+ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); - #endif - } - -diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile -index 6bf1898..4ad7c4d 100644 ---- a/arch/x86/lib/Makefile -+++ b/arch/x86/lib/Makefile -@@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o - lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o - lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o - lib-$(CONFIG_RETPOLINE) += retpoline.o -+OBJECT_FILES_NON_STANDARD_retpoline.o :=y - - obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o - -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -index c909961..480edc3 100644 ---- a/arch/x86/lib/retpoline.S -+++ b/arch/x86/lib/retpoline.S -@@ -7,6 +7,7 @@ - #include <asm/alternative-asm.h> - #include <asm/export.h> - #include <asm/nospec-branch.h> -+#include <asm/bitsperlong.h> - - .macro THUNK reg - .section .text.__x86.indirect_thunk -@@ -46,3 +47,58 @@ GENERATE_THUNK(r13) - GENERATE_THUNK(r14) - GENERATE_THUNK(r15) - #endif -+ -+/* -+ * Fill the CPU return stack buffer. -+ * -+ * Each entry in the RSB, if used for a speculative 'ret', contains an -+ * infinite 'pause; lfence; jmp' loop to capture speculative execution. -+ * -+ * This is required in various cases for retpoline and IBRS-based -+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to -+ * eliminate potentially bogus entries from the RSB, and sometimes -+ * purely to ensure that it doesn't get empty, which on some CPUs would -+ * allow predictions from other (unwanted!) sources to be used. -+ * -+ * Google experimented with loop-unrolling and this turned out to be -+ * the optimal version - two calls, each with their own speculation -+ * trap should their return address end up getting used, in a loop. 
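The two-call pattern the comment above describes is worth unpacking: each `call` pushes a prediction onto the return stack buffer, and the instructions following the call form a trap (`pause; lfence; jmp` back) so that any speculative `ret` consuming a stale prediction spins harmlessly until it is squashed. A hedged user-space illustration of one unrolled iteration (two RSB entries); this is a toy, not the kernel macro, and the transient stack adjustment would clobber the red zone in real user code:

```c
/*
 * Push two return predictions into the RSB, each pointing at a
 * speculation trap, then discard the two architectural return
 * addresses by rewinding the stack pointer (x86-64, AT&T syntax).
 */
static inline void rsb_fill_two(void)
{
	asm volatile("call 1f\n\t"
		     "2: pause; lfence; jmp 2b\n\t"	/* trap for call 1 */
		     "1: call 3f\n\t"
		     "4: pause; lfence; jmp 4b\n\t"	/* trap for call 2 */
		     "3: add $16, %%rsp\n\t"		/* drop both return addrs */
		     ::: "memory");
}
```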
-+ */ -+.macro STUFF_RSB nr:req sp:req -+ mov $(\nr / 2), %_ASM_BX -+ .align 16 -+771: -+ call 772f -+773: /* speculation trap */ -+ pause -+ lfence -+ jmp 773b -+ .align 16 -+772: -+ call 774f -+775: /* speculation trap */ -+ pause -+ lfence -+ jmp 775b -+ .align 16 -+774: -+ dec %_ASM_BX -+ jnz 771b -+ add $((BITS_PER_LONG/8) * \nr), \sp -+.endm -+ -+#define RSB_FILL_LOOPS 16 /* To avoid underflow */ -+ -+ENTRY(__fill_rsb) -+ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP -+ ret -+END(__fill_rsb) -+EXPORT_SYMBOL_GPL(__fill_rsb) -+ -+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -+ -+ENTRY(__clear_rsb) -+ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP -+ ret -+END(__clear_rsb) -+EXPORT_SYMBOL_GPL(__clear_rsb) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch deleted file mode 100644 index 731a182a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch +++ /dev/null @@ -1,352 +0,0 @@ -From b70d7889c078c97d11ae6412760f3231fda324cd Mon Sep 17 00:00:00 2001 -From: Ashok Raj <ashok.raj@intel.com> -Date: Thu, 1 Feb 2018 22:59:43 +0100 -Subject: [PATCH 20/33] KVM/x86: Add IBPB support - -(cherry picked from commit 15d45071523d89b3fb7372e2135fbd72f6af9506) - -The Indirect Branch Predictor Barrier (IBPB) is an indirect branch -control mechanism. It keeps earlier branches from influencing -later ones. - -Unlike IBRS and STIBP, IBPB does not define a new mode of operation. -It's a command that ensures predicted branch targets aren't used after -the barrier. Although IBRS and IBPB are enumerated by the same CPUID -enumeration, IBPB is very different. - -IBPB helps mitigate against three potential attacks: - -* Mitigate guests from being attacked by other guests. - - This is addressed by issing IBPB when we do a guest switch. - -* Mitigate attacks from guest/ring3->host/ring3. - These would require a IBPB during context switch in host, or after - VMEXIT. The host process has two ways to mitigate - - Either it can be compiled with retpoline - - If its going through context switch, and has set !dumpable then - there is a IBPB in that path. - (Tim's patch: https://patchwork.kernel.org/patch/10192871) - - The case where after a VMEXIT you return back to Qemu might make - Qemu attackable from guest when Qemu isn't compiled with retpoline. - There are issues reported when doing IBPB on every VMEXIT that resulted - in some tsc calibration woes in guest. - -* Mitigate guest/ring0->host/ring0 attacks. - When host kernel is using retpoline it is safe against these attacks. - If host kernel isn't using retpoline we might need to do a IBPB flush on - every VMEXIT. - -Even when using retpoline for indirect calls, in certain conditions 'ret' -can use the BTB on Skylake-era CPUs. There are other mitigations -available like RSB stuffing/clearing. - -* IBPB is issued only for SVM during svm_free_vcpu(). - VMX has a vmclear and SVM doesn't. Follow discussion here: - https://lkml.org/lkml/2018/1/15/146 - -Please refer to the following spec for more details on the enumeration -and control. - -Refer here to get documentation about mitigations. 
- -https://software.intel.com/en-us/side-channel-security-support - -[peterz: rebase and changelog rewrite] -[karahmed: - rebase - - vmx: expose PRED_CMD if guest has it in CPUID - - svm: only pass through IBPB if guest has it in CPUID - - vmx: support !cpu_has_vmx_msr_bitmap()] - - vmx: support nested] -[dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS) - PRED_CMD is a write-only MSR] - -Signed-off-by: Ashok Raj <ashok.raj@intel.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: kvm@vger.kernel.org -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com -Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 11 +++++++- - arch/x86/kvm/cpuid.h | 12 ++++++++ - arch/x86/kvm/svm.c | 28 +++++++++++++++++++ - arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++-- - 4 files changed, 127 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index afa7bbb..42323be 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); - -+ /* cpuid 0x80000008.ebx */ -+ const u32 kvm_cpuid_8000_0008_ebx_x86_features = -+ F(IBPB); -+ - /* cpuid 0xC0000001.edx */ - const u32 kvm_cpuid_C000_0001_edx_x86_features = - F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | -@@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - if (!g_phys_as) - g_phys_as = phys_as; - entry->eax = g_phys_as | (virt_as << 8); -- entry->ebx = entry->edx = 0; -+ entry->edx = 0; -+ /* IBPB isn't necessarily present in hardware cpuid */ -+ if (boot_cpu_has(X86_FEATURE_IBPB)) -+ entry->ebx |= F(IBPB); -+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; -+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - break; - } - case 0x80000019: -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 35058c2..f4a2a1a 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -152,6 +152,18 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) - return best && (best->edx & bit(X86_FEATURE_RDTSCP)); - } - -+static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) -+{ -+ struct kvm_cpuid_entry2 *best; -+ -+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); -+ if (best && (best->ebx & bit(X86_FEATURE_IBPB))) -+ return true; -+ best = kvm_find_cpuid_entry(vcpu, 7, 0); -+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); -+} -+ -+ - /* - * 
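guest_cpuid_has_ibpb() above accepts either of two enumerations: AMD advertises IBPB directly in CPUID leaf 0x80000008 EBX, while Intel implies it through the SPEC_CTRL bit in leaf 7 subleaf 0 EDX. A rough user-space probe of the same two bits (bit positions 12 and 26 are the architectural ones; treat this as an illustration, not kernel code):

```c
#include <stdbool.h>
#include <cpuid.h>

/* Does this CPU enumerate IBPB, by either the AMD or the Intel bit? */
static bool cpu_has_ibpb(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* AMD: CPUID 0x80000008.EBX[12] = IBPB */
	if (__get_cpuid_count(0x80000008, 0, &eax, &ebx, &ecx, &edx) &&
	    (ebx & (1u << 12)))
		return true;

	/* Intel: CPUID 7.0.EDX[26] = SPEC_CTRL, which implies IBPB */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) &&
	    (edx & (1u << 26)))
		return true;

	return false;
}
```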
NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 - */ -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 491f077..43e45b9 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -248,6 +248,7 @@ static const struct svm_direct_access_msrs { - { .index = MSR_CSTAR, .always = true }, - { .index = MSR_SYSCALL_MASK, .always = true }, - #endif -+ { .index = MSR_IA32_PRED_CMD, .always = false }, - { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, - { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, - { .index = MSR_IA32_LASTINTFROMIP, .always = false }, -@@ -510,6 +511,7 @@ struct svm_cpu_data { - struct kvm_ldttss_desc *tss_desc; - - struct page *save_area; -+ struct vmcb *current_vmcb; - }; - - static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -@@ -1641,11 +1643,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) - __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, svm); -+ /* -+ * The vmcb page can be recycled, causing a false negative in -+ * svm_vcpu_load(). So do a full IBPB now. -+ */ -+ indirect_branch_prediction_barrier(); - } - - static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) - { - struct vcpu_svm *svm = to_svm(vcpu); -+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); - int i; - - if (unlikely(cpu != vcpu->cpu)) { -@@ -1674,6 +1682,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) - if (static_cpu_has(X86_FEATURE_RDTSCP)) - wrmsrl(MSR_TSC_AUX, svm->tsc_aux); - -+ if (sd->current_vmcb != svm->vmcb) { -+ sd->current_vmcb = svm->vmcb; -+ indirect_branch_prediction_barrier(); -+ } - avic_vcpu_load(vcpu, cpu); - } - -@@ -3587,6 +3599,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr); - break; -+ case MSR_IA32_PRED_CMD: -+ if (!msr->host_initiated && -+ !guest_cpuid_has_ibpb(vcpu)) -+ return 1; -+ -+ if (data & ~PRED_CMD_IBPB) -+ return 1; -+ -+ if (!data) -+ break; -+ -+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); -+ if (is_guest_mode(vcpu)) -+ break; -+ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); -+ break; - case MSR_STAR: - svm->vmcb->save.star = data; - break; -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index c6a7563..855df75 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -550,6 +550,7 @@ struct vcpu_vmx { - u64 msr_host_kernel_gs_base; - u64 msr_guest_kernel_gs_base; - #endif -+ - u32 vm_entry_controls_shadow; - u32 vm_exit_controls_shadow; - /* -@@ -911,6 +912,8 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); - static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); - static int alloc_identity_pagetable(struct kvm *kvm); - static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); -+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, -+ u32 msr, int type); - - static DEFINE_PER_CPU(struct vmcs *, vmxarea); - static DEFINE_PER_CPU(struct vmcs *, current_vmcs); -@@ -1841,6 +1844,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) - vmcs_write32(EXCEPTION_BITMAP, eb); - } - -+/* -+ * Check if MSR is intercepted for L01 MSR bitmap. 
-+ */ -+static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) -+{ -+ unsigned long *msr_bitmap; -+ int f = sizeof(unsigned long); -+ -+ if (!cpu_has_vmx_msr_bitmap()) -+ return true; -+ -+ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; -+ -+ if (msr <= 0x1fff) { -+ return !!test_bit(msr, msr_bitmap + 0x800 / f); -+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { -+ msr &= 0x1fff; -+ return !!test_bit(msr, msr_bitmap + 0xc00 / f); -+ } -+ -+ return true; -+} -+ - static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit) - { -@@ -2252,6 +2278,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; - vmcs_load(vmx->loaded_vmcs->vmcs); -+ indirect_branch_prediction_barrier(); - } - - if (!already_loaded) { -@@ -3048,6 +3075,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr_info); - break; -+ case MSR_IA32_PRED_CMD: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_ibpb(vcpu)) -+ return 1; -+ -+ if (data & ~PRED_CMD_IBPB) -+ return 1; -+ -+ if (!data) -+ break; -+ -+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); -+ -+ /* -+ * For non-nested: -+ * When it's written (to non-zero) for the first time, pass -+ * it through. -+ * -+ * For nested: -+ * The handling of the MSR bitmap for L2 guests is done in -+ * nested_vmx_merge_msr_bitmap. We should not touch the -+ * vmcs02.msr_bitmap here since it gets completely overwritten -+ * in the merging. -+ */ -+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, -+ MSR_TYPE_W); -+ break; - case MSR_IA32_CR_PAT: - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) -@@ -9406,9 +9460,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - struct page *page; - unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; -+ /* -+ * pred_cmd is trying to verify two things: -+ * -+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This -+ * ensures that we do not accidentally generate an L02 MSR bitmap -+ * from the L12 MSR bitmap that is too permissive. -+ * 2. That L1 or L2s have actually used the MSR. This avoids -+ * unnecessarily merging of the bitmap if the MSR is unused. This -+ * works properly because we only update the L01 MSR bitmap lazily. -+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only -+ * updated to reflect this when L1 (or its L2s) actually write to -+ * the MSR. -+ */ -+ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - -- /* This shortcut is ok because we support only x2APIC MSRs so far. 
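msr_write_intercepted_l01() above leans on the architectural layout of the VMX MSR permission bitmap: one 4 KiB page holding four 1 KiB regions, with read bits for MSRs 0x0-0x1fff at byte offset 0x000, read bits for 0xc0000000-0xc0001fff at 0x400, and the corresponding write bits at 0x800 and 0xc00. A sketch of the same probe generalized to both halves (illustrative; `msr_intercepted` is a made-up helper name, not a kernel function):

```c
/* A set bit means "cause a VM exit for this access". */
static bool msr_intercepted(unsigned long *msr_bitmap, u32 msr, bool write)
{
	int f = sizeof(unsigned long);
	unsigned int base = write ? 0x800 : 0x000;	/* write vs. read half */

	if (msr <= 0x1fff)
		return !!test_bit(msr, msr_bitmap + base / f);
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		return !!test_bit(msr & 0x1fff,
				  msr_bitmap + (base + 0x400) / f);
	return true;	/* MSRs outside both ranges always intercept */
}
```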
*/ -- if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) -+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && -+ !pred_cmd) - return false; - - page = nested_get_page(vcpu, vmcs12->msr_bitmap); -@@ -9443,6 +9511,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - MSR_TYPE_W); - } - } -+ -+ if (pred_cmd) -+ nested_vmx_disable_intercept_for_msr( -+ msr_bitmap_l1, msr_bitmap_l0, -+ MSR_IA32_PRED_CMD, -+ MSR_TYPE_W); -+ - kunmap(page); - nested_release_page_clean(page); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch deleted file mode 100644 index 1cff10af..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch +++ /dev/null @@ -1,424 +0,0 @@ -From d26480ad859d58897cd409ed66ff4bc5e3ba079d Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Wed, 30 Aug 2017 16:23:00 -0700 -Subject: [PATCH 020/103] kaiser: enhanced by kernel and user PCIDs - -Merged performance improvements to Kaiser, using distinct kernel -and user Process Context Identifiers to minimize the TLB flushing. - -[This work actually all from Dave Hansen 2017-08-30: -still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.] - -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 10 ++++-- - arch/x86/entry/entry_64_compat.S | 1 + - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/kaiser.h | 15 +++++++-- - arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++++ - arch/x86/include/asm/tlbflush.h | 52 ++++++++++++++++++++++++----- - arch/x86/include/uapi/asm/processor-flags.h | 3 +- - arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++++ - arch/x86/kvm/x86.c | 3 +- - arch/x86/mm/kaiser.c | 7 ++++ - arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++-- - 11 files changed, 181 insertions(+), 17 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index df33f10..4a0ebf4 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1315,7 +1315,10 @@ ENTRY(nmi) - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax -- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -+ /* mask off "user" bit of pgd address and 12 PCID bits: */ -+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -+ /* Add back kernel PCID and "no flush" bit */ -+ orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - call do_nmi -@@ -1556,7 +1559,10 @@ end_repeat_nmi: - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax -- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -+ /* mask off "user" bit of pgd address and 12 PCID bits: */ -+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -+ /* Add back kernel PCID and "no flush" bit */ -+ orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - -diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S -index f0e384e..0eb5801 100644 ---- a/arch/x86/entry/entry_64_compat.S -+++ b/arch/x86/entry/entry_64_compat.S -@@ -13,6 +13,7 @@ - #include <asm/irqflags.h> - #include <asm/asm.h> - #include <asm/smap.h> -+#include <asm/pgtable_types.h> - #include <asm/kaiser.h> - #include <linux/linkage.h> - #include <linux/err.h> -diff --git 
a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ed10b5b..dc50883 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -189,6 +189,7 @@ - - #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ - #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ - - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ - #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index e0fc45e..360ff3b 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -1,5 +1,8 @@ - #ifndef _ASM_X86_KAISER_H - #define _ASM_X86_KAISER_H -+ -+#include <uapi/asm/processor-flags.h> /* For PCID constants */ -+ - /* - * This file includes the definitions for the KAISER feature. - * KAISER is a counter measure against x86_64 side channel attacks on -@@ -21,13 +24,21 @@ - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg --andq $(~KAISER_SHADOW_PGD_OFFSET), \reg -+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -+orq X86_CR3_PCID_KERN_VAR, \reg - movq \reg, %cr3 - .endm - - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg --orq $(KAISER_SHADOW_PGD_OFFSET), \reg -+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -+/* -+ * This can obviously be one instruction by putting the -+ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. -+ * But, just leave it now for simplicity. -+ */ -+orq X86_CR3_PCID_USER_VAR, \reg -+orq $(KAISER_SHADOW_PGD_OFFSET), \reg - movq \reg, %cr3 - .endm - -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 8bc8d02..ada77fd 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -141,6 +141,32 @@ - _PAGE_SOFT_DIRTY) - #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) - -+/* The ASID is the lower 12 bits of CR3 */ -+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL)) -+ -+/* Mask for all the PCID-related bits in CR3: */ -+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) -+#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) -+#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) -+#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) -+ -+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) -+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) -+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) -+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) -+#else -+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) -+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) -+/* -+ * PCIDs are unsupported on 32-bit and none of these bits can be -+ * set in CR3: -+ */ -+#define X86_CR3_PCID_KERN_FLUSH (0) -+#define X86_CR3_PCID_USER_FLUSH (0) -+#define X86_CR3_PCID_KERN_NOFLUSH (0) -+#define X86_CR3_PCID_USER_NOFLUSH (0) -+#endif -+ - /* - * The cache modes defined here are used to translate between pure SW usage - * and the HW defined cache mode bits and/or PAT entries. 
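The kaiser.h and pgtable_types.h hunks above define the CR3 recipe the entry code uses: bits 11:0 of CR3 carry the PCID (ASID 0x4 for kernel, 0x6 for user), bit 63 is the "no flush" hint, and the user half of the shadow pgd pair is selected by OR-ing in KAISER_SHADOW_PGD_OFFSET. A user-space toy model of the two switch macros (KAISER_SHADOW_PGD_OFFSET is assumed here to be one page, 0x1000, as in the kaiser series; the NOFLUSH bit itself is defined in the processor-flags.h hunk further down):

```c
#include <stdio.h>
#include <stdint.h>

#define CR3_PCID_ASID_MASK	0xfffULL	/* CR3 bits 11:0 */
#define CR3_PCID_NOFLUSH	(1ULL << 63)	/* keep TLB entries for this PCID */
#define SHADOW_PGD_OFFSET	0x1000ULL	/* user pgd = kernel pgd + one page */
#define PCID_ASID_KERN		0x4ULL
#define PCID_ASID_USER		0x6ULL

static uint64_t to_kernel_cr3(uint64_t cr3)
{
	cr3 &= ~(CR3_PCID_ASID_MASK | SHADOW_PGD_OFFSET);   /* strip ASID + user bit */
	return cr3 | CR3_PCID_NOFLUSH | PCID_ASID_KERN;
}

static uint64_t to_user_cr3(uint64_t cr3)
{
	cr3 &= ~(CR3_PCID_ASID_MASK | SHADOW_PGD_OFFSET);
	return cr3 | CR3_PCID_NOFLUSH | PCID_ASID_USER | SHADOW_PGD_OFFSET;
}

int main(void)
{
	uint64_t pgd_pa = 0x12345000ULL;	/* hypothetical pgd physical address */

	printf("kernel: %#llx\n", (unsigned long long)to_kernel_cr3(pgd_pa));
	printf("user:   %#llx\n", (unsigned long long)to_user_cr3(pgd_pa));
	return 0;
}
```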
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index c13041e..28b4182 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) - { - struct { u64 d[2]; } desc = { { pcid, addr } }; -- - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global -@@ -135,14 +134,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - - static inline void __native_flush_tlb(void) - { -+ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { -+ /* -+ * If current->mm == NULL then we borrow a mm which may change during a -+ * task switch and therefore we must not be preempted while we write CR3 -+ * back: -+ */ -+ preempt_disable(); -+ native_write_cr3(native_read_cr3()); -+ preempt_enable(); -+ return; -+ } - /* -- * If current->mm == NULL then we borrow a mm which may change during a -- * task switch and therefore we must not be preempted while we write CR3 -- * back: -+ * We are no longer using globals with KAISER, so a -+ * "nonglobals" flush would work too. But, this is more -+ * conservative. -+ * -+ * Note, this works with CR4.PCIDE=0 or 1. - */ -- preempt_disable(); -- native_write_cr3(native_read_cr3()); -- preempt_enable(); -+ invpcid_flush_all(); - } - - static inline void __native_flush_tlb_global_irq_disabled(void) -@@ -164,6 +174,8 @@ static inline void __native_flush_tlb_global(void) - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. -+ * -+ * Note, this works with CR4.PCIDE=0 or 1. - */ - invpcid_flush_all(); - return; -@@ -183,7 +195,31 @@ static inline void __native_flush_tlb_global(void) - - static inline void __native_flush_tlb_single(unsigned long addr) - { -- asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); -+ /* -+ * SIMICS #GP's if you run INVPCID with type 2/3 -+ * and X86_CR4_PCIDE clear. Shame! -+ * -+ * The ASIDs used below are hard-coded. But, we must not -+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call -+ * invpcid in the case we are called early. -+ */ -+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); -+ return; -+ } -+ /* Flush the address out of both PCIDs. */ -+ /* -+ * An optimization here might be to determine addresses -+ * that are only kernel-mapped and only flush the kernel -+ * ASID. But, userspace flushes are probably much more -+ * important performance-wise. -+ * -+ * Make sure to do only a single invpcid when KAISER is -+ * disabled and we have only a single ASID. 
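The tlbflush.h hunks above juggle three of the four INVPCID invalidation types: invpcid_flush_one() (type 0, one address in one PCID), invpcid_flush_single_context() (type 1, everything in one PCID), and invpcid_flush_all() (type 2, everything including globals). A hedged sketch of the underlying instruction wrapper, in the style of the __invpcid() helper visible at the top of the hunk (modern assemblers accept the mnemonic; kernels of this vintage sometimes hand-encoded the opcode bytes instead):

```c
#define INVPCID_TYPE_INDIV_ADDR		0	/* one address, one PCID */
#define INVPCID_TYPE_SINGLE_CONTEXT	1	/* all entries for one PCID */
#define INVPCID_TYPE_ALL_INCL_GLOBAL	2	/* everything, globals too */
#define INVPCID_TYPE_ALL_NON_GLOBAL	3	/* everything but globals */

static inline void invpcid(unsigned long type, unsigned long pcid,
			   unsigned long addr)
{
	/* 128-bit descriptor: PCID in bits 11:0 of the first qword,
	 * linear address (only used by type 0) in the second. */
	struct { unsigned long long d[2]; } desc = { { pcid, addr } };

	asm volatile("invpcid %0, %1" :: "m" (desc), "r" (type) : "memory");
}
```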
-+ */ -+ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) -+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); -+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); - } - - static inline void __flush_tlb_all(void) -diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h -index 567de50..6768d13 100644 ---- a/arch/x86/include/uapi/asm/processor-flags.h -+++ b/arch/x86/include/uapi/asm/processor-flags.h -@@ -77,7 +77,8 @@ - #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) - #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ - #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) --#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ -+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ -+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) - - /* - * Intel CPU features in CR4 -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 3efde13..b4c0ae5 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - } - } - -+/* -+ * These can have bit 63 set, so we can not just use a plain "or" -+ * instruction to get their value or'd into CR3. It would take -+ * another register. So, we use a memory reference to these -+ * instead. -+ * -+ * This is also handy because systems that do not support -+ * PCIDs just end up or'ing a 0 into their CR3, which does -+ * no harm. -+ */ -+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; -+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; -+ - static void setup_pcid(struct cpuinfo_x86 *c) - { - if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE)) { - cr4_set_bits(X86_CR4_PCIDE); -+ /* -+ * These variables are used by the entry/exit -+ * code to change PCIDs. -+ */ -+#ifdef CONFIG_KAISER -+ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; -+ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; -+#endif -+ /* -+ * INVPCID has two "groups" of types: -+ * 1/2: Invalidate an individual address -+ * 3/4: Invalidate all contexts -+ * -+ * 1/2 take a PCID, but 3/4 do not. So, 3/4 -+ * ignore the PCID argument in the descriptor. -+ * But, we have to be careful not to call 1/2 -+ * with an actual non-zero PCID in them before -+ * we do the above cr4_set_bits(). -+ */ -+ if (cpu_has(c, X86_FEATURE_INVPCID)) -+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index e5bc139..51a700a 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) - return 1; - - /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ -- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) -+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) || -+ !is_long_mode(vcpu)) - return 1; - } - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index bd22ef5..f5c75f7 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void) - } while (0) - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -+extern unsigned long X86_CR3_PCID_KERN_VAR; -+extern unsigned long X86_CR3_PCID_USER_VAR; - /* - * If anything in here fails, we will likely die on one of the - * first kernel->user transitions and init will die. 
But, we -@@ -289,6 +291,11 @@ void __init kaiser_init(void) - kaiser_add_user_map_early(&debug_idt_table, - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL); -+ -+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, -+ __PAGE_KERNEL); -+ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, -+ __PAGE_KERNEL); - } - - /* Add a mapping to the shadow mapping, and synchronize the mappings */ -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a7655f6..a376246 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -36,6 +36,46 @@ struct flush_tlb_info { - unsigned long flush_end; - }; - -+static void load_new_mm_cr3(pgd_t *pgdir) -+{ -+ unsigned long new_mm_cr3 = __pa(pgdir); -+ -+ /* -+ * KAISER, plus PCIDs needs some extra work here. But, -+ * if either of features is not present, we need no -+ * PCIDs here and just do a normal, full TLB flush with -+ * the write_cr3() -+ */ -+ if (!IS_ENABLED(CONFIG_KAISER) || -+ !cpu_feature_enabled(X86_FEATURE_PCID)) -+ goto out_set_cr3; -+ /* -+ * We reuse the same PCID for different tasks, so we must -+ * flush all the entires for the PCID out when we change -+ * tasks. -+ */ -+ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); -+ -+ /* -+ * The flush from load_cr3() may leave old TLB entries -+ * for userspace in place. We must flush that context -+ * separately. We can theoretically delay doing this -+ * until we actually load up the userspace CR3, but -+ * that's a bit tricky. We have to have the "need to -+ * flush userspace PCID" bit per-cpu and check it in the -+ * exit-to-userspace paths. -+ */ -+ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); -+ -+out_set_cr3: -+ /* -+ * Caution: many callers of this function expect -+ * that load_cr3() is serializing and orders TLB -+ * fills with respect to the mm_cpumask writes. -+ */ -+ write_cr3(new_mm_cr3); -+} -+ - /* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. -@@ -47,7 +87,7 @@ void leave_mm(int cpu) - BUG(); - if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { - cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); -- load_cr3(swapper_pg_dir); -+ load_new_mm_cr3(swapper_pg_dir); - /* - * This gets called in the idle path where RCU - * functions differently. Tracing normally -@@ -126,7 +166,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - * ordering guarantee we need. - * - */ -- load_cr3(next->pgd); -+ load_new_mm_cr3(next->pgd); - - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - -@@ -175,7 +215,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - * As above, load_cr3() is serializing and orders TLB - * fills with respect to the mm_cpumask write. 
- */ -- load_cr3(next->pgd); -+ load_new_mm_cr3(next->pgd); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - load_mm_cr4(next); - load_mm_ldt(next); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch deleted file mode 100644 index c4c48d56..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch +++ /dev/null @@ -1,59 +0,0 @@ -From faf22307f64c353212c5c132f45f5e7414cea4bf Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra <peterz@infradead.org> -Date: Fri, 20 Apr 2018 14:08:58 +0200 -Subject: [PATCH 20/93] perf/x86: Fix possible Spectre-v1 indexing for - x86_pmu::event_map() - -commit 46b1b577229a091b137831becaa0fae8690ee15a upstream. - -> arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) -> arch/x86/events/intel/core.c:337 intel_pmu_event_map() warn: potential spectre issue 'intel_perfmon_event_map' -> arch/x86/events/intel/knc.c:122 knc_pmu_event_map() warn: potential spectre issue 'knc_perfmon_event_map' -> arch/x86/events/intel/p4.c:722 p4_pmu_event_map() warn: potential spectre issue 'p4_general_events' -> arch/x86/events/intel/p6.c:116 p6_pmu_event_map() warn: potential spectre issue 'p6_perfmon_event_map' -> arch/x86/events/amd/core.c:132 amd_pmu_event_map() warn: potential spectre issue 'amd_perfmon_event_map' - -Userspace controls @attr, sanitize @attr->config before passing it on -to x86_pmu::event_map(). - -Reported-by: Dan Carpenter <dan.carpenter@oracle.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: <stable@kernel.org> -Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> -Cc: Arnaldo Carvalho de Melo <acme@redhat.com> -Cc: Jiri Olsa <jolsa@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Stephane Eranian <eranian@google.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Vince Weaver <vincent.weaver@maine.edu> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/events/core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c -index 6b955e3..d36ada3 100644 ---- a/arch/x86/events/core.c -+++ b/arch/x86/events/core.c -@@ -26,6 +26,7 @@ - #include <linux/cpu.h> - #include <linux/bitops.h> - #include <linux/device.h> -+#include <linux/nospec.h> - - #include <asm/apic.h> - #include <asm/stacktrace.h> -@@ -423,6 +424,8 @@ int x86_setup_perfctr(struct perf_event *event) - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - -+ attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); -+ - /* - * The generic map: - */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch deleted file mode 100644 index 9a62cf34..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 3ae5467002f15c1915b67a45af81dded8b451533 Mon Sep 17 00:00:00 2001 -From: Dou Liyang <douly.fnst@cn.fujitsu.com> -Date: Tue, 30 Jan 2018 
14:13:50 +0800 -Subject: [PATCH 20/42] x86/spectre: Check CONFIG_RETPOLINE in command line - parser - -(cherry picked from commit 9471eee9186a46893726e22ebb54cade3f9bc043) - -The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is -enabled. As a consequence it fails to emit the appropriate warning and sets -feature flags which have no effect at all. - -Add the missing IS_ENABLED() check. - -Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation") -Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: ak@linux.intel.com -Cc: peterz@infradead.org -Cc: Tomohiro <misono.tomohiro@jp.fujitsu.com> -Cc: dave.hansen@intel.com -Cc: bp@alien8.de -Cc: arjan@linux.intel.com -Cc: dwmw@amazon.co.uk -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 3a06718..51624c6 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -212,10 +212,10 @@ static void __init spectre_v2_select_mitigation(void) - return; - - case SPECTRE_V2_CMD_FORCE: -- /* FALLTRHU */ - case SPECTRE_V2_CMD_AUTO: -- goto retpoline_auto; -- -+ if (IS_ENABLED(CONFIG_RETPOLINE)) -+ goto retpoline_auto; -+ break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch deleted file mode 100644 index 538a1137..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch +++ /dev/null @@ -1,156 +0,0 @@ -From dc7636423649302a329856f238df8820b9c7dc28 Mon Sep 17 00:00:00 2001 -From: KarimAllah Ahmed <karahmed@amazon.de> -Date: Thu, 1 Feb 2018 22:59:44 +0100 -Subject: [PATCH 21/33] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES - -(cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd) - -Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO -(bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the -contents will come directly from the hardware, but user-space can still -override it. 
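The two bits this commit message names are the only ones defined in MSR_IA32_ARCH_CAPABILITIES at the time: RDCL_NO (bit 0, the part is not vulnerable to rogue data cache load, i.e. Meltdown) and IBRS_ALL (bit 1, enhanced IBRS). A small decoder for those two bits (user-space illustration; bit positions as stated in the commit message):

```c
#include <stdio.h>
#include <stdint.h>

#define ARCH_CAP_RDCL_NO	(1ULL << 0)	/* not vulnerable to rogue data cache load */
#define ARCH_CAP_IBRS_ALL	(1ULL << 1)	/* enhanced IBRS: set once, always on */

static void decode_arch_capabilities(uint64_t val)
{
	printf("RDCL_NO : %s\n", (val & ARCH_CAP_RDCL_NO)  ? "yes" : "no");
	printf("IBRS_ALL: %s\n", (val & ARCH_CAP_IBRS_ALL) ? "yes" : "no");
}

int main(void)
{
	/* Hypothetical value a hypervisor might report for a fixed part. */
	decode_arch_capabilities(ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL);
	return 0;
}
```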
- -[dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] - -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> -Reviewed-by: Darren Kenny <darren.kenny@oracle.com> -Reviewed-by: Jim Mattson <jmattson@google.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: kvm@vger.kernel.org -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Ashok Raj <ashok.raj@intel.com> -Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 8 +++++++- - arch/x86/kvm/cpuid.h | 8 ++++++++ - arch/x86/kvm/vmx.c | 15 +++++++++++++++ - arch/x86/kvm/x86.c | 1 + - 4 files changed, 31 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 42323be..4d3555b 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - /* cpuid 7.0.ecx*/ - const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; - -+ /* cpuid 7.0.edx*/ -+ const u32 kvm_cpuid_7_0_edx_x86_features = -+ F(ARCH_CAPABILITIES); -+ - /* all calls to cpuid_count() should be made on the same cpu */ - get_cpu(); - -@@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - /* PKU is not yet implemented for shadow paging. 
*/ - if (!tdp_enabled) - entry->ecx &= ~F(PKU); -+ entry->edx &= kvm_cpuid_7_0_edx_x86_features; -+ cpuid_mask(&entry->edx, CPUID_7_EDX); - } else { - entry->ebx = 0; - entry->ecx = 0; -+ entry->edx = 0; - } - entry->eax = 0; -- entry->edx = 0; - break; - } - case 9: -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index f4a2a1a..a69906c 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -163,6 +163,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) - return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); - } - -+static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) -+{ -+ struct kvm_cpuid_entry2 *best; -+ -+ best = kvm_find_cpuid_entry(vcpu, 7, 0); -+ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); -+} -+ - - /* - * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 855df75..d8e3c02 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -551,6 +551,8 @@ struct vcpu_vmx { - u64 msr_guest_kernel_gs_base; - #endif - -+ u64 arch_capabilities; -+ - u32 vm_entry_controls_shadow; - u32 vm_exit_controls_shadow; - /* -@@ -2976,6 +2978,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(vcpu); - break; -+ case MSR_IA32_ARCH_CAPABILITIES: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_arch_capabilities(vcpu)) -+ return 1; -+ msr_info->data = to_vmx(vcpu)->arch_capabilities; -+ break; - case MSR_IA32_SYSENTER_CS: - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); - break; -@@ -3102,6 +3110,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, - MSR_TYPE_W); - break; -+ case MSR_IA32_ARCH_CAPABILITIES: -+ if (!msr_info->host_initiated) -+ return 1; -+ vmx->arch_capabilities = data; -+ break; - case MSR_IA32_CR_PAT: - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) -@@ -5173,6 +5186,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) - ++vmx->nmsrs; - } - -+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) -+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); - - vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index abbb37a..d01742e 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { - #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, -+ MSR_IA32_ARCH_CAPABILITIES - }; - - static unsigned num_msrs_to_save; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch deleted file mode 100644 index 25d3516f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch +++ /dev/null @@ -1,403 +0,0 @@ -From 3fcc7d8f13c49e10eaab57657cee9d42eb1b5fe7 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Thu, 17 Aug 2017 15:00:37 -0700 -Subject: [PATCH 021/103] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush - user - -We have many machines (Westmere, Sandybridge, Ivybridge) supporting -PCID but not INVPCID: on these load_new_mm_cr3() simply 
crashed. - -Flushing user context inside load_new_mm_cr3() without the use of -invpcid is difficult: momentarily switch from kernel to user context -and back to do so? I'm not sure whether that can be safely done at -all, and would risk polluting user context with kernel internals, -and kernel context with stale user externals. - -Instead, follow the hint in the comment that was there: change -X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3() -can leave a note in it, for SWITCH_USER_CR3 on return to userspace to -flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH. - -Which works well enough that there's no need to do it this way only -when invpcid is unsupported: it's a good alternative to invpcid here. -But there's a couple of inlines in asm/tlbflush.h that need to do the -same trick, so it's best to localize all this per-cpu business in -mm/kaiser.c: moving that part of the initialization from setup_pcid() -to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the -function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a -KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit. - -I did try to make the feature tests in asm/tlbflush.h more consistent -with each other: there seem to be far too many ways of performing such -tests, and I don't have a good grasp of their differences. At first -I converted them all to be static_cpu_has(): but that proved to be a -mistake, as the comment in __native_flush_tlb_single() hints; so then -I reversed and made them all this_cpu_has(). Probably all gratuitous -change, but that's the way it's working at present. - -I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR -gets re-initialized by each cpu (before and after these changes): -no problem when (as usual) all cpus on a machine have the same -features, but in principle incorrect. However, my experiment -to per-cpu-ify that one did not end well... - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kaiser.h | 18 +++++++------ - arch/x86/include/asm/tlbflush.h | 56 ++++++++++++++++++++++++++++------------- - arch/x86/kernel/cpu/common.c | 22 +--------------- - arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++----- - arch/x86/mm/tlb.c | 46 +++++++++++++-------------------- - 5 files changed, 113 insertions(+), 79 deletions(-) - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 360ff3b..009bca5 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -32,13 +32,12 @@ movq \reg, %cr3 - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg --/* -- * This can obviously be one instruction by putting the -- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. -- * But, just leave it now for simplicity. -- */ --orq X86_CR3_PCID_USER_VAR, \reg --orq $(KAISER_SHADOW_PGD_OFFSET), \reg -+orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg -+js 9f -+// FLUSH this time, reset to NOFLUSH for next time -+// But if nopcid? Consider using 0x80 for user pcid? 
-+movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) -+9: - movq \reg, %cr3 - .endm - -@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - */ - DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - -+extern unsigned long X86_CR3_PCID_KERN_VAR; -+DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); -+ -+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -+ - /** - * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping - * @addr: the start address of the range -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 28b4182..4fff696 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -12,6 +12,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) - { - struct { u64 d[2]; } desc = { { pcid, addr } }; -+ - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global -@@ -132,27 +133,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - cr4_set_bits(mask); - } - -+/* -+ * Declare a couple of kaiser interfaces here for convenience, -+ * to avoid the need for asm/kaiser.h in unexpected places. -+ */ -+#ifdef CONFIG_KAISER -+extern void kaiser_setup_pcid(void); -+extern void kaiser_flush_tlb_on_return_to_user(void); -+#else -+static inline void kaiser_setup_pcid(void) -+{ -+} -+static inline void kaiser_flush_tlb_on_return_to_user(void) -+{ -+} -+#endif -+ - static inline void __native_flush_tlb(void) - { -- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { -+ if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* -- * If current->mm == NULL then we borrow a mm which may change during a -- * task switch and therefore we must not be preempted while we write CR3 -- * back: -+ * Note, this works with CR4.PCIDE=0 or 1. - */ -- preempt_disable(); -- native_write_cr3(native_read_cr3()); -- preempt_enable(); -+ invpcid_flush_all_nonglobals(); - return; - } -+ - /* -- * We are no longer using globals with KAISER, so a -- * "nonglobals" flush would work too. But, this is more -- * conservative. -- * -- * Note, this works with CR4.PCIDE=0 or 1. -+ * If current->mm == NULL then we borrow a mm which may change during a -+ * task switch and therefore we must not be preempted while we write CR3 -+ * back: - */ -- invpcid_flush_all(); -+ preempt_disable(); -+ if (this_cpu_has(X86_FEATURE_PCID)) -+ kaiser_flush_tlb_on_return_to_user(); -+ native_write_cr3(native_read_cr3()); -+ preempt_enable(); - } - - static inline void __native_flush_tlb_global_irq_disabled(void) -@@ -168,9 +184,13 @@ static inline void __native_flush_tlb_global_irq_disabled(void) - - static inline void __native_flush_tlb_global(void) - { -+#ifdef CONFIG_KAISER -+ /* Globals are not used at all */ -+ __native_flush_tlb(); -+#else - unsigned long flags; - -- if (static_cpu_has(X86_FEATURE_INVPCID)) { -+ if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. -@@ -187,10 +207,9 @@ static inline void __native_flush_tlb_global(void) - * be called from deep inside debugging code.) 
- */ - raw_local_irq_save(flags); -- - __native_flush_tlb_global_irq_disabled(); -- - raw_local_irq_restore(flags); -+#endif - } - - static inline void __native_flush_tlb_single(unsigned long addr) -@@ -201,9 +220,12 @@ static inline void __native_flush_tlb_single(unsigned long addr) - * - * The ASIDs used below are hard-coded. But, we must not - * call invpcid(type=1/2) before CR4.PCIDE=1. Just call -- * invpcid in the case we are called early. -+ * invlpg in the case we are called early. - */ -+ - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -+ if (this_cpu_has(X86_FEATURE_PCID)) -+ kaiser_flush_tlb_on_return_to_user(); - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - return; - } -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index b4c0ae5..e6be5f3 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -324,33 +324,12 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - } - } - --/* -- * These can have bit 63 set, so we can not just use a plain "or" -- * instruction to get their value or'd into CR3. It would take -- * another register. So, we use a memory reference to these -- * instead. -- * -- * This is also handy because systems that do not support -- * PCIDs just end up or'ing a 0 into their CR3, which does -- * no harm. -- */ --__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; --__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; -- - static void setup_pcid(struct cpuinfo_x86 *c) - { - if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE)) { - cr4_set_bits(X86_CR4_PCIDE); - /* -- * These variables are used by the entry/exit -- * code to change PCIDs. -- */ --#ifdef CONFIG_KAISER -- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; -- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; --#endif -- /* - * INVPCID has two "groups" of types: - * 1/2: Invalidate an individual address - * 3/4: Invalidate all contexts -@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x86 *c) - clear_cpu_cap(c, X86_FEATURE_PCID); - } - } -+ kaiser_setup_pcid(); - } - - /* -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index f5c75f7..7056840 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -11,12 +11,26 @@ - #include <linux/uaccess.h> - - #include <asm/kaiser.h> -+#include <asm/tlbflush.h> /* to verify its kaiser declarations */ - #include <asm/pgtable.h> - #include <asm/pgalloc.h> - #include <asm/desc.h> -+ - #ifdef CONFIG_KAISER -+__visible -+DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); -+ -+/* -+ * These can have bit 63 set, so we can not just use a plain "or" -+ * instruction to get their value or'd into CR3. It would take -+ * another register. So, we use a memory reference to these instead. -+ * -+ * This is also handy because systems that do not support PCIDs -+ * just end up or'ing a 0 into their CR3, which does no harm. -+ */ -+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; -+DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); - --__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - /* - * At runtime, the only things we map are some things for CPU - * hotplug, and stacks for new processes. 
No two CPUs will ever -@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(void) - WARN_ON(__ret); \ - } while (0) - --extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; --extern unsigned long X86_CR3_PCID_KERN_VAR; --extern unsigned long X86_CR3_PCID_USER_VAR; - /* - * If anything in here fails, we will likely die on one of the - * first kernel->user transitions and init will die. But, we -@@ -294,8 +305,6 @@ void __init kaiser_init(void) - - kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, - __PAGE_KERNEL); -- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, -- __PAGE_KERNEL); - } - - /* Add a mapping to the shadow mapping, and synchronize the mappings */ -@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - } - return pgd; - } -+ -+void kaiser_setup_pcid(void) -+{ -+ unsigned long kern_cr3 = 0; -+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; -+ -+ if (this_cpu_has(X86_FEATURE_PCID)) { -+ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; -+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; -+ } -+ /* -+ * These variables are used by the entry/exit -+ * code to change PCID and pgd and TLB flushing. -+ */ -+ X86_CR3_PCID_KERN_VAR = kern_cr3; -+ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); -+} -+ -+/* -+ * Make a note that this cpu will need to flush USER tlb on return to user. -+ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: -+ * if cpu does not, then the NOFLUSH bit will never have been set. -+ */ -+void kaiser_flush_tlb_on_return_to_user(void) -+{ -+ this_cpu_write(X86_CR3_PCID_USER_VAR, -+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); -+} -+EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); - #endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a376246..a2532d4 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -6,13 +6,14 @@ - #include <linux/interrupt.h> - #include <linux/export.h> - #include <linux/cpu.h> -+#include <linux/debugfs.h> - - #include <asm/tlbflush.h> - #include <asm/mmu_context.h> - #include <asm/cache.h> - #include <asm/apic.h> - #include <asm/uv/uv.h> --#include <linux/debugfs.h> -+#include <asm/kaiser.h> - - /* - * Smarter SMP flushing macros. -@@ -40,34 +41,23 @@ static void load_new_mm_cr3(pgd_t *pgdir) - { - unsigned long new_mm_cr3 = __pa(pgdir); - -- /* -- * KAISER, plus PCIDs needs some extra work here. But, -- * if either of features is not present, we need no -- * PCIDs here and just do a normal, full TLB flush with -- * the write_cr3() -- */ -- if (!IS_ENABLED(CONFIG_KAISER) || -- !cpu_feature_enabled(X86_FEATURE_PCID)) -- goto out_set_cr3; -- /* -- * We reuse the same PCID for different tasks, so we must -- * flush all the entires for the PCID out when we change -- * tasks. -- */ -- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); -- -- /* -- * The flush from load_cr3() may leave old TLB entries -- * for userspace in place. We must flush that context -- * separately. We can theoretically delay doing this -- * until we actually load up the userspace CR3, but -- * that's a bit tricky. We have to have the "need to -- * flush userspace PCID" bit per-cpu and check it in the -- * exit-to-userspace paths. -- */ -- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); -+#ifdef CONFIG_KAISER -+ if (this_cpu_has(X86_FEATURE_PCID)) { -+ /* -+ * We reuse the same PCID for different tasks, so we must -+ * flush all the entries for the PCID out when we change tasks. 
-+ * Flush KERN below, flush USER when returning to userspace in -+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro. -+ * -+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could -+ * do it here, but can only be used if X86_FEATURE_INVPCID is -+ * available - and many machines support pcid without invpcid. -+ */ -+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; -+ kaiser_flush_tlb_on_return_to_user(); -+ } -+#endif /* CONFIG_KAISER */ - --out_set_cr3: - /* - * Caution: many callers of this function expect - * that load_cr3() is serializing and orders TLB --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch deleted file mode 100644 index 39d81c71..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 5fb8da20577a159d311db9c29e62dbb782529571 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sun, 20 May 2018 20:51:10 +0100 -Subject: [PATCH 21/93] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running - under Xen - -commit def9331a12977770cc6132d79f8e6565871e8e38 upstream - -When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set -on AMD cpus. - -This bug/feature bit is kind of special as it will be used very early -when switching threads. Setting the bit and clearing it a little bit -later leaves a critical window where things can go wrong. This time -window has enlarged a little bit by using setup_clear_cpu_cap() instead -of the hypervisor's set_cpu_features callback. It seems this larger -window now makes it rather easy to hit the problem. - -The proper solution is to never set the bit in case of Xen. - -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Juergen Gross <jgross@suse.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/amd.c | 5 +++-- - arch/x86/xen/enlighten.c | 4 +--- - 2 files changed, 4 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index c375bc6..747f8a2 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -824,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) - set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - -- /* AMD CPUs don't reset SS attributes on SYSRET */ -- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -+ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. 
*/ -+ if (!cpu_has(c, X86_FEATURE_XENPV)) -+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - } - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index 5226379..8b97c87 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -1968,10 +1968,8 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); - - static void xen_set_cpu_features(struct cpuinfo_x86 *c) - { -- if (xen_pv_domain()) { -- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -+ if (xen_pv_domain()) - set_cpu_cap(c, X86_FEATURE_XENPV); -- } - } - - static void xen_pin_vcpu(int cpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch deleted file mode 100644 index c476da81..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch +++ /dev/null @@ -1,207 +0,0 @@ -From 18dacfea13d15dbf2fa1037cf76ee463c52af031 Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 28 Jan 2018 10:38:49 -0800 -Subject: [PATCH 21/42] x86/entry/64: Remove the SYSCALL64 fast path - -(cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9) - -The SYCALLL64 fast path was a nice, if small, optimization back in the good -old days when syscalls were actually reasonably fast. Now there is PTI to -slow everything down, and indirect branches are verboten, making everything -messier. The retpoline code in the fast path is particularly nasty. - -Just get rid of the fast path. The slow path is barely slower. - -[ tglx: Split out the 'push all extra regs' part ] - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> -Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 123 +------------------------------------------- - arch/x86/entry/syscall_64.c | 7 +-- - 2 files changed, 3 insertions(+), 127 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index e422e15..4360253 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) - pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - -- /* -- * If we need to do entry work or if we guess we'll need to do -- * exit work, go straight to the slow path. -- */ -- movq PER_CPU_VAR(current_task), %r11 -- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) -- jnz entry_SYSCALL64_slow_path -- --entry_SYSCALL_64_fastpath: -- /* -- * Easy case: enable interrupts and issue the syscall. If the syscall -- * needs pt_regs, we'll call a stub that disables interrupts again -- * and jumps to the slow path. -- */ -- TRACE_IRQS_ON -- ENABLE_INTERRUPTS(CLBR_NONE) --#if __SYSCALL_MASK == ~0 -- cmpq $__NR_syscall_max, %rax --#else -- andl $__SYSCALL_MASK, %eax -- cmpl $__NR_syscall_max, %eax --#endif -- ja 1f /* return -ENOSYS (already in pt_regs->ax) */ -- movq %r10, %rcx -- -- /* -- * This call instruction is handled specially in stub_ptregs_64. 
-- * It might end up jumping to the slow path. If it jumps, RAX -- * and all argument registers are clobbered. -- */ --#ifdef CONFIG_RETPOLINE -- movq sys_call_table(, %rax, 8), %rax -- call __x86_indirect_thunk_rax --#else -- call *sys_call_table(, %rax, 8) --#endif --.Lentry_SYSCALL_64_after_fastpath_call: -- -- movq %rax, RAX(%rsp) --1: -- -- /* -- * If we get here, then we know that pt_regs is clean for SYSRET64. -- * If we see that no exit work is required (which we are required -- * to check with IRQs off), then we can go straight to SYSRET64. -- */ -- DISABLE_INTERRUPTS(CLBR_NONE) -- TRACE_IRQS_OFF -- movq PER_CPU_VAR(current_task), %r11 -- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) -- jnz 1f -- -- LOCKDEP_SYS_EXIT -- TRACE_IRQS_ON /* user mode is traced as IRQs on */ -- movq RIP(%rsp), %rcx -- movq EFLAGS(%rsp), %r11 -- RESTORE_C_REGS_EXCEPT_RCX_R11 -- /* -- * This opens a window where we have a user CR3, but are -- * running in the kernel. This makes using the CS -- * register useless for telling whether or not we need to -- * switch CR3 in NMIs. Normal interrupts are OK because -- * they are off here. -- */ -- SWITCH_USER_CR3 -- movq RSP(%rsp), %rsp -- USERGS_SYSRET64 -- --1: -- /* -- * The fast path looked good when we started, but something changed -- * along the way and we need to switch to the slow path. Calling -- * raise(3) will trigger this, for example. IRQs are off. -- */ -- TRACE_IRQS_ON -- ENABLE_INTERRUPTS(CLBR_NONE) -- SAVE_EXTRA_REGS -- movq %rsp, %rdi -- call syscall_return_slowpath /* returns with IRQs disabled */ -- jmp return_from_SYSCALL_64 -- --entry_SYSCALL64_slow_path: - /* IRQs are off. */ - SAVE_EXTRA_REGS - movq %rsp, %rdi - call do_syscall_64 /* returns with IRQs disabled */ - --return_from_SYSCALL_64: - RESTORE_EXTRA_REGS - TRACE_IRQS_IRETQ /* we're about to change IF */ - -@@ -339,6 +256,7 @@ return_from_SYSCALL_64: - syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_RCX_R11 -+ - /* - * This opens a window where we have a user CR3, but are - * running in the kernel. This makes using the CS -@@ -363,45 +281,6 @@ opportunistic_sysret_failed: - jmp restore_c_regs_and_iret - END(entry_SYSCALL_64) - --ENTRY(stub_ptregs_64) -- /* -- * Syscalls marked as needing ptregs land here. -- * If we are on the fast path, we need to save the extra regs, -- * which we achieve by trying again on the slow path. If we are on -- * the slow path, the extra regs are already saved. -- * -- * RAX stores a pointer to the C function implementing the syscall. -- * IRQs are on. 
-- */ -- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) -- jne 1f -- -- /* -- * Called from fast path -- disable IRQs again, pop return address -- * and jump to slow path -- */ -- DISABLE_INTERRUPTS(CLBR_NONE) -- TRACE_IRQS_OFF -- popq %rax -- jmp entry_SYSCALL64_slow_path -- --1: -- JMP_NOSPEC %rax /* Called from C */ --END(stub_ptregs_64) -- --.macro ptregs_stub func --ENTRY(ptregs_\func) -- leaq \func(%rip), %rax -- jmp stub_ptregs_64 --END(ptregs_\func) --.endm -- --/* Instantiate ptregs_stub for each ptregs-using syscall */ --#define __SYSCALL_64_QUAL_(sym) --#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym --#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) --#include <asm/syscalls_64.h> -- - /* - * %rdi: prev task - * %rsi: next task -diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c -index 9dbc5ab..6705edd 100644 ---- a/arch/x86/entry/syscall_64.c -+++ b/arch/x86/entry/syscall_64.c -@@ -6,14 +6,11 @@ - #include <asm/asm-offsets.h> - #include <asm/syscall.h> - --#define __SYSCALL_64_QUAL_(sym) sym --#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym -- --#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - #include <asm/syscalls_64.h> - #undef __SYSCALL_64 - --#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), -+#define __SYSCALL_64(nr, sym, qual) [nr] = sym, - - extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch deleted file mode 100644 index 9a833616..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch +++ /dev/null @@ -1,305 +0,0 @@ -From 3a5351279f63e7822bbfe5c0f4ee3d5a1a5bced1 Mon Sep 17 00:00:00 2001 -From: KarimAllah Ahmed <karahmed@amazon.de> -Date: Thu, 1 Feb 2018 22:59:45 +0100 -Subject: [PATCH 22/33] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL - -(cherry picked from commit d28b387fb74da95d69d2615732f50cceb38e9a4d) - -[ Based on a patch from Ashok Raj <ashok.raj@intel.com> ] - -Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for -guests that will only mitigate Spectre V2 through IBRS+IBPB and will not -be using a retpoline+IBPB based approach. - -To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for -guests that do not actually use the MSR, only start saving and restoring -when a non-zero is written to it. - -No attempt is made to handle STIBP here, intentionally. Filtering STIBP -may be added in a future patch, which may require trapping all writes -if we don't want to pass it through directly to the guest. 
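-
-[note: not part of the original patch -- the lazy save/restore policy
- described above, reduced to a sketch; the logic mirrors the vmx.c hunks
- further down:
-
-	/* vmentry: restore only if the guest ever wrote a non-zero value */
-	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-	/* ... guest runs ... */
-	/* vmexit: read back only if the MSR is no longer intercepted */
-	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-]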
- -[dwmw2: Clean up CPUID bits, save/restore manually, handle reset] - -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Darren Kenny <darren.kenny@oracle.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Jim Mattson <jmattson@google.com> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: kvm@vger.kernel.org -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Ashok Raj <ashok.raj@intel.com> -Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 8 ++-- - arch/x86/kvm/cpuid.h | 11 ++++++ - arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++- - arch/x86/kvm/x86.c | 2 +- - 4 files changed, 118 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 4d3555b..bcebe84 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 0x80000008.ebx */ - const u32 kvm_cpuid_8000_0008_ebx_x86_features = -- F(IBPB); -+ F(IBPB) | F(IBRS); - - /* cpuid 0xC0000001.edx */ - const u32 kvm_cpuid_C000_0001_edx_x86_features = -@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 7.0.edx*/ - const u32 kvm_cpuid_7_0_edx_x86_features = -- F(ARCH_CAPABILITIES); -+ F(SPEC_CTRL) | F(ARCH_CAPABILITIES); - - /* all calls to cpuid_count() should be made on the same cpu */ - get_cpu(); -@@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - g_phys_as = phys_as; - entry->eax = g_phys_as | (virt_as << 8); - entry->edx = 0; -- /* IBPB isn't necessarily present in hardware cpuid */ -+ /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); -+ if (boot_cpu_has(X86_FEATURE_IBRS)) -+ entry->ebx |= F(IBRS); - entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; - cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - break; -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index a69906c..841e80d 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -163,6 +163,17 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) - return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); - } - -+static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) -+{ -+ struct kvm_cpuid_entry2 *best; -+ -+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); -+ if (best && (best->ebx & bit(X86_FEATURE_IBRS))) -+ return true; -+ best = kvm_find_cpuid_entry(vcpu, 7, 0); -+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); -+} -+ - static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) - { - struct kvm_cpuid_entry2 *best; -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index d8e3c02..c564d03 100644 
---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -552,6 +552,7 @@ struct vcpu_vmx { - #endif - - u64 arch_capabilities; -+ u64 spec_ctrl; - - u32 vm_entry_controls_shadow; - u32 vm_exit_controls_shadow; -@@ -1847,6 +1848,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) - } - - /* -+ * Check if MSR is intercepted for currently loaded MSR bitmap. -+ */ -+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) -+{ -+ unsigned long *msr_bitmap; -+ int f = sizeof(unsigned long); -+ -+ if (!cpu_has_vmx_msr_bitmap()) -+ return true; -+ -+ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; -+ -+ if (msr <= 0x1fff) { -+ return !!test_bit(msr, msr_bitmap + 0x800 / f); -+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { -+ msr &= 0x1fff; -+ return !!test_bit(msr, msr_bitmap + 0xc00 / f); -+ } -+ -+ return true; -+} -+ -+/* - * Check if MSR is intercepted for L01 MSR bitmap. - */ - static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) -@@ -2978,6 +3002,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(vcpu); - break; -+ case MSR_IA32_SPEC_CTRL: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_ibrs(vcpu)) -+ return 1; -+ -+ msr_info->data = to_vmx(vcpu)->spec_ctrl; -+ break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has_arch_capabilities(vcpu)) -@@ -3083,6 +3114,36 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr_info); - break; -+ case MSR_IA32_SPEC_CTRL: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_ibrs(vcpu)) -+ return 1; -+ -+ /* The STIBP bit doesn't fault even if it's not advertised */ -+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) -+ return 1; -+ -+ vmx->spec_ctrl = data; -+ -+ if (!data) -+ break; -+ -+ /* -+ * For non-nested: -+ * When it's written (to non-zero) for the first time, pass -+ * it through. -+ * -+ * For nested: -+ * The handling of the MSR bitmap for L2 guests is done in -+ * nested_vmx_merge_msr_bitmap. We should not touch the -+ * vmcs02.msr_bitmap here since it gets completely overwritten -+ * in the merging. We update the vmcs01 here for L1 as well -+ * since it will end up touching the MSR anyway now. -+ */ -+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, -+ MSR_IA32_SPEC_CTRL, -+ MSR_TYPE_RW); -+ break; - case MSR_IA32_PRED_CMD: - if (!msr_info->host_initiated && - !guest_cpuid_has_ibpb(vcpu)) -@@ -5216,6 +5277,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) - u64 cr0; - - vmx->rmode.vm86_active = 0; -+ vmx->spec_ctrl = 0; - - vmx->soft_vnmi_blocked = 0; - -@@ -8806,6 +8868,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - - vmx_arm_hv_timer(vcpu); - -+ /* -+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if -+ * it's non-zero. Since vmentry is serialising on affected CPUs, there -+ * is no need to worry about the conditional branch over the wrmsr -+ * being speculatively taken. -+ */ -+ if (vmx->spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); -+ - vmx->__launched = vmx->loaded_vmcs->launched; - asm( - /* Store host registers */ -@@ -8924,6 +8995,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - #endif - ); - -+ /* -+ * We do not use IBRS in the kernel. If this vCPU has used the -+ * SPEC_CTRL MSR it may have left it on; save the value and -+ * turn it off. 
This is much more efficient than blindly adding -+ * it to the atomic save/restore list. Especially as the former -+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. -+ * -+ * For non-nested case: -+ * If the L01 MSR bitmap does not intercept the MSR, then we need to -+ * save it. -+ * -+ * For nested case: -+ * If the L02 MSR bitmap does not intercept the MSR, then we need to -+ * save it. -+ */ -+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); -+ -+ if (vmx->spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -@@ -9476,7 +9568,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; - /* -- * pred_cmd is trying to verify two things: -+ * pred_cmd & spec_ctrl are trying to verify two things: - * - * 1. L0 gave a permission to L1 to actually passthrough the MSR. This - * ensures that we do not accidentally generate an L02 MSR bitmap -@@ -9489,9 +9581,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - * the MSR. - */ - bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); -+ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); - - if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && -- !pred_cmd) -+ !pred_cmd && !spec_ctrl) - return false; - - page = nested_get_page(vcpu, vmcs12->msr_bitmap); -@@ -9527,6 +9620,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - } - } - -+ if (spec_ctrl) -+ nested_vmx_disable_intercept_for_msr( -+ msr_bitmap_l1, msr_bitmap_l0, -+ MSR_IA32_SPEC_CTRL, -+ MSR_TYPE_R | MSR_TYPE_W); -+ - if (pred_cmd) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index d01742e..d2ea523 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -975,7 +975,7 @@ static u32 msrs_to_save[] = { - #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, -- MSR_IA32_ARCH_CAPABILITIES -+ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES - }; - - static unsigned num_msrs_to_save; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch deleted file mode 100644 index 1175e587..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch +++ /dev/null @@ -1,135 +0,0 @@ -From e37573e8b118ee1b8afa1b045d3a3dde73712c6a Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Fri, 8 Sep 2017 19:26:30 -0700 -Subject: [PATCH 022/103] kaiser: PCID 0 for kernel and 128 for user - -Why was 4 chosen for kernel PCID and 6 for user PCID? -No good reason in a backport where PCIDs are only used for Kaiser. - -If we continue with those, then we shall need to add Andy Lutomirski's -4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa() -and __read_cr3()"), which deals with the problem of read_cr3() callers -finding stray bits in the cr3 that they expected to be page-aligned; -and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64: -Mask off CR3's PCID bits in the saved CR3"). 
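-
-[note: not part of the original patch -- for reference, the CR3 layout the
- surrounding paragraphs reason about (with CR4.PCIDE=1); the mask names are
- illustrative:
-
-	#define CR3_PCID_MASK	0x0000000000000fffULL	/* bits 11:0, the PCID/ASID  */
-	#define CR3_ADDR_MASK	0x000ffffffffff000ULL	/* bits 51:12, pgd phys base */
-	#define CR3_NOFLUSH	(1ULL << 63)		/* skip implicit TLB flush   */
-
- with kernel PCID 0, any read of CR3 already yields a page-aligned value,
- which is why the two masking commits cited above become unnecessary.]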
- -But if 0 is used for kernel PCID, then there's no need to add in those -commits - whenever the kernel looks, it sees 0 in the lower bits; and -0 for kernel seems an obvious choice. - -And I naughtily propose 128 for user PCID. Because there's a place -in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH, -but needs to reset that to NOFLUSH for the next occasion. Currently -it does so with a "movb $(0x80)" into the high byte of the per-cpu -quadword, but that will cause a machine without PCID support to crash. -Now, if %al just happened to have 0x80 in it at that point, on a -machine with PCID support, but 0 on a machine without PCID support... - -(That will go badly wrong once the pgd can be at a physical address -above 2^56, but even with 5-level paging, physical goes up to 2^52.) - -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kaiser.h | 19 ++++++++++++------- - arch/x86/include/asm/pgtable_types.h | 7 ++++--- - arch/x86/mm/tlb.c | 3 +++ - 3 files changed, 19 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 009bca5..110a73e 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg - movq \reg, %cr3 - .endm - --.macro _SWITCH_TO_USER_CR3 reg -+.macro _SWITCH_TO_USER_CR3 reg regb -+/* -+ * regb must be the low byte portion of reg: because we have arranged -+ * for the low byte of the user PCID to serve as the high byte of NOFLUSH -+ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are -+ * not enabled): so that the one register can update both memory and cr3. -+ */ - movq %cr3, \reg - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg - orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg - js 9f --// FLUSH this time, reset to NOFLUSH for next time --// But if nopcid? Consider using 0x80 for user pcid? 
--movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) -+/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ -+movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) - 9: - movq \reg, %cr3 - .endm -@@ -49,7 +54,7 @@ popq %rax - - .macro SWITCH_USER_CR3 - pushq %rax --_SWITCH_TO_USER_CR3 %rax -+_SWITCH_TO_USER_CR3 %rax %al - popq %rax - .endm - -@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - - .macro SWITCH_USER_CR3_NO_STACK - movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) --_SWITCH_TO_USER_CR3 %rax -+_SWITCH_TO_USER_CR3 %rax %al - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .endm - -@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - - .macro SWITCH_KERNEL_CR3 reg - .endm --.macro SWITCH_USER_CR3 reg -+.macro SWITCH_USER_CR3 reg regb - .endm - .macro SWITCH_USER_CR3_NO_STACK - .endm -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index ada77fd..7cf2883 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -146,16 +146,17 @@ - - /* Mask for all the PCID-related bits in CR3: */ - #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) -+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) -+ - #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) --#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) --#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) -+/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ -+#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) - - #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) - #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) - #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) - #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) - #else --#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) - #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) - /* - * PCIDs are unsupported on 32-bit and none of these bits can be -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a2532d4..852c665 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -52,6 +52,9 @@ static void load_new_mm_cr3(pgd_t *pgdir) - * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could - * do it here, but can only be used if X86_FEATURE_INVPCID is - * available - and many machines support pcid without invpcid. -+ * -+ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; -+ * but keep that line in there in case something changes. - */ - new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; - kaiser_flush_tlb_on_return_to_user(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch deleted file mode 100644 index 904e0528..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-entry-64-Push-extra-regs-right-away.patch +++ /dev/null @@ -1,49 +0,0 @@ -From c65286e3b8a7060e768c7b7e4c565922c205cb7f Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 28 Jan 2018 10:38:49 -0800 -Subject: [PATCH 22/42] x86/entry/64: Push extra regs right away - -(cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46) - -With the fast path removed there is no point in splitting the push of the -normal and the extra register set. Just push the extra regs right away. 
- -[ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ] - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> -Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 4360253..c915eeb 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ - pushq %r11 /* pt_regs->r11 */ -- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ -+ pushq %rbx /* pt_regs->rbx */ -+ pushq %rbp /* pt_regs->rbp */ -+ pushq %r12 /* pt_regs->r12 */ -+ pushq %r13 /* pt_regs->r13 */ -+ pushq %r14 /* pt_regs->r14 */ -+ pushq %r15 /* pt_regs->r15 */ - - /* IRQs are off. */ -- SAVE_EXTRA_REGS - movq %rsp, %rdi - call do_syscall_64 /* returns with IRQs disabled */ - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch deleted file mode 100644 index aef6dcc5..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 0ba8203bd88d5640bd6b062b09d3514d5787161d Mon Sep 17 00:00:00 2001 -From: Linus Torvalds <torvalds@linux-foundation.org> -Date: Tue, 1 May 2018 15:55:51 +0200 -Subject: [PATCH 22/93] x86/nospec: Simplify alternative_msr_write() - -commit 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f upstream - -The macro is not type safe and I did look for why that "g" constraint for -the asm doesn't work: it's because the asm is more fundamentally wrong. - -It does - - movl %[val], %%eax - -but "val" isn't a 32-bit value, so then gcc will pass it in a register, -and generate code like - - movl %rsi, %eax - -and gas will complain about a nonsensical 'mov' instruction (it's moving a -64-bit register to a 32-bit one). - -Passing it through memory will just hide the real bug - gcc still thinks -the memory location is 64-bit, but the "movl" will only load the first 32 -bits and it all happens to work because x86 is little-endian. - -Convert it to a type safe inline function with a little trick which hands -the feature into the ALTERNATIVE macro. 
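-
-[note: not part of the original patch -- the "little trick" works because
- WRMSR has a fixed register convention, which the new constraints in the
- hunk below encode directly:
-
-	ECX = MSR index		<- "c" (msr)
-	EAX = value, low 32	<- "a" (val)
-	EDX = value, high 32	<- "d" (val >> 32)
-
- so the alternative body needs no mov instructions at all, just "wrmsr".]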
- -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 19 ++++++++++--------- - 1 file changed, 10 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index f928ad9..870acfc 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -241,15 +241,16 @@ static inline void vmexit_fill_RSB(void) - #endif - } - --#define alternative_msr_write(_msr, _val, _feature) \ -- asm volatile(ALTERNATIVE("", \ -- "movl %[msr], %%ecx\n\t" \ -- "movl %[val], %%eax\n\t" \ -- "movl $0, %%edx\n\t" \ -- "wrmsr", \ -- _feature) \ -- : : [msr] "i" (_msr), [val] "i" (_val) \ -- : "eax", "ecx", "edx", "memory") -+static __always_inline -+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) -+{ -+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) -+ : : "c" (msr), -+ "a" (val), -+ "d" (val >> 32), -+ [feature] "i" (feature) -+ : "memory"); -+} - - static inline void indirect_branch_prediction_barrier(void) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch deleted file mode 100644 index 905134c7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch +++ /dev/null @@ -1,192 +0,0 @@ -From c8b2b4bc3e5eddb48f6eda57e9138a2ea2d39345 Mon Sep 17 00:00:00 2001 -From: KarimAllah Ahmed <karahmed@amazon.de> -Date: Sat, 3 Feb 2018 15:56:23 +0100 -Subject: [PATCH 23/33] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL - -(cherry picked from commit b2ac58f90540e39324e7a29a7ad471407ae0bf48) - -[ Based on a patch from Paolo Bonzini <pbonzini@redhat.com> ] - -... basically doing exactly what we do for VMX: - -- Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) -- Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest - actually used it. 
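-
-[note: not part of the original patch -- the SVM MSR permission map packs
- two intercept bits per MSR, which is what the msr_write_intercepted()
- helper in the hunk below is indexing; a sketch of the addressing:
-
-	offset    = svm_msrpm_offset(msr);	/* which chunk of the map    */
-	bit_read  = 2 * (msr & 0x0f);		/* even bit: read intercept  */
-	bit_write = 2 * (msr & 0x0f) + 1;	/* odd bit:  write intercept */
-]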
- -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Darren Kenny <darren.kenny@oracle.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Andrea Arcangeli <aarcange@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Jun Nakajima <jun.nakajima@intel.com> -Cc: kvm@vger.kernel.org -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Asit Mallick <asit.k.mallick@intel.com> -Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Ashok Raj <ashok.raj@intel.com> -Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 88 insertions(+) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 43e45b9..4a36977 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -183,6 +183,8 @@ struct vcpu_svm { - u64 gs_base; - } host; - -+ u64 spec_ctrl; -+ - u32 *msrpm; - - ulong nmi_iret_rip; -@@ -248,6 +250,7 @@ static const struct svm_direct_access_msrs { - { .index = MSR_CSTAR, .always = true }, - { .index = MSR_SYSCALL_MASK, .always = true }, - #endif -+ { .index = MSR_IA32_SPEC_CTRL, .always = false }, - { .index = MSR_IA32_PRED_CMD, .always = false }, - { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, - { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, -@@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 index) - return false; - } - -+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) -+{ -+ u8 bit_write; -+ unsigned long tmp; -+ u32 offset; -+ u32 *msrpm; -+ -+ msrpm = is_guest_mode(vcpu) ? 
to_svm(vcpu)->nested.msrpm: -+ to_svm(vcpu)->msrpm; -+ -+ offset = svm_msrpm_offset(msr); -+ bit_write = 2 * (msr & 0x0f) + 1; -+ tmp = msrpm[offset]; -+ -+ BUG_ON(offset == MSR_INVALID); -+ -+ return !!test_bit(bit_write, &tmp); -+} -+ - static void set_msr_interception(u32 *msrpm, unsigned msr, - int read, int write) - { -@@ -1534,6 +1556,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) - u32 dummy; - u32 eax = 1; - -+ svm->spec_ctrl = 0; -+ - if (!init_event) { - svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; -@@ -3515,6 +3539,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - case MSR_VM_CR: - msr_info->data = svm->nested.vm_cr_msr; - break; -+ case MSR_IA32_SPEC_CTRL: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_ibrs(vcpu)) -+ return 1; -+ -+ msr_info->data = svm->spec_ctrl; -+ break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; -@@ -3599,6 +3630,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr); - break; -+ case MSR_IA32_SPEC_CTRL: -+ if (!msr->host_initiated && -+ !guest_cpuid_has_ibrs(vcpu)) -+ return 1; -+ -+ /* The STIBP bit doesn't fault even if it's not advertised */ -+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) -+ return 1; -+ -+ svm->spec_ctrl = data; -+ -+ if (!data) -+ break; -+ -+ /* -+ * For non-nested: -+ * When it's written (to non-zero) for the first time, pass -+ * it through. -+ * -+ * For nested: -+ * The handling of the MSR bitmap for L2 guests is done in -+ * nested_svm_vmrun_msrpm. -+ * We update the L1 MSR bit as well since it will end up -+ * touching the MSR anyway now. -+ */ -+ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); -+ break; - case MSR_IA32_PRED_CMD: - if (!msr->host_initiated && - !guest_cpuid_has_ibpb(vcpu)) -@@ -4842,6 +4900,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - - local_irq_enable(); - -+ /* -+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if -+ * it's non-zero. Since vmentry is serialising on affected CPUs, there -+ * is no need to worry about the conditional branch over the wrmsr -+ * being speculatively taken. -+ */ -+ if (svm->spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); -+ - asm volatile ( - "push %%" _ASM_BP "; \n\t" - "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" -@@ -4934,6 +5001,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - #endif - ); - -+ /* -+ * We do not use IBRS in the kernel. If this vCPU has used the -+ * SPEC_CTRL MSR it may have left it on; save the value and -+ * turn it off. This is much more efficient than blindly adding -+ * it to the atomic save/restore list. Especially as the former -+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. -+ * -+ * For non-nested case: -+ * If the L01 MSR bitmap does not intercept the MSR, then we need to -+ * save it. -+ * -+ * For nested case: -+ * If the L02 MSR bitmap does not intercept the MSR, then we need to -+ * save it. 
-+ */ -+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -+ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); -+ -+ if (svm->spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch deleted file mode 100644 index 425208de..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 1db4ad61489277c4a3ac82f43be15d0b31f12eea Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 27 Aug 2017 16:24:27 -0700 -Subject: [PATCH 023/103] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user - -Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and -X86_CR3_PCID_USER_VAR: we usually name variables in lower-case. - -But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)? -Ah, it's a leftover from when kaiser_add_user_map() once complained -about mapping the same page twice. Make it __read_mostly instead. -(I'm a little uneasy about all the unrelated data which shares its -page getting user-mapped too, but that was so before, and not a big -deal: though we call it user-mapped, it's not mapped with _PAGE_USER.) - -And there is a little change around the two calls to do_nmi(). -Previously they set the NOFLUSH bit (if PCID supported) when -forcing to kernel context before do_nmi(); now they also have the -NOFLUSH bit set (if PCID supported) when restoring context after: -nothing done in do_nmi() should require a TLB to be flushed here. 
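A rough C rendering of the CR3 gymnastics described above may help; this is an illustrative sketch only, not part of the patch. The constants are as this series defines them, and the runtime variable x86_cr3_pcid_noflush is modelled as a parameter:

    #include <stdint.h>

    #define X86_CR3_PCID_ASID_MASK   0xFFFull   /* low 12 bits: the PCID */
    #define KAISER_SHADOW_PGD_OFFSET 0x1000ull  /* user pgd = kernel pgd + 4K */

    /*
     * Roughly what the patched NMI entry does: OR in the NOFLUSH hint
     * (bit 63, carried by pcid_noflush) both for the switch now and for
     * the value restored on return, then mask off the shadow-pgd bit and
     * the 12 PCID bits to land on the kernel pgd with the kernel PCID
     * (0 in this series).
     */
    static uint64_t nmi_kernel_cr3(uint64_t cr3, uint64_t pcid_noflush,
                                   uint64_t *saved_for_return)
    {
        cr3 |= pcid_noflush;       /* NOFLUSH now and on the return path */
        *saved_for_return = cr3;   /* corresponds to the "pushq %rax"    */
        return cr3 & ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
    }

The authoritative version is the assembly in the entry_64.S hunks below; the sketch only spells out the bit arithmetic.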
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 8 ++++---- - arch/x86/include/asm/kaiser.h | 11 +++++------ - arch/x86/mm/kaiser.c | 13 +++++++------ - 3 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 4a0ebf4..e158fd5 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1314,11 +1314,11 @@ ENTRY(nmi) - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax -+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ -+ orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -- /* Add back kernel PCID and "no flush" bit */ -- orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - call do_nmi -@@ -1558,11 +1558,11 @@ end_repeat_nmi: - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax -+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ -+ orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -- /* Add back kernel PCID and "no flush" bit */ -- orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 110a73e..48d8d70 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -25,7 +25,7 @@ - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg --orq X86_CR3_PCID_KERN_VAR, \reg -+orq x86_cr3_pcid_noflush, \reg - movq \reg, %cr3 - .endm - -@@ -37,11 +37,10 @@ movq \reg, %cr3 - * not enabled): so that the one register can update both memory and cr3. - */ - movq %cr3, \reg --andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg --orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg -+orq PER_CPU_VAR(x86_cr3_pcid_user), \reg - js 9f - /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ --movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) -+movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) - 9: - movq \reg, %cr3 - .endm -@@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - */ - DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - --extern unsigned long X86_CR3_PCID_KERN_VAR; --DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); -+extern unsigned long x86_cr3_pcid_noflush; -+DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 7056840..fa1cb09 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -28,8 +28,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - * This is also handy because systems that do not support PCIDs - * just end up or'ing a 0 into their CR3, which does no harm. 
- */ --__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; --DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); -+unsigned long x86_cr3_pcid_noflush __read_mostly; -+DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - /* - * At runtime, the only things we map are some things for CPU -@@ -303,7 +303,8 @@ void __init kaiser_init(void) - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL); - -- kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, -+ kaiser_add_user_map_early(&x86_cr3_pcid_noflush, -+ sizeof(x86_cr3_pcid_noflush), - __PAGE_KERNEL); - } - -@@ -381,8 +382,8 @@ void kaiser_setup_pcid(void) - * These variables are used by the entry/exit - * code to change PCID and pgd and TLB flushing. - */ -- X86_CR3_PCID_KERN_VAR = kern_cr3; -- this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); -+ x86_cr3_pcid_noflush = kern_cr3; -+ this_cpu_write(x86_cr3_pcid_user, user_cr3); - } - - /* -@@ -392,7 +393,7 @@ void kaiser_setup_pcid(void) - */ - void kaiser_flush_tlb_on_return_to_user(void) - { -- this_cpu_write(X86_CR3_PCID_USER_VAR, -+ this_cpu_write(x86_cr3_pcid_user, - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch deleted file mode 100644 index 6d18a50c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch +++ /dev/null @@ -1,75 +0,0 @@ -From c3a018c5b5ae383b51700cd636995916fc8c1f61 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:16 -0400 -Subject: [PATCH 23/93] x86/bugs: Concentrate bug detection into a separate - function - -commit 4a28bfe3267b68e22c663ac26185aa16c9b879ef upstream - -Combine the various logic which goes through all those -x86_cpu_id matching structures in one function. - -Suggested-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 21 +++++++++++---------- - 1 file changed, 11 insertions(+), 10 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 301bbd1..357c589 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -879,21 +879,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - {} - }; - --static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) -+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - { - u64 ia32_cap = 0; - -+ if (x86_match_cpu(cpu_no_speculation)) -+ return; -+ -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -+ - if (x86_match_cpu(cpu_no_meltdown)) -- return false; -+ return; - - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - - /* Rogue Data Cache Load? No! 
*/ - if (ia32_cap & ARCH_CAP_RDCL_NO) -- return false; -+ return; - -- return true; -+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - } - - /* -@@ -942,12 +948,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - - setup_force_cpu_cap(X86_FEATURE_ALWAYS); - -- if (!x86_match_cpu(cpu_no_speculation)) { -- if (cpu_vulnerable_to_meltdown(c)) -- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); -- setup_force_cpu_bug(X86_BUG_SPECTRE_V1); -- setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -- } -+ cpu_set_bug_bits(c); - - fpu__init_system(c); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch deleted file mode 100644 index e63a87b0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-Documentation-Document-array_index_nospec.patch +++ /dev/null @@ -1,128 +0,0 @@ -From be059366798cbe4d7f4e9d86232e17b2368154ce Mon Sep 17 00:00:00 2001 -From: Mark Rutland <mark.rutland@arm.com> -Date: Mon, 29 Jan 2018 17:02:16 -0800 -Subject: [PATCH 24/42] Documentation: Document array_index_nospec - -(cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da) - -Document the rationale and usage of the new array_index_nospec() helper. - -Signed-off-by: Mark Rutland <mark.rutland@arm.com> -Signed-off-by: Will Deacon <will.deacon@arm.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Kees Cook <keescook@chromium.org> -Cc: linux-arch@vger.kernel.org -Cc: Jonathan Corbet <corbet@lwn.net> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: gregkh@linuxfoundation.org -Cc: kernel-hardening@lists.openwall.com -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/speculation.txt | 90 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 90 insertions(+) - create mode 100644 Documentation/speculation.txt - -diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt -new file mode 100644 -index 0000000..e9e6cba ---- /dev/null -+++ b/Documentation/speculation.txt -@@ -0,0 +1,90 @@ -+This document explains potential effects of speculation, and how undesirable -+effects can be mitigated portably using common APIs. -+ -+=========== -+Speculation -+=========== -+ -+To improve performance and minimize average latencies, many contemporary CPUs -+employ speculative execution techniques such as branch prediction, performing -+work which may be discarded at a later stage. -+ -+Typically speculative execution cannot be observed from architectural state, -+such as the contents of registers. However, in some cases it is possible to -+observe its impact on microarchitectural state, such as the presence or -+absence of data in caches. Such state may form side-channels which can be -+observed to extract secret information. -+ -+For example, in the presence of branch prediction, it is possible for bounds -+checks to be ignored by code which is speculatively executed. 
Consider the -+following code: -+ -+ int load_array(int *array, unsigned int index) -+ { -+ if (index >= MAX_ARRAY_ELEMS) -+ return 0; -+ else -+ return array[index]; -+ } -+ -+Which, on arm64, may be compiled to an assembly sequence such as: -+ -+ CMP <index>, #MAX_ARRAY_ELEMS -+ B.LT less -+ MOV <returnval>, #0 -+ RET -+ less: -+ LDR <returnval>, [<array>, <index>] -+ RET -+ -+It is possible that a CPU mis-predicts the conditional branch, and -+speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This -+value will subsequently be discarded, but the speculated load may affect -+microarchitectural state which can be subsequently measured. -+ -+More complex sequences involving multiple dependent memory accesses may -+result in sensitive information being leaked. Consider the following -+code, building on the prior example: -+ -+ int load_dependent_arrays(int *arr1, int *arr2, int index) -+ { -+ int val1, val2; -+ -+ val1 = load_array(arr1, index); -+ val2 = load_array(arr2, val1); -+ -+ return val2; -+ } -+ -+Under speculation, the first call to load_array() may return the value -+of an out-of-bounds address, while the second call will influence -+microarchitectural state dependent on this value. This may provide an -+arbitrary read primitive. -+ -+==================================== -+Mitigating speculation side-channels -+==================================== -+ -+The kernel provides a generic API to ensure that bounds checks are -+respected even under speculation. Architectures which are affected by -+speculation-based side-channels are expected to implement these -+primitives. -+ -+The array_index_nospec() helper in <linux/nospec.h> can be used to -+prevent information from being leaked via side-channels. -+ -+A call to array_index_nospec(index, size) returns a sanitized index -+value that is bounded to [0, size) even under cpu speculation -+conditions. -+ -+This can be used to protect the earlier load_array() example: -+ -+ int load_array(int *array, unsigned int index) -+ { -+ if (index >= MAX_ARRAY_ELEMS) -+ return 0; -+ else { -+ index = array_index_nospec(index, MAX_ARRAY_ELEMS); -+ return array[index]; -+ } -+ } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch deleted file mode 100644 index 8feed73a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 36417bad8e288e64df1067207030c67304c26ee5 Mon Sep 17 00:00:00 2001 -From: Liran Alon <liran.alon@oracle.com> -Date: Thu, 9 Nov 2017 20:27:20 +0200 -Subject: [PATCH 24/33] KVM: nVMX: Fix races when sending nested PI while dest - enters/leaves L2 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream. - -Consider the following scenario: -1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI -to CPU B via virtual posted-interrupt mechanism. -2. CPU B is currently executing L2 guest. -3. vmx_deliver_nested_posted_interrupt() calls -kvm_vcpu_trigger_posted_interrupt() which will note that -vcpu->mode == IN_GUEST_MODE. -4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR -IPI, CPU B exits from L2 to L0 during event-delivery -(valid IDT-vectoring-info). -5. 
CPU A now sends the physical IPI. The IPI is received in the host and -its handler (smp_kvm_posted_intr_nested_ipi()) does nothing. -6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT, -CPU B continues to run in L0 and reaches vcpu_enter_guest(). As -KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume -L2 guest. -7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but -it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be -consumed at next L2 entry! - -Another scenario to consider: -1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI -to CPU B via virtual posted-interrupt mechanism. -2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(), -CPU B is at L0 and is about to resume into L2. Further assume that it is -in vcpu_enter_guest() after the check for KVM_REQ_EVENT. -3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which -will note that vcpu->mode != IN_GUEST_MODE. Therefore, it does nothing and -returns false. Then, it will set pi_pending=true and KVM_REQ_EVENT. -4. Now CPU B continues and resumes into L2 guest without processing -the posted-interrupt until next L2 entry! - -To fix both issues, we just need to change -vmx_deliver_nested_posted_interrupt() to set pi_pending=true and -KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt(). - -It will fix the first scenario by changing step (6) to note that -KVM_REQ_EVENT is set and pi_pending=true and therefore process the -nested posted-interrupt. - -It will fix the second scenario in two possible ways: -1. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed -vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received -when CPU resumes into L2. -2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet -changed vcpu->mode to IN_GUEST_MODE, then after CPU B changes -vcpu->mode it will call kvm_request_pending() which will return true and -therefore force another round of vcpu_enter_guest() which will note that -KVM_REQ_EVENT is set and pi_pending=true and therefore process the nested -posted-interrupt. - -Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") -Signed-off-by: Liran Alon <liran.alon@oracle.com> -Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> -Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> -[Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT - and L2 executes HLT instruction. - Paolo] -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index c564d03..85078c7 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -4944,14 +4944,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, - - if (is_guest_mode(vcpu) && - vector == vmx->nested.posted_intr_nv) { -- /* the PIR and ON have been set by L1. */ -- kvm_vcpu_trigger_posted_interrupt(vcpu); - /* - * If a posted intr is not recognized by hardware, - * we will accomplish it in the next vmentry. - */ - vmx->nested.pi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); -+ /* the PIR and ON have been set by L1. 
*/ -+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) -+ kvm_vcpu_kick(vcpu); - return 0; - } - return -1; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch deleted file mode 100644 index ce6a3595..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 2a172cc2491d54b70e5e54a19d2affa38f009a33 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Tue, 26 Sep 2017 18:43:07 -0700 -Subject: [PATCH 024/103] kaiser: paranoid_entry pass cr3 need to paranoid_exit - -Neel Natu points out that paranoid_entry() was wrong to assume that -an entry that did not need swapgs would not need SWITCH_KERNEL_CR3: -paranoid_entry (used for debug breakpoint, int3, double fault or MCE; -though I think it's only the MCE case that is cause for concern here) -can break in at an awkward time, between cr3 switch and swapgs, but -its handling always needs kernel gs and kernel cr3. - -Easy to fix in itself, but paranoid_entry() also needs to convey to -paranoid_exit() (and my reading of macro idtentry says paranoid_entry -and paranoid_exit are always paired) how to restore the prior state. -The swapgs state is already conveyed by %ebx (0 or 1), so extend that -also to convey when SWITCH_USER_CR3 will be needed (2 or 3). - -(Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other -way round: and a convention shared with error_entry() and error_exit(), -which I don't want to touch. Perhaps I should have inverted the bit -for switch cr3 too, but did not.) - -paranoid_exit() would be straightforward, except for TRACE_IRQS: it -did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG -when not: which is it supposed to use when SWITCH_USER_CR3 is split -apart from that? As best as I can determine, commit 5963e317b1e9 -("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") -missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG -there too (the discrepancy has nothing to do with the liberal use -of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has -just been used in all cases); discrepancy lovingly preserved across -several paranoid_exit() cleanups, but I'm now removing it. - -Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in -paranoid_exit() is now not only unnecessary but unsafe: might corrupt -syscall entry's unsafe_stack_register_backup of %rax. Just use -SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether, -before we make the mistake of using it again. - -hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs -part of the series, and ought to be moved earlier, if you decided -to make a release of Kaiser-without-PCIDs. 
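To keep the four states straight, the %ebx convention can be restated as an illustrative C enum; this is not part of the patch, and the authoritative definition is the comment block added to paranoid_entry in the hunk below:

    #include <stdbool.h>

    /* Bit 0 set: no swapgs needed (inverted, as the text laments).
     * Bit 1 set: SWITCH_USER_CR3 needed on exit. */
    enum paranoid_ebx {
        EBX_SWAPGS_ONLY    = 0,  /* swapgs, but keep kernel CR3         */
        EBX_NEITHER        = 1,  /* neither swapgs nor a CR3 switch     */
        EBX_SWAPGS_AND_CR3 = 2,  /* both swapgs and SWITCH_USER_CR3     */
        EBX_CR3_ONLY       = 3,  /* SWITCH_USER_CR3, but no swapgs      */
    };

    static bool exit_needs_swapgs(enum paranoid_ebx s)   { return !(s & 1); }
    static bool exit_needs_user_cr3(enum paranoid_ebx s) { return  (s & 2); }

paranoid_exit then tests bit 1 for the CR3 switch and bit 0 for swapgs, exactly as the two predicates above do.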
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 46 +++++++++++++++++++++++++++++++--------- - arch/x86/entry/entry_64_compat.S | 2 +- - arch/x86/include/asm/kaiser.h | 8 ------- - 3 files changed, 37 insertions(+), 19 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index e158fd5..41bf650 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1053,7 +1053,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec - /* - * Save all registers in pt_regs, and switch gs if needed. - * Use slow, but surefire "are we in kernel?" check. -- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise -+ * -+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit -+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit -+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit -+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit - */ - ENTRY(paranoid_entry) - cld -@@ -1065,9 +1069,26 @@ ENTRY(paranoid_entry) - testl %edx, %edx - js 1f /* negative -> in kernel */ - SWAPGS -- SWITCH_KERNEL_CR3 - xorl %ebx, %ebx --1: ret -+1: -+#ifdef CONFIG_KAISER -+ /* -+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 -+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. -+ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done -+ * unconditionally, but we need to find out whether the reverse -+ * should be done on return (conveyed to paranoid_exit in %ebx). -+ */ -+ movq %cr3, %rax -+ testl $KAISER_SHADOW_PGD_OFFSET, %eax -+ jz 2f -+ orl $2, %ebx -+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -+ orq x86_cr3_pcid_noflush, %rax -+ movq %rax, %cr3 -+2: -+#endif -+ ret - END(paranoid_entry) - - /* -@@ -1080,20 +1101,25 @@ END(paranoid_entry) - * be complicated. Fortunately, we there's no good reason - * to try to handle preemption here. - * -- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) -+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3 -+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 -+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 -+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs - */ - ENTRY(paranoid_exit) - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF_DEBUG -- testl %ebx, %ebx /* swapgs needed? */ -+ TRACE_IRQS_IRETQ_DEBUG -+#ifdef CONFIG_KAISER -+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */ -+ jz paranoid_exit_no_switch -+ SWITCH_USER_CR3 -+paranoid_exit_no_switch: -+#endif -+ testl $1, %ebx /* swapgs needed? */ - jnz paranoid_exit_no_swapgs -- TRACE_IRQS_IRETQ -- SWITCH_USER_CR3_NO_STACK - SWAPGS_UNSAFE_STACK -- jmp paranoid_exit_restore - paranoid_exit_no_swapgs: -- TRACE_IRQS_IRETQ_DEBUG --paranoid_exit_restore: - RESTORE_EXTRA_REGS - RESTORE_C_REGS - REMOVE_PT_GPREGS_FROM_STACK 8 -diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S -index 0eb5801..d76a976 100644 ---- a/arch/x86/entry/entry_64_compat.S -+++ b/arch/x86/entry/entry_64_compat.S -@@ -343,7 +343,7 @@ ENTRY(entry_INT80_compat) - - /* Go back to user mode. 
*/ - TRACE_IRQS_ON -- SWITCH_USER_CR3_NO_STACK -+ SWITCH_USER_CR3 - SWAPGS - jmp restore_regs_and_iret - END(entry_INT80_compat) -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 48d8d70..3dc5f4c 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .endm - --.macro SWITCH_USER_CR3_NO_STACK --movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) --_SWITCH_TO_USER_CR3 %rax %al --movq PER_CPU_VAR(unsafe_stack_register_backup), %rax --.endm -- - #else /* CONFIG_KAISER */ - - .macro SWITCH_KERNEL_CR3 reg - .endm - .macro SWITCH_USER_CR3 reg regb - .endm --.macro SWITCH_USER_CR3_NO_STACK --.endm - .macro SWITCH_KERNEL_CR3_NO_STACK - .endm - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch deleted file mode 100644 index b86011cc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 2ea1e87e0557d4994d239cf75a12cd624d3c7ef9 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:17 -0400 -Subject: [PATCH 24/93] x86/bugs: Concentrate bug reporting into a separate - function - -commit d1059518b4789cabe34bb4b714d07e6089c82ca1 upstream - -Those SysFS functions have a similar preamble, as such make common -code to handle them. - -Suggested-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 46 ++++++++++++++++++++++++++++++++-------------- - 1 file changed, 32 insertions(+), 14 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index b8b0b6e..4d9c5fe 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -313,30 +313,48 @@ static void __init spectre_v2_select_mitigation(void) - #undef pr_fmt - - #ifdef CONFIG_SYSFS --ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -+ -+ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -+ char *buf, unsigned int bug) - { -- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) -+ if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); -- if (boot_cpu_has(X86_FEATURE_KAISER)) -- return sprintf(buf, "Mitigation: PTI\n"); -+ -+ switch (bug) { -+ case X86_BUG_CPU_MELTDOWN: -+ if (boot_cpu_has(X86_FEATURE_KAISER)) -+ return sprintf(buf, "Mitigation: PTI\n"); -+ -+ break; -+ -+ case X86_BUG_SPECTRE_V1: -+ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -+ -+ case X86_BUG_SPECTRE_V2: -+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", -+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? 
", IBRS_FW" : "", -+ spectre_v2_module_string()); -+ -+ default: -+ break; -+ } -+ - return sprintf(buf, "Vulnerable\n"); - } - -+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); -+} -+ - ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) - { -- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) -- return sprintf(buf, "Not affected\n"); -- return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); - } - - ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) - { -- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -- return sprintf(buf, "Not affected\n"); -- -- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", -- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", -- spectre_v2_module_string()); -+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); - } - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch deleted file mode 100644 index eb633c9c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 15ca5afe3e56a0f80151aa4b6f06233b39736a2e Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sat, 10 Feb 2018 23:39:24 +0000 -Subject: [PATCH 25/33] KVM/x86: Reduce retpoline performance impact in - slot_handle_level_range(), by always inlining iterator helper methods -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 928a4c39484281f8ca366f53a1db79330d058401 upstream. - -With retpoline, tight loops of "call this function for every XXX" are -very much pessimised by taking a prediction miss *every* time. This one -is by far the biggest contributor to the guest launch time with retpoline. - -By marking the iterator slot_handle_…() functions always_inline, we can -ensure that the indirect function call can be optimised away into a -direct call and it actually generates slightly smaller code because -some of the other conditionals can get optimised away too. - -Performance is now pretty close to what we see with nospectre_v2 on -the command line. 
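A toy model of the optimisation, with hypothetical names rather than the real mmu.c signatures (the actual iterators take struct kvm and memslot arguments): once the iterator is __always_inline and the callback is a compile-time constant at each call site, the compiler can emit a direct call, or inline the callback outright, so no retpoline thunk is involved.

    #include <stdbool.h>
    #define __always_inline inline __attribute__((always_inline))

    typedef bool (*slot_handler)(int slot);   /* simplified callback type */

    static __always_inline bool
    for_each_slot(int nslots, slot_handler fn)
    {
        bool flush = false;
        int i;

        for (i = 0; i < nslots; i++)
            flush |= fn(i);  /* direct call once inlined with constant fn */
        return flush;
    }

Without the forced inlining, every iteration pays the indirect-call prediction miss that retpoline deliberately induces, which is exactly the pessimisation described above.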
- -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Tested-by: Filippo Sironi <sironi@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Reviewed-by: Filippo Sironi <sironi@amazon.de> -Acked-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: arjan.van.de.ven@intel.com -Cc: dave.hansen@intel.com -Cc: jmattson@google.com -Cc: karahmed@amazon.de -Cc: kvm@vger.kernel.org -Cc: rkrcmar@redhat.com -Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/mmu.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index d9c7e98..ee4af7a 100644 ---- a/arch/x86/kvm/mmu.c -+++ b/arch/x86/kvm/mmu.c -@@ -4636,7 +4636,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) - typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); - - /* The caller should hold mmu-lock before calling this function. */ --static bool -+static __always_inline bool - slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) -@@ -4666,7 +4666,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, - return flush; - } - --static bool -+static __always_inline bool - slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - bool lock_flush_tlb) -@@ -4677,7 +4677,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - lock_flush_tlb); - } - --static bool -+static __always_inline bool - slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) - { -@@ -4685,7 +4685,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); - } - --static bool -+static __always_inline bool - slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) - { -@@ -4693,7 +4693,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); - } - --static bool -+static __always_inline bool - slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch deleted file mode 100644 index 994c7017..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch +++ /dev/null @@ -1,121 +0,0 @@ -From d1d620936019d80fd9be22b6fb09d3a15d4dbf7f Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:22 
-0800 -Subject: [PATCH 25/42] array_index_nospec: Sanitize speculative array - de-references - -(cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40) - -array_index_nospec() is proposed as a generic mechanism to mitigate -against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary -checks via speculative execution. The array_index_nospec() -implementation is expected to be safe for current generation CPUs across -multiple architectures (ARM, x86). - -Based on an original implementation by Linus Torvalds, tweaked to remove -speculative flows by Alexei Starovoitov, and tweaked again by Linus to -introduce an x86 assembly implementation for the mask generation. - -Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org> -Co-developed-by: Alexei Starovoitov <ast@kernel.org> -Suggested-by: Cyril Novikov <cnovikov@lynx.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: kernel-hardening@lists.openwall.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Catalin Marinas <catalin.marinas@arm.com> -Cc: Will Deacon <will.deacon@arm.com> -Cc: Russell King <linux@armlinux.org.uk> -Cc: gregkh@linuxfoundation.org -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 72 insertions(+) - create mode 100644 include/linux/nospec.h - -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -new file mode 100644 -index 0000000..b99bced ---- /dev/null -+++ b/include/linux/nospec.h -@@ -0,0 +1,72 @@ -+// SPDX-License-Identifier: GPL-2.0 -+// Copyright(c) 2018 Linus Torvalds. All rights reserved. -+// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. -+// Copyright(c) 2018 Intel Corporation. All rights reserved. -+ -+#ifndef _LINUX_NOSPEC_H -+#define _LINUX_NOSPEC_H -+ -+/** -+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise -+ * @index: array element index -+ * @size: number of elements in array -+ * -+ * When @index is out of bounds (@index >= @size), the sign bit will be -+ * set. Extend the sign bit to all bits and invert, giving a result of -+ * zero for an out of bounds index, or ~0 if within bounds [0, @size). -+ */ -+#ifndef array_index_mask_nospec -+static inline unsigned long array_index_mask_nospec(unsigned long index, -+ unsigned long size) -+{ -+ /* -+ * Warn developers about inappropriate array_index_nospec() usage. -+ * -+ * Even if the CPU speculates past the WARN_ONCE branch, the -+ * sign bit of @index is taken into account when generating the -+ * mask. -+ * -+ * This warning is compiled out when the compiler can infer that -+ * @index and @size are less than LONG_MAX. -+ */ -+ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, -+ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) -+ return 0; -+ -+ /* -+ * Always calculate and emit the mask even if the compiler -+ * thinks the mask is not needed. The compiler does not take -+ * into account the value of @index under speculation. 
-+ */ -+ OPTIMIZER_HIDE_VAR(index); -+ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); -+} -+#endif -+ -+/* -+ * array_index_nospec - sanitize an array index after a bounds check -+ * -+ * For a code sequence like: -+ * -+ * if (index < size) { -+ * index = array_index_nospec(index, size); -+ * val = array[index]; -+ * } -+ * -+ * ...if the CPU speculates past the bounds check then -+ * array_index_nospec() will clamp the index within the range of [0, -+ * size). -+ */ -+#define array_index_nospec(index, size) \ -+({ \ -+ typeof(index) _i = (index); \ -+ typeof(size) _s = (size); \ -+ unsigned long _mask = array_index_mask_nospec(_i, _s); \ -+ \ -+ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ -+ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ -+ \ -+ _i &= _mask; \ -+ _i; \ -+}) -+#endif /* _LINUX_NOSPEC_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch deleted file mode 100644 index ef6fe022..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 88ae1de3aa1ad852d683202954e7c3acb96f0d38 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Mon, 2 Oct 2017 10:57:24 -0700 -Subject: [PATCH 025/103] kaiser: kaiser_remove_mapping() move along the pgd - -When removing the bogus comment from kaiser_remove_mapping(), -I really ought to have checked the extent of its bogosity: as -Neel points out, there is nothing to stop unmap_pud_range_nofree() -from continuing beyond the end of a pud (and starting in the wrong -position on the next). - -Fix kaiser_remove_mapping() to constrain the extent and advance pgd -pointer correctly: use pgd_addr_end() macro as used throughout base -mm (but don't assume page-rounded start and size in this case). - -But this bug was very unlikely to trigger in this backport: since -any buddy allocation is contained within a single pud extent, and -we are not using vmapped stacks (and are only mapping one page of -stack anyway): the only way to hit this bug here would be when -freeing a large modified ldt. 
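The fix leans on pgd_addr_end(), which the hunk uses but does not show. For reference, this is the generic definition from the base mm headers, with the x86-64 4-level constants spelled out; the "- 1" in the comparison keeps it correct when end sits at the very top of the address space:

    #define PGDIR_SHIFT 39                     /* x86-64, 4-level paging */
    #define PGDIR_SIZE  (1UL << PGDIR_SHIFT)   /* 512 GiB per pgd entry  */
    #define PGDIR_MASK  (~(PGDIR_SIZE - 1))

    /* Clamp the next step to the end of the current pgd entry. */
    #define pgd_addr_end(addr, end)                                        \
    ({  unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;     \
        (__boundary - 1 < (end) - 1) ? __boundary : (end);                 \
    })

Paired with the loop in the hunk below, each call to unmap_pud_range_nofree() is now bounded to a single pgd entry, and the pgd pointer advances in lock step.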
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index fa1cb09..cc0950f 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) - extern void unmap_pud_range_nofree(pgd_t *pgd, - unsigned long start, unsigned long end); - unsigned long end = start + size; -- unsigned long addr; -+ unsigned long addr, next; -+ pgd_t *pgd; - -- for (addr = start; addr < end; addr += PGDIR_SIZE) { -- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr)); -- unmap_pud_range_nofree(pgd, addr, end); -+ pgd = native_get_shadow_pgd(pgd_offset_k(start)); -+ for (addr = start; addr < end; pgd++, addr = next) { -+ next = pgd_addr_end(addr, end); -+ unmap_pud_range_nofree(pgd, addr, next); - } - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch deleted file mode 100644 index da25f2fe..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch +++ /dev/null @@ -1,143 +0,0 @@ -From f35005b1a8b68f66c980652ef5299cb422eb9123 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:18 -0400 -Subject: [PATCH 25/93] x86/bugs: Read SPEC_CTRL MSR during boot and re-use - reserved bits - -commit 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 upstream - -The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all -the other bits as reserved. The Intel SDM glossary defines reserved as -implementation specific - aka unknown. - -As such at bootup this must be taken it into account and proper masking for -the bits in use applied. - -A copy of this document is available at -https://bugzilla.kernel.org/show_bug.cgi?id=199511 - -[ tglx: Made x86_spec_ctrl_base __ro_after_init ] - -Suggested-by: Jon Masters <jcm@redhat.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++---- - arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++ - 2 files changed, 48 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 870acfc..9ec3d4d 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -217,6 +217,17 @@ enum spectre_v2_mitigation { - SPECTRE_V2_IBRS, - }; - -+/* -+ * The Intel specification for the SPEC_CTRL MSR requires that we -+ * preserve any already set reserved bits at boot time (e.g. for -+ * future additions that this kernel is not currently aware of). -+ * We then set any additional mitigation bits that we want -+ * ourselves and always use this as the base for SPEC_CTRL. -+ * We also use this when handling guest entry/exit as below. 
-+ */ -+extern void x86_spec_ctrl_set(u64); -+extern u64 x86_spec_ctrl_get_default(void); -+ - extern char __indirect_thunk_start[]; - extern char __indirect_thunk_end[]; - -@@ -254,8 +265,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) - - static inline void indirect_branch_prediction_barrier(void) - { -- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, -- X86_FEATURE_USE_IBPB); -+ u64 val = PRED_CMD_IBPB; -+ -+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); - } - - /* -@@ -266,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) - */ - #define firmware_restrict_branch_speculation_start() \ - do { \ -+ u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ -+ \ - preempt_disable(); \ -- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ - X86_FEATURE_USE_IBRS_FW); \ - } while (0) - - #define firmware_restrict_branch_speculation_end() \ - do { \ -- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ -+ u64 val = x86_spec_ctrl_get_default(); \ -+ \ -+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ - X86_FEATURE_USE_IBRS_FW); \ - preempt_enable(); \ - } while (0) -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 4d9c5fe..6ff972a 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -27,6 +27,12 @@ - - static void __init spectre_v2_select_mitigation(void); - -+/* -+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any -+ * writes to SPEC_CTRL contain whatever reserved bits have been set. -+ */ -+static u64 __ro_after_init x86_spec_ctrl_base; -+ - void __init check_bugs(void) - { - identify_boot_cpu(); -@@ -36,6 +42,13 @@ void __init check_bugs(void) - print_cpu_info(&boot_cpu_data); - } - -+ /* -+ * Read the SPEC_CTRL MSR to account for reserved bits which may -+ * have unknown values. -+ */ -+ if (boot_cpu_has(X86_FEATURE_IBRS)) -+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -+ - /* Select the proper spectre mitigation before patching alternatives */ - spectre_v2_select_mitigation(); - -@@ -94,6 +107,21 @@ static const char *spectre_v2_strings[] = { - - static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; - -+void x86_spec_ctrl_set(u64 val) -+{ -+ if (val & ~SPEC_CTRL_IBRS) -+ WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); -+ else -+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -+} -+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); -+ -+u64 x86_spec_ctrl_get_default(void) -+{ -+ return x86_spec_ctrl_base; -+} -+EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -+ - #ifdef RETPOLINE - static bool spectre_v2_bad_module; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch deleted file mode 100644 index 38255613..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 75a724909e81cd4612490d633ab269495377d332 Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Wed, 13 Dec 2017 10:46:40 +0100 -Subject: [PATCH 26/33] KVM: x86: fix escape of guest dr6 to the host -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit efdab992813fb2ed825745625b83c05032e9cda2 upstream. 
- -syzkaller reported: - - WARNING: CPU: 0 PID: 12927 at arch/x86/kernel/traps.c:780 do_debug+0x222/0x250 - CPU: 0 PID: 12927 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #16 - RIP: 0010:do_debug+0x222/0x250 - Call Trace: - <#DB> - debug+0x3e/0x70 - RIP: 0010:copy_user_enhanced_fast_string+0x10/0x20 - </#DB> - _copy_from_user+0x5b/0x90 - SyS_timer_create+0x33/0x80 - entry_SYSCALL_64_fastpath+0x23/0x9a - -The testcase sets a watchpoint (with perf_event_open) on a buffer that is -passed to timer_create() as the struct sigevent argument. In timer_create(), -copy_from_user()'s rep movsb triggers the BP. The testcase also sets -the debug registers for the guest. - -However, KVM only restores host debug registers when the host has active -watchpoints, which triggers a race condition when running the testcase with -multiple threads. The guest's DR6.BS bit can escape to the host before -another thread invokes timer_create(), and do_debug() complains. - -The fix is to respect do_debug()'s dr6 invariant when leaving KVM. - -Reported-by: Dmitry Vyukov <dvyukov@google.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: David Hildenbrand <david@redhat.com> -Cc: Dmitry Vyukov <dvyukov@google.com> -Reviewed-by: David Hildenbrand <david@redhat.com> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index d2ea523..af333e1 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -2833,6 +2833,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) - kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = rdtsc(); -+ /* -+ * If userspace has set any breakpoints or watchpoints, dr6 is restored -+ * on every vmexit, but if not, we might have a stale dr6 from the -+ * guest. do_debug expects dr6 to be cleared after it runs, do the same. -+ */ -+ set_debugreg(0, 6); - } - - static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch deleted file mode 100644 index 0c447e66..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 6e9bf06ed78e96705b4477bfe0607668860cabfe Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Mon, 4 Dec 2017 20:13:35 -0800 -Subject: [PATCH 026/103] kaiser: fix unlikely error in alloc_ldt_struct() - -An error from kaiser_add_mapping() here is not at all likely, but -Eric Biggers rightly points out that __free_ldt_struct() relies on -new_ldt->size being initialized: move that up. 
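The ordering rule being applied generalises beyond the LDT code. A minimal userspace sketch, with hypothetical names standing in for the kernel helpers: anything the error path's destructor reads must be valid before the first failure branch that can reach it.

    #include <stdlib.h>

    struct thing { int size; int *entries; };

    static void free_thing(struct thing *t)
    {
        (void)t->size;   /* like __free_ldt_struct(), consults size here */
        free(t->entries);
        free(t);
    }

    static struct thing *alloc_thing(int size, int (*map)(struct thing *))
    {
        struct thing *t = malloc(sizeof(*t));

        if (!t)
            return NULL;
        t->entries = calloc(size, sizeof(*t->entries));
        t->size = size;           /* initialise BEFORE the fallible step */
        if (!t->entries || map(t)) {
            free_thing(t);        /* safe: size is already valid */
            return NULL;
        }
        return t;
    }

In the patch, moving the new_ldt->size assignment above the kaiser_add_mapping() error check is precisely this rule.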
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/ldt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c -index 8331bad..536e6ab 100644 ---- a/arch/x86/kernel/ldt.c -+++ b/arch/x86/kernel/ldt.c -@@ -78,11 +78,11 @@ static struct ldt_struct *alloc_ldt_struct(int size) - - ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size, - __PAGE_KERNEL); -+ new_ldt->size = size; - if (ret) { - __free_ldt_struct(new_ldt); - return NULL; - } -- new_ldt->size = size; - return new_ldt; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch deleted file mode 100644 index 3731f5b0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-Implement-array_index_mask_nospec.patch +++ /dev/null @@ -1,68 +0,0 @@ -From bc71a58ec0aadad07a49878204eb38273f0c1b9e Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:28 -0800 -Subject: [PATCH 26/42] x86: Implement array_index_mask_nospec - -(cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859) - -array_index_nospec() uses a mask to sanitize user controllable array -indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask -otherwise. While the default array_index_mask_nospec() handles the -carry-bit from the (index - size) result in software. - -The x86 array_index_mask_nospec() does the same, but the carry-bit is -handled in the processor CF flag without conditional instructions in the -control flow. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h -index bfb28ca..ca22173 100644 ---- a/arch/x86/include/asm/barrier.h -+++ b/arch/x86/include/asm/barrier.h -@@ -23,6 +23,30 @@ - #define wmb() asm volatile("sfence" ::: "memory") - #endif - -+/** -+ * array_index_mask_nospec() - generate a mask that is ~0UL when the -+ * bounds check succeeds and 0 otherwise -+ * @index: array element index -+ * @size: number of elements in array -+ * -+ * Returns: -+ * 0 - (index < size) -+ */ -+static inline unsigned long array_index_mask_nospec(unsigned long index, -+ unsigned long size) -+{ -+ unsigned long mask; -+ -+ asm ("cmp %1,%2; sbb %0,%0;" -+ :"=r" (mask) -+ :"r"(size),"r" (index) -+ :"cc"); -+ return mask; -+} -+ -+/* Override the default implementation from linux/nospec.h. 
*/ -+#define array_index_mask_nospec array_index_mask_nospec -+ - #ifdef CONFIG_X86_PPRO_FENCE - #define dma_rmb() rmb() - #else --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch deleted file mode 100644 index d0e8ddcb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch +++ /dev/null @@ -1,137 +0,0 @@ -From d9dc73cbf12047f0d0e171366bfb962b3a592e6f Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:19 -0400 -Subject: [PATCH 26/93] x86/bugs, KVM: Support the combination of guest and - host IBRS - -commit 5cf687548705412da47c9cec342fd952d71ed3d5 upstream - -A guest may modify the SPEC_CTRL MSR from the value used by the -kernel. Since the kernel doesn't use IBRS, this means a value of zero is -what is needed in the host. - -But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to -the other bits as reserved, so the kernel should respect the boot time -SPEC_CTRL value and use that. - -This allows us to deal with future extensions to the SPEC_CTRL interface if -any at all. - -Note: This uses wrmsrl() instead of native_wrmsrl(). It does not make any -difference as paravirt will overwrite the callq *0xfff.. with the wrmsrl -assembler code. - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 10 ++++++++++ - arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++++++++ - arch/x86/kvm/svm.c | 6 ++---- - arch/x86/kvm/vmx.c | 6 ++---- - 4 files changed, 32 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 9ec3d4d..d1c2630 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -228,6 +228,16 @@ enum spectre_v2_mitigation { - extern void x86_spec_ctrl_set(u64); - extern u64 x86_spec_ctrl_get_default(void); - -+/* -+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR -+ * the guest has, while on VMEXIT we restore the host view. This -+ * would be easier if SPEC_CTRL were architecturally maskable or -+ * shadowable for guests but this is not (currently) the case. -+ * Takes the guest view of SPEC_CTRL MSR as a parameter. 
-+ */ -+extern void x86_spec_ctrl_set_guest(u64); -+extern void x86_spec_ctrl_restore_host(u64); -+ - extern char __indirect_thunk_start[]; - extern char __indirect_thunk_end[]; - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 6ff972a..f5cad2f 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -122,6 +122,24 @@ u64 x86_spec_ctrl_get_default(void) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - -+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) -+{ -+ if (!boot_cpu_has(X86_FEATURE_IBRS)) -+ return; -+ if (x86_spec_ctrl_base != guest_spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); -+} -+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); -+ -+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) -+{ -+ if (!boot_cpu_has(X86_FEATURE_IBRS)) -+ return; -+ if (x86_spec_ctrl_base != guest_spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -+} -+EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); -+ - #ifdef RETPOLINE - static bool spectre_v2_bad_module; - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 8551a54..a07579f 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -4905,8 +4905,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ -- if (svm->spec_ctrl) -- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); -+ x86_spec_ctrl_set_guest(svm->spec_ctrl); - - asm volatile ( - "push %%" _ASM_BP "; \n\t" -@@ -5018,8 +5017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - -- if (svm->spec_ctrl) -- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ x86_spec_ctrl_restore_host(svm->spec_ctrl); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 273313f..c386d13 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -8898,8 +8898,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ -- if (vmx->spec_ctrl) -- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); -+ x86_spec_ctrl_set_guest(vmx->spec_ctrl); - - vmx->__launched = vmx->loaded_vmcs->launched; - asm( -@@ -9037,8 +9036,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - -- if (vmx->spec_ctrl) -- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ x86_spec_ctrl_restore_host(vmx->spec_ctrl); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch deleted file mode 100644 index 64e5f55e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch +++ /dev/null @@ -1,686 +0,0 @@ -From 6ceca45ce264990a8831d3e5f7ff6e8c0d10df3a Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 24 Sep 2017 16:59:49 -0700 -Subject: [PATCH 027/103] kaiser: add "nokaiser" boot option, using ALTERNATIVE - -Added "nokaiser" boot option: an early param like "noinvpcid". 
-Most places now check int kaiser_enabled (#defined 0 when not -CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S -and entry_64_compat.S are using the ALTERNATIVE technique, which -patches in the preferred instructions at runtime. That technique -is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated. - -Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that, -but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when -nokaiser like when !CONFIG_KAISER, but not setting either when kaiser - -neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL -won't get set in some obscure corner, or something add PGE into CR4. -By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled, -all page table setup which uses pte_pfn() masks it out of the ptes. - -It's slightly shameful that the same declaration versus definition of -kaiser_enabled appears in not one, not two, but in three header files -(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way, -than with #including any of those in any of the others; and did not -feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes -them all, so we shall hear about it if they get out of synch. - -Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER -from kaiser.c; removed the unused native_get_normal_pgd(); removed -the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some -comments. But more interestingly, set CR4.PSE in secondary_startup_64: -the manual is clear that it does not matter whether it's 0 or 1 when -4-level-pts are enabled, but I was distracted to find cr4 different on -BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask(). - -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 2 ++ - arch/x86/entry/entry_64.S | 15 ++++++------ - arch/x86/include/asm/cpufeatures.h | 3 +++ - arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++------ - arch/x86/include/asm/pgtable.h | 20 +++++++++++----- - arch/x86/include/asm/pgtable_64.h | 13 ++++------- - arch/x86/include/asm/pgtable_types.h | 4 ---- - arch/x86/include/asm/tlbflush.h | 39 ++++++++++++++++++++------------ - arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++- - arch/x86/kernel/espfix_64.c | 3 ++- - arch/x86/kernel/head_64.S | 4 ++-- - arch/x86/mm/init.c | 2 +- - arch/x86/mm/init_64.c | 10 ++++++++ - arch/x86/mm/kaiser.c | 26 +++++++++++++++++---- - arch/x86/mm/pgtable.c | 8 ++----- - arch/x86/mm/tlb.c | 4 +--- - tools/arch/x86/include/asm/cpufeatures.h | 3 +++ - 17 files changed, 146 insertions(+), 65 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index a303387..e2642ec 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2753,6 +2753,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - - nojitter [IA-64] Disables jitter checking for ITC timers. - -+ nokaiser [X86-64] Disable KAISER isolation of kernel from user. 
-+ - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver - - no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 41bf650..bbb38ac 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry) - * unconditionally, but we need to find out whether the reverse - * should be done on return (conveyed to paranoid_exit in %ebx). - */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - testl $KAISER_SHADOW_PGD_OFFSET, %eax - jz 2f - orl $2, %ebx -@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit) - TRACE_IRQS_OFF_DEBUG - TRACE_IRQS_IRETQ_DEBUG - #ifdef CONFIG_KAISER -+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ - testl $2, %ebx /* SWITCH_USER_CR3 needed? */ - jz paranoid_exit_no_switch - SWITCH_USER_CR3 -@@ -1339,13 +1340,14 @@ ENTRY(nmi) - #ifdef CONFIG_KAISER - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - movq %rax, %cr3 -+2: - #endif - call do_nmi - -@@ -1355,8 +1357,7 @@ ENTRY(nmi) - * kernel code that needs user CR3, but do we ever return - * to "user mode" where we need the kernel CR3? - */ -- popq %rax -- mov %rax, %cr3 -+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER - #endif - - /* -@@ -1583,13 +1584,14 @@ end_repeat_nmi: - #ifdef CONFIG_KAISER - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - movq %rax, %cr3 -+2: - #endif - - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ -@@ -1601,8 +1603,7 @@ end_repeat_nmi: - * kernel code that needs user CR3, like just just before - * a sysret. - */ -- popq %rax -- mov %rax, %cr3 -+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER - #endif - - testl %ebx, %ebx /* swapgs needed? */ -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index dc50883..20271d6 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -198,6 +198,9 @@ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - -+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+ - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ - #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 3dc5f4c..96643a9 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -46,28 +46,33 @@ movq \reg, %cr3 - .endm - - .macro SWITCH_KERNEL_CR3 --pushq %rax -+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER - _SWITCH_TO_KERNEL_CR3 %rax - popq %rax -+8: - .endm - - .macro SWITCH_USER_CR3 --pushq %rax -+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER - _SWITCH_TO_USER_CR3 %rax %al - popq %rax -+8: - .endm - - .macro SWITCH_KERNEL_CR3_NO_STACK --movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) -+ALTERNATIVE "jmp 8f", \ -+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ -+ X86_FEATURE_KAISER - _SWITCH_TO_KERNEL_CR3 %rax - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax -+8: - .endm - - #else /* CONFIG_KAISER */ - --.macro SWITCH_KERNEL_CR3 reg -+.macro SWITCH_KERNEL_CR3 - .endm --.macro SWITCH_USER_CR3 reg regb -+.macro SWITCH_USER_CR3 - .endm - .macro SWITCH_KERNEL_CR3_NO_STACK - .endm -@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; - -+extern int kaiser_enabled; -+#else -+#define kaiser_enabled 0 -+#endif /* CONFIG_KAISER */ -+ -+/* -+ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, -+ * so as to build with tests on kaiser_enabled instead of #ifdefs. -+ */ -+ - /** - * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping - * @addr: the start address of the range -@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); - */ - extern void kaiser_init(void); - --#endif /* CONFIG_KAISER */ -- - #endif /* __ASSEMBLY */ - - #endif /* _ASM_X86_KAISER_H */ -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 1cee98e..217e83a 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -18,6 +18,12 @@ - #ifndef __ASSEMBLY__ - #include <asm/x86_init.h> - -+#ifdef CONFIG_KAISER -+extern int kaiser_enabled; -+#else -+#define kaiser_enabled 0 -+#endif -+ - void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); - void ptdump_walk_pgd_level_checkwx(void); - -@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd) - * page table by accident; it will fault on the first - * instruction it tries to run. See native_set_pgd(). 
- */ -- if (IS_ENABLED(CONFIG_KAISER)) -+ if (kaiser_enabled) - ignore_flags |= _PAGE_NX; - - return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; -@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, - */ - static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - { -- memcpy(dst, src, count * sizeof(pgd_t)); -+ memcpy(dst, src, count * sizeof(pgd_t)); - #ifdef CONFIG_KAISER -- /* Clone the shadow pgd part as well */ -- memcpy(native_get_shadow_pgd(dst), -- native_get_shadow_pgd(src), -- count * sizeof(pgd_t)); -+ if (kaiser_enabled) { -+ /* Clone the shadow pgd part as well */ -+ memcpy(native_get_shadow_pgd(dst), -+ native_get_shadow_pgd(src), -+ count * sizeof(pgd_t)); -+ } - #endif - } - -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index 177caf3..cf68b5c 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); - - static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - { -+#ifdef CONFIG_DEBUG_VM -+ /* linux/mmdebug.h may not have been included at this point */ -+ BUG_ON(!kaiser_enabled); -+#endif - return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); - } -- --static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) --{ -- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); --} - #else - static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - { -@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - BUILD_BUG_ON(1); - return NULL; - } --static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) --{ -- return pgdp; --} - #endif /* CONFIG_KAISER */ - - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 7cf2883..f0d9a1a 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -45,11 +45,7 @@ - #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) - #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) - #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) --#ifdef CONFIG_KAISER --#define _PAGE_GLOBAL (_AT(pteval_t, 0)) --#else - #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) --#endif - #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) - #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) - #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 4fff696..13a74f6 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -138,9 +138,11 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - * to avoid the need for asm/kaiser.h in unexpected places. 
- */ - #ifdef CONFIG_KAISER -+extern int kaiser_enabled; - extern void kaiser_setup_pcid(void); - extern void kaiser_flush_tlb_on_return_to_user(void); - #else -+#define kaiser_enabled 0 - static inline void kaiser_setup_pcid(void) - { - } -@@ -165,7 +167,7 @@ static inline void __native_flush_tlb(void) - * back: - */ - preempt_disable(); -- if (this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) - kaiser_flush_tlb_on_return_to_user(); - native_write_cr3(native_read_cr3()); - preempt_enable(); -@@ -176,20 +178,30 @@ static inline void __native_flush_tlb_global_irq_disabled(void) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -- /* clear PGE */ -- native_write_cr4(cr4 & ~X86_CR4_PGE); -- /* write old PGE again and flush TLBs */ -- native_write_cr4(cr4); -+ if (cr4 & X86_CR4_PGE) { -+ /* clear PGE and flush TLB of all entries */ -+ native_write_cr4(cr4 & ~X86_CR4_PGE); -+ /* restore PGE as it was before */ -+ native_write_cr4(cr4); -+ } else { -+ /* -+ * x86_64 microcode update comes this way when CR4.PGE is not -+ * enabled, and it's safer for all callers to allow this case. -+ */ -+ native_write_cr3(native_read_cr3()); -+ } - } - - static inline void __native_flush_tlb_global(void) - { --#ifdef CONFIG_KAISER -- /* Globals are not used at all */ -- __native_flush_tlb(); --#else - unsigned long flags; - -+ if (kaiser_enabled) { -+ /* Globals are not used at all */ -+ __native_flush_tlb(); -+ return; -+ } -+ - if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes -@@ -209,7 +221,6 @@ static inline void __native_flush_tlb_global(void) - raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); - raw_local_irq_restore(flags); --#endif - } - - static inline void __native_flush_tlb_single(unsigned long addr) -@@ -224,7 +235,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) - */ - - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -- if (this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) - kaiser_flush_tlb_on_return_to_user(); - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - return; -@@ -239,9 +250,9 @@ static inline void __native_flush_tlb_single(unsigned long addr) - * Make sure to do only a single invpcid when KAISER is - * disabled and we have only a single ASID. 
- */ -- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) -- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); -- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); -+ if (kaiser_enabled) -+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); -+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); - } - - static inline void __flush_tlb_all(void) -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index e6be5f3..8b03874 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s) - return 1; - } - __setup("nopcid", x86_pcid_setup); -+ -+static int __init x86_nokaiser_setup(char *s) -+{ -+ /* nokaiser doesn't accept parameters */ -+ if (s) -+ return -EINVAL; -+#ifdef CONFIG_KAISER -+ kaiser_enabled = 0; -+ setup_clear_cpu_cap(X86_FEATURE_KAISER); -+ pr_info("nokaiser: KAISER feature disabled\n"); -+#endif -+ return 0; -+} -+early_param("nokaiser", x86_nokaiser_setup); - #endif - - static int __init x86_noinvpcid_setup(char *s) -@@ -327,7 +341,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - static void setup_pcid(struct cpuinfo_x86 *c) - { - if (cpu_has(c, X86_FEATURE_PCID)) { -- if (cpu_has(c, X86_FEATURE_PGE)) { -+ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) { - cr4_set_bits(X86_CR4_PCIDE); - /* - * INVPCID has two "groups" of types: -@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c) - c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); - - init_scattered_cpuid_features(c); -+#ifdef CONFIG_KAISER -+ if (kaiser_enabled) -+ set_cpu_cap(c, X86_FEATURE_KAISER); -+#endif - } - - static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -@@ -1537,6 +1555,14 @@ void cpu_init(void) - * try to read it. - */ - cr4_init_shadow(); -+ if (!kaiser_enabled) { -+ /* -+ * secondary_startup_64() deferred setting PGE in cr4: -+ * probe_page_size_mask() sets it on the boot cpu, -+ * but it needs to be set on each secondary cpu. -+ */ -+ cr4_set_bits(X86_CR4_PGE); -+ } - - /* - * Load microcode on this cpu if a valid microcode is available. -diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c -index 560c2fd..e33b385 100644 ---- a/arch/x86/kernel/espfix_64.c -+++ b/arch/x86/kernel/espfix_64.c -@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void) - * area to ensure it is mapped into the shadow user page - * tables. - */ -- if (IS_ENABLED(CONFIG_KAISER)) -+ if (kaiser_enabled) { - set_pgd(native_get_shadow_pgd(pgd_p), - __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); -+ } - - /* Randomize the locations */ - init_espfix_random(); -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index 5775379..d04479b 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64) - movq $(init_level4_pgt - __START_KERNEL_map), %rax - 1: - -- /* Enable PAE mode and PGE */ -- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx -+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ -+ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx - movq %rcx, %cr4 - - /* Setup early boot stage 4 level pagetables. 
*/ -diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c -index 22af912..05a9855 100644 ---- a/arch/x86/mm/init.c -+++ b/arch/x86/mm/init.c -@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void) - cr4_set_bits_and_update_boot(X86_CR4_PSE); - - /* Enable PGE if available */ -- if (boot_cpu_has(X86_FEATURE_PGE)) { -+ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) { - cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else -diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c -index 14b9dd7..a0e8df6 100644 ---- a/arch/x86/mm/init_64.c -+++ b/arch/x86/mm/init_64.c -@@ -324,6 +324,16 @@ void __init cleanup_highmap(void) - continue; - if (vaddr < (unsigned long) _text || vaddr > end) - set_pmd(pmd, __pmd(0)); -+ else if (kaiser_enabled) { -+ /* -+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL: -+ * clear that now. This is not important, so long as -+ * CR4.PGE remains clear, but it removes an anomaly. -+ * Physical mapping setup below avoids _PAGE_GLOBAL -+ * by use of massage_pgprot() inside pfn_pte() etc. -+ */ -+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); -+ } - } - } - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index cc0950f..11032dc 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -16,7 +16,9 @@ - #include <asm/pgalloc.h> - #include <asm/desc.h> - --#ifdef CONFIG_KAISER -+int kaiser_enabled __read_mostly = 1; -+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ -+ - __visible - DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - -@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - return pte_offset_kernel(pmd, address); - } - --int kaiser_add_user_map(const void *__start_addr, unsigned long size, -- unsigned long flags) -+static int kaiser_add_user_map(const void *__start_addr, unsigned long size, -+ unsigned long flags) - { - int ret = 0; - pte_t *pte; -@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, - unsigned long end_addr = PAGE_ALIGN(start_addr + size); - unsigned long target_address; - -+ /* -+ * It is convenient for callers to pass in __PAGE_KERNEL etc, -+ * and there is no actual harm from setting _PAGE_GLOBAL, so -+ * long as CR4.PGE is not set. But it is nonetheless troubling -+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" -+ * requires that not to be #defined to 0): so mask it off here. 
-+ */ -+ flags &= ~_PAGE_GLOBAL; -+ - for (; address < end_addr; address += PAGE_SIZE) { - target_address = get_pa_from_mapping(address); - if (target_address == -1) { -@@ -263,6 +274,8 @@ void __init kaiser_init(void) - { - int cpu; - -+ if (!kaiser_enabled) -+ return; - kaiser_init_all_pgds(); - - for_each_possible_cpu(cpu) { -@@ -311,6 +324,8 @@ void __init kaiser_init(void) - /* Add a mapping to the shadow mapping, and synchronize the mappings */ - int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) - { -+ if (!kaiser_enabled) -+ return 0; - return kaiser_add_user_map((const void *)addr, size, flags); - } - -@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) - unsigned long addr, next; - pgd_t *pgd; - -+ if (!kaiser_enabled) -+ return; - pgd = native_get_shadow_pgd(pgd_offset_k(start)); - for (addr = start; addr < end; pgd++, addr = next) { - next = pgd_addr_end(addr, end); -@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp) - - pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - { -+ if (!kaiser_enabled) -+ return pgd; - /* - * Do we need to also populate the shadow pgd? Check _PAGE_USER to - * skip cases like kexec and EFI which make temporary low mappings. -@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(void) - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); --#endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 352fd01..5aaec8e 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd) - } - #else - --#ifdef CONFIG_KAISER - /* -- * Instead of one pmd, we aquire two pmds. Being order-1, it is -+ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ --#define PGD_ALLOCATION_ORDER 1 --#else --#define PGD_ALLOCATION_ORDER 0 --#endif -+#define PGD_ALLOCATION_ORDER kaiser_enabled - - static inline pgd_t *_pgd_alloc(void) - { -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 852c665..fde44bb 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -41,8 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) - { - unsigned long new_mm_cr3 = __pa(pgdir); - --#ifdef CONFIG_KAISER -- if (this_cpu_has(X86_FEATURE_PCID)) { -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { - /* - * We reuse the same PCID for different tasks, so we must - * flush all the entries for the PCID out when we change tasks. -@@ -59,7 +58,6 @@ static void load_new_mm_cr3(pgd_t *pgdir) - new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; - kaiser_flush_tlb_on_return_to_user(); - } --#endif /* CONFIG_KAISER */ - - /* - * Caution: many callers of this function expect -diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h -index a396292..67c93d9 100644 ---- a/tools/arch/x86/include/asm/cpufeatures.h -+++ b/tools/arch/x86/include/asm/cpufeatures.h -@@ -197,6 +197,9 @@ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - -+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+ - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ - #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch deleted file mode 100644 index 9b3ea121..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-Introduce-barrier_nospec.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 13c25ff312ecc09941828ec112a11c40debbfef1 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:33 -0800 -Subject: [PATCH 27/42] x86: Introduce barrier_nospec - -(cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a) - -Rename the open coded form of this instruction sequence from -rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). - -One of the mitigations for Spectre variant1 vulnerabilities is to fence -speculative execution after successfully validating a bounds check. I.e. -force the result of a bounds check to resolve in the instruction pipeline -to ensure speculative execution honors that result before potentially -operating on out-of-bounds data. - -No functional changes. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Suggested-by: Andi Kleen <ak@linux.intel.com> -Suggested-by: Ingo Molnar <mingo@redhat.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Kees Cook <keescook@chromium.org> -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/barrier.h | 4 ++++ - arch/x86/include/asm/msr.h | 3 +-- - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h -index ca22173..8575903 100644 ---- a/arch/x86/include/asm/barrier.h -+++ b/arch/x86/include/asm/barrier.h -@@ -47,6 +47,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - /* Override the default implementation from linux/nospec.h. */ - #define array_index_mask_nospec array_index_mask_nospec - -+/* Prevent speculative execution past this barrier. */ -+#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ -+ "lfence", X86_FEATURE_LFENCE_RDTSC) -+ - #ifdef CONFIG_X86_PPRO_FENCE - #define dma_rmb() rmb() - #else -diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h -index b5fee97..ed35b91 100644 ---- a/arch/x86/include/asm/msr.h -+++ b/arch/x86/include/asm/msr.h -@@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) - * that some other imaginary CPU is updating continuously with a - * time stamp. 
- */ -- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, -- "lfence", X86_FEATURE_LFENCE_RDTSC); -+ barrier_nospec(); - return rdtsc(); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch deleted file mode 100644 index ef01a1cb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 216ac4ef7d2da59cd2b3d6e34e559c7ef49a143d Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann <arnd@arndb.de> -Date: Wed, 19 Jul 2017 14:53:04 +0200 -Subject: [PATCH 27/33] x86: add MULTIUSER dependency for KVM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit c2ce3f5d89d57301e2756ac325fe2ebc33bfec30 upstream. - -KVM tries to select 'TASKSTATS', which had additional dependencies: - -warning: (KVM) selects TASKSTATS which has unmet direct dependencies (NET && MULTIUSER) - -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig -index ab8e32f..66da97d 100644 ---- a/arch/x86/kvm/Kconfig -+++ b/arch/x86/kvm/Kconfig -@@ -22,7 +22,7 @@ config KVM - depends on HAVE_KVM - depends on HIGH_RES_TIMERS - # for TASKSTATS/TASK_DELAY_ACCT: -- depends on NET -+ depends on NET && MULTIUSER - select PREEMPT_NOTIFIERS - select MMU_NOTIFIER - select ANON_INODES --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch deleted file mode 100644 index c058dd8f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch +++ /dev/null @@ -1,148 +0,0 @@ -From a24af5ff013ee664d221b6b4d4933f8317f4facb Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:20 -0400 -Subject: [PATCH 27/93] x86/bugs: Expose /sys/../spec_store_bypass - -commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream - -Add the sysfs file for the new vulerability. It does not do much except -show the words 'Vulnerable' for recent x86 cores. - -Intel cores prior to family 6 are known not to be vulnerable, and so are -some Atoms and some Xeon Phi. - -It assumes that older Cyrix, Centaur, etc. cores are immune. 
- -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/kernel/cpu/bugs.c | 5 +++++ - arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++ - drivers/base/cpu.c | 8 ++++++++ - include/linux/cpu.h | 2 ++ - 6 files changed, 40 insertions(+) - -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index dfd56ec..6d75a9c 100644 ---- a/Documentation/ABI/testing/sysfs-devices-system-cpu -+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -355,6 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities - /sys/devices/system/cpu/vulnerabilities/meltdown - /sys/devices/system/cpu/vulnerabilities/spectre_v1 - /sys/devices/system/cpu/vulnerabilities/spectre_v2 -+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass - Date: January 2018 - Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> - Description: Information about CPU vulnerabilities -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index a248531..a688adb 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -335,5 +335,6 @@ - #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ - #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ - #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ - - #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index f5cad2f..64e17a9 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -403,4 +403,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c - { - return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); - } -+ -+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); -+} - #endif -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 357c589..4f1050a 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -879,10 +879,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - {} - }; - -+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, -+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, -+ { X86_VENDOR_INTEL, 6, 
INTEL_FAM6_XEON_PHI_KNM }, -+ { X86_VENDOR_CENTAUR, 5, }, -+ { X86_VENDOR_INTEL, 5, }, -+ { X86_VENDOR_NSC, 5, }, -+ { X86_VENDOR_ANY, 4, }, -+ {} -+}; -+ - static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - { - u64 ia32_cap = 0; - -+ if (!x86_match_cpu(cpu_no_spec_store_bypass)) -+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); -+ - if (x86_match_cpu(cpu_no_speculation)) - return; - -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index 56b6c85..cbb1cc6 100644 ---- a/drivers/base/cpu.c -+++ b/drivers/base/cpu.c -@@ -519,14 +519,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, - return sprintf(buf, "Not affected\n"); - } - -+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "Not affected\n"); -+} -+ - static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); - static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); - static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); -+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); - - static struct attribute *cpu_root_vulnerabilities_attrs[] = { - &dev_attr_meltdown.attr, - &dev_attr_spectre_v1.attr, - &dev_attr_spectre_v2.attr, -+ &dev_attr_spec_store_bypass.attr, - NULL - }; - -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 2f475ad..917829b 100644 ---- a/include/linux/cpu.h -+++ b/include/linux/cpu.h -@@ -50,6 +50,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf); - extern ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf); -+extern ssize_t cpu_show_spec_store_bypass(struct device *dev, -+ struct device_attribute *attr, char *buf); - - extern __printf(4, 5) - struct device *cpu_device_create(struct device *parent, void *drvdata, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch deleted file mode 100644 index 5c62ba8b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 7e8b0d6af232b1d642960ca4fb026a70bfaf1206 Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann <arnd@arndb.de> -Date: Wed, 4 Oct 2017 12:28:18 +0200 -Subject: [PATCH 28/33] KVM: add X86_LOCAL_APIC dependency -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit e42eef4ba38806b18c4a74f0c276fb2e0b548173 upstream. - -The rework of the posted interrupt handling broke building without -support for the local APIC: - -ERROR: "boot_cpu_physical_apicid" [arch/x86/kvm/kvm-intel.ko] undefined! - -That configuration is probably not particularly useful anyway, so -we can avoid the randconfig failures by adding a Kconfig dependency. 
- -Fixes: 8b306e2f3c41 ("KVM: VMX: avoid double list add with VT-d posted interrupts") -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig -index 66da97d..9150e09 100644 ---- a/arch/x86/kvm/Kconfig -+++ b/arch/x86/kvm/Kconfig -@@ -23,6 +23,7 @@ config KVM - depends on HIGH_RES_TIMERS - # for TASKSTATS/TASK_DELAY_ACCT: - depends on NET && MULTIUSER -+ depends on X86_LOCAL_APIC - select PREEMPT_NOTIFIERS - select MMU_NOTIFIER - select ANON_INODES --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch deleted file mode 100644 index aac56df7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch +++ /dev/null @@ -1,83 +0,0 @@ -From b26b0d72d0e6506712e9ed45598814ff9e6b188b Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:39 -0800 -Subject: [PATCH 28/42] x86: Introduce __uaccess_begin_nospec() and - uaccess_try_nospec - -(cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd) - -For __get_user() paths, do not allow the kernel to speculate on the value -of a user controlled pointer. In addition to the 'stac' instruction for -Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the -access_ok() result to resolve in the pipeline before the CPU might take any -speculative action on the pointer value. Given the cost of 'stac' the -speculation barrier is placed after 'stac' to hopefully overlap the cost of -disabling SMAP with the cost of flushing the instruction pipeline. - -Since __get_user is a major kernel interface that deals with user -controlled pointers, the __uaccess_begin_nospec() mechanism will prevent -speculative execution past an access_ok() permission check. While -speculative execution past access_ok() is not enough to lead to a kernel -memory leak, it is a necessary precondition. - -To be clear, __uaccess_begin_nospec() is addressing a class of potential -problems near __get_user() usages. - -Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used -to protect __get_user(), pointer masking similar to array_index_nospec() -will be used for get_user() since it incorporates a bounds check near the -usage. - -uaccess_try_nospec provides the same mechanism for get_user_try. - -No functional changes. 
- -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Suggested-by: Andi Kleen <ak@linux.intel.com> -Suggested-by: Ingo Molnar <mingo@redhat.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Kees Cook <keescook@chromium.org> -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/uaccess.h | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h -index faf3687..c917703 100644 ---- a/arch/x86/include/asm/uaccess.h -+++ b/arch/x86/include/asm/uaccess.h -@@ -114,6 +114,11 @@ extern int __get_user_bad(void); - - #define __uaccess_begin() stac() - #define __uaccess_end() clac() -+#define __uaccess_begin_nospec() \ -+({ \ -+ stac(); \ -+ barrier_nospec(); \ -+}) - - /* - * This is a type: either unsigned long, if the argument fits into -@@ -465,6 +470,10 @@ struct __large_struct { unsigned long buf[100]; }; - __uaccess_begin(); \ - barrier(); - -+#define uaccess_try_nospec do { \ -+ current->thread.uaccess_err = 0; \ -+ __uaccess_begin_nospec(); \ -+ - #define uaccess_catch(err) \ - __uaccess_end(); \ - (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch deleted file mode 100644 index 19e234c1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 516277f549be576a1146ab20f22ab17393a2c53c Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Sat, 28 Apr 2018 22:34:17 +0200 -Subject: [PATCH 28/93] x86/cpufeatures: Add X86_FEATURE_RDS - -commit 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 upstream - -Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU -supports Reduced Data Speculation. 
- -[ tglx: Split it out from a later patch ] - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index a688adb..0c05c6c 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -306,6 +306,7 @@ - #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ - #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -+#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ - - /* - * BUG word(s) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch deleted file mode 100644 index 4e51c206..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 978e4ec232a9e93c7b378af9b2997e8cf4786a35 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Tue, 2 Jan 2018 14:19:48 +0100 -Subject: [PATCH 028/103] x86/kaiser: Rename and simplify X86_FEATURE_KAISER - handling - -Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti". - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 2 +- - arch/x86/kernel/cpu/common.c | 18 ------------------ - arch/x86/mm/kaiser.c | 20 +++++++++++++++++++- - 3 files changed, 20 insertions(+), 20 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index e2642ec..f5a95f77 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2753,7 +2753,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - - nojitter [IA-64] Disables jitter checking for ITC timers. - -- nokaiser [X86-64] Disable KAISER isolation of kernel from user. -+ nopti [X86-64] Disable KAISER isolation of kernel from user. 
- - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 8b03874..918e447 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s) - return 1; - } - __setup("nopcid", x86_pcid_setup); -- --static int __init x86_nokaiser_setup(char *s) --{ -- /* nokaiser doesn't accept parameters */ -- if (s) -- return -EINVAL; --#ifdef CONFIG_KAISER -- kaiser_enabled = 0; -- setup_clear_cpu_cap(X86_FEATURE_KAISER); -- pr_info("nokaiser: KAISER feature disabled\n"); --#endif -- return 0; --} --early_param("nokaiser", x86_nokaiser_setup); - #endif - - static int __init x86_noinvpcid_setup(char *s) -@@ -813,10 +799,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) - c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); - - init_scattered_cpuid_features(c); --#ifdef CONFIG_KAISER -- if (kaiser_enabled) -- set_cpu_cap(c, X86_FEATURE_KAISER); --#endif - } - - static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 11032dc..87cae72 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -274,8 +274,13 @@ void __init kaiser_init(void) - { - int cpu; - -- if (!kaiser_enabled) -+ if (!kaiser_enabled) { -+ setup_clear_cpu_cap(X86_FEATURE_KAISER); - return; -+ } -+ -+ setup_force_cpu_cap(X86_FEATURE_KAISER); -+ - kaiser_init_all_pgds(); - - for_each_possible_cpu(cpu) { -@@ -418,3 +423,16 @@ void kaiser_flush_tlb_on_return_to_user(void) - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -+ -+static int __init x86_nokaiser_setup(char *s) -+{ -+ /* nopti doesn't accept parameters */ -+ if (s) -+ return -EINVAL; -+ -+ kaiser_enabled = 0; -+ pr_info("Kernel/User page tables isolation: disabled\n"); -+ -+ return 0; -+} -+early_param("nopti", x86_nokaiser_setup); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch deleted file mode 100644 index b50d5453..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 8e13680f134458dd1b0529ccb636ae5895fa8a4d Mon Sep 17 00:00:00 2001 -From: Wanpeng Li <wanpeng.li@hotmail.com> -Date: Thu, 14 Sep 2017 03:54:16 -0700 -Subject: [PATCH 29/33] KVM: async_pf: Fix #DF due to inject "Page not Present" - and "Page Ready" exceptions simultaneously -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 9a6e7c39810e4a8bc7fc95056cefb40583fe07ef upstream. - -qemu-system-x86-8600 [004] d..1 7205.687530: kvm_entry: vcpu 2 -qemu-system-x86-8600 [004] .... 7205.687532: kvm_exit: reason EXCEPTION_NMI rip 0xffffffffa921297d info ffffeb2c0e44e018 80000b0e -qemu-system-x86-8600 [004] .... 7205.687532: kvm_page_fault: address ffffeb2c0e44e018 error_code 0 -qemu-system-x86-8600 [004] .... 7205.687620: kvm_try_async_get_page: gva = 0xffffeb2c0e44e018, gfn = 0x427e4e -qemu-system-x86-8600 [004] .N.. 7205.687628: kvm_async_pf_not_present: token 0x8b002 gva 0xffffeb2c0e44e018 - kworker/4:2-7814 [004] .... 7205.687655: kvm_async_pf_completed: gva 0xffffeb2c0e44e018 address 0x7fcc30c4e000 -qemu-system-x86-8600 [004] .... 
7205.687703: kvm_async_pf_ready: token 0x8b002 gva 0xffffeb2c0e44e018 -qemu-system-x86-8600 [004] d..1 7205.687711: kvm_entry: vcpu 2 - -After running some memory intensive workload in guest, I catch the kworker -which completes the GUP too quickly, and queues an "Page Ready" #PF exception -after the "Page not Present" exception before the next vmentry as the above -trace which will result in #DF injected to guest. - -This patch fixes it by clearing the queue for "Page not Present" if "Page Ready" -occurs before the next vmentry since the GUP has already got the required page -and shadow page table has already been fixed by "Page Ready" handler. - -Cc: Paolo Bonzini <pbonzini@redhat.com> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> -Fixes: 7c90705bf2a3 ("KVM: Inject asynchronous page fault into a PV guest if page is swapped out.") -[Changed indentation and added clearing of injected. - Radim] -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -[port from upstream v4.14-rc1, Don't assign to kvm_queued_exception::injected or - x86_exception::async_page_fault] -Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 34 ++++++++++++++++++++++++++-------- - 1 file changed, 26 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index af333e1..9f0f7e2 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -8370,6 +8370,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) - sizeof(val)); - } - -+static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) -+{ -+ -+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val, -+ sizeof(u32)); -+} -+ - void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) - { -@@ -8396,6 +8403,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, - struct kvm_async_pf *work) - { - struct x86_exception fault; -+ u32 val; - - trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (work->wakeup_all) -@@ -8403,14 +8411,24 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, - else - kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - -- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && -- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -- fault.vector = PF_VECTOR; -- fault.error_code_valid = true; -- fault.error_code = 0; -- fault.nested_page_fault = false; -- fault.address = work->arch.token; -- kvm_inject_page_fault(vcpu, &fault); -+ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && -+ !apf_get_user(vcpu, &val)) { -+ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT && -+ vcpu->arch.exception.pending && -+ vcpu->arch.exception.nr == PF_VECTOR && -+ !apf_put_user(vcpu, 0)) { -+ vcpu->arch.exception.pending = false; -+ vcpu->arch.exception.nr = 0; -+ vcpu->arch.exception.has_error_code = false; -+ vcpu->arch.exception.error_code = 0; -+ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -+ fault.vector = PF_VECTOR; -+ fault.error_code_valid = true; -+ fault.error_code = 0; -+ fault.nested_page_fault = false; -+ fault.address = work->arch.token; -+ kvm_inject_page_fault(vcpu, &fault); -+ } - } - vcpu->arch.apf.halted = false; - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch deleted 
file mode 100644 index 15084ab2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch +++ /dev/null @@ -1,272 +0,0 @@ -From b3c238b8a317093dd74e635d553271f2c56cb8c3 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:21 -0400 -Subject: [PATCH 29/93] x86/bugs: Provide boot parameters for the - spec_store_bypass_disable mitigation - -commit 24f7fc83b9204d20f878c57cb77d261ae825e033 upstream - -Contemporary high performance processors use a common industry-wide -optimization known as "Speculative Store Bypass" in which loads from -addresses to which a recent store has occurred may (speculatively) see an -older value. Intel refers to this feature as "Memory Disambiguation" which -is part of their "Smart Memory Access" capability. - -Memory Disambiguation can expose a cache side-channel attack against such -speculatively read values. An attacker can create exploit code that allows -them to read memory outside of a sandbox environment (for example, -malicious JavaScript in a web page), or to perform more complex attacks -against code running within the same privilege level, e.g. via the stack. - -As a first step to mitigate against such attacks, provide two boot command -line control knobs: - - nospec_store_bypass_disable - spec_store_bypass_disable=[off,auto,on] - -By default affected x86 processors will power on with Speculative -Store Bypass enabled. Hence the provided kernel parameters are written -from the point of view of whether to enable a mitigation or not. -The parameters are as follows: - - - auto - Kernel detects whether your CPU model contains an implementation - of Speculative Store Bypass and picks the most appropriate - mitigation. - - - on - disable Speculative Store Bypass - - off - enable Speculative Store Bypass - -[ tglx: Reordered the checks so that the whole evaluation is not done - when the CPU does not support RDS ] - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 33 +++++++++++ - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/nospec-branch.h | 6 ++ - arch/x86/kernel/cpu/bugs.c | 103 +++++++++++++++++++++++++++++++++++ - 4 files changed, 143 insertions(+) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 4b438e4..348ca9d 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2686,6 +2686,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - allow data leaks with this option, which is equivalent - to spectre_v2=off. - -+ nospec_store_bypass_disable -+ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability -+ - noxsave [BUGS=X86] Disables x86 extended register state save - and restore using xsave. The kernel will fallback to - enabling legacy floating-point and sse state. -@@ -3962,6 +3965,36 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - Not specifying this option is equivalent to - spectre_v2=auto. 
- -+ spec_store_bypass_disable= -+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation -+ (Speculative Store Bypass vulnerability) -+ -+ Certain CPUs are vulnerable to an exploit against a -+ a common industry wide performance optimization known -+ as "Speculative Store Bypass" in which recent stores -+ to the same memory location may not be observed by -+ later loads during speculative execution. The idea -+ is that such stores are unlikely and that they can -+ be detected prior to instruction retirement at the -+ end of a particular speculation execution window. -+ -+ In vulnerable processors, the speculatively forwarded -+ store can be used in a cache side channel attack, for -+ example to read memory to which the attacker does not -+ directly have access (e.g. inside sandboxed code). -+ -+ This parameter controls whether the Speculative Store -+ Bypass optimization is used. -+ -+ on - Unconditionally disable Speculative Store Bypass -+ off - Unconditionally enable Speculative Store Bypass -+ auto - Kernel detects whether the CPU model contains an -+ implementation of Speculative Store Bypass and -+ picks the most appropriate mitigation -+ -+ Not specifying this option is equivalent to -+ spec_store_bypass_disable=auto. -+ - spia_io_base= [HW,MTD] - spia_fio_base= - spia_pedr= -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 0c05c6c..013f3de 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -204,6 +204,7 @@ - - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index d1c2630..7b9eacf 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -238,6 +238,12 @@ extern u64 x86_spec_ctrl_get_default(void); - extern void x86_spec_ctrl_set_guest(u64); - extern void x86_spec_ctrl_restore_host(u64); - -+/* The Speculative Store Bypass disable variants */ -+enum ssb_mitigation { -+ SPEC_STORE_BYPASS_NONE, -+ SPEC_STORE_BYPASS_DISABLE, -+}; -+ - extern char __indirect_thunk_start[]; - extern char __indirect_thunk_end[]; - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 64e17a9..75146d9 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -26,6 +26,7 @@ - #include <asm/intel-family.h> - - static void __init spectre_v2_select_mitigation(void); -+static void __init ssb_select_mitigation(void); - - /* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any -@@ -52,6 +53,12 @@ void __init check_bugs(void) - /* Select the proper spectre mitigation before patching alternatives */ - spectre_v2_select_mitigation(); - -+ /* -+ * Select proper mitigation for any exposure to the Speculative Store -+ * Bypass vulnerability. -+ */ -+ ssb_select_mitigation(); -+ - #ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. 
-@@ -357,6 +364,99 @@ static void __init spectre_v2_select_mitigation(void) - } - - #undef pr_fmt -+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt -+ -+static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; -+ -+/* The kernel command line selection */ -+enum ssb_mitigation_cmd { -+ SPEC_STORE_BYPASS_CMD_NONE, -+ SPEC_STORE_BYPASS_CMD_AUTO, -+ SPEC_STORE_BYPASS_CMD_ON, -+}; -+ -+static const char *ssb_strings[] = { -+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable", -+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" -+}; -+ -+static const struct { -+ const char *option; -+ enum ssb_mitigation_cmd cmd; -+} ssb_mitigation_options[] = { -+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ -+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ -+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -+}; -+ -+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) -+{ -+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; -+ char arg[20]; -+ int ret, i; -+ -+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { -+ return SPEC_STORE_BYPASS_CMD_NONE; -+ } else { -+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", -+ arg, sizeof(arg)); -+ if (ret < 0) -+ return SPEC_STORE_BYPASS_CMD_AUTO; -+ -+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { -+ if (!match_option(arg, ret, ssb_mitigation_options[i].option)) -+ continue; -+ -+ cmd = ssb_mitigation_options[i].cmd; -+ break; -+ } -+ -+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) { -+ pr_err("unknown option (%s). Switching to AUTO select\n", arg); -+ return SPEC_STORE_BYPASS_CMD_AUTO; -+ } -+ } -+ -+ return cmd; -+} -+ -+static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) -+{ -+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; -+ enum ssb_mitigation_cmd cmd; -+ -+ if (!boot_cpu_has(X86_FEATURE_RDS)) -+ return mode; -+ -+ cmd = ssb_parse_cmdline(); -+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && -+ (cmd == SPEC_STORE_BYPASS_CMD_NONE || -+ cmd == SPEC_STORE_BYPASS_CMD_AUTO)) -+ return mode; -+ -+ switch (cmd) { -+ case SPEC_STORE_BYPASS_CMD_AUTO: -+ case SPEC_STORE_BYPASS_CMD_ON: -+ mode = SPEC_STORE_BYPASS_DISABLE; -+ break; -+ case SPEC_STORE_BYPASS_CMD_NONE: -+ break; -+ } -+ -+ if (mode != SPEC_STORE_BYPASS_NONE) -+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); -+ return mode; -+} -+ -+static void ssb_select_mitigation() -+{ -+ ssb_mode = __ssb_select_mitigation(); -+ -+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) -+ pr_info("%s\n", ssb_strings[ssb_mode]); -+} -+ -+#undef pr_fmt - - #ifdef CONFIG_SYSFS - -@@ -382,6 +482,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? 
", IBRS_FW" : "", - spectre_v2_module_string()); - -+ case X86_BUG_SPEC_STORE_BYPASS: -+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); -+ - default: - break; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch deleted file mode 100644 index af4df0b0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-kaiser-Check-boottime-cmdline-params.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 2ab17d5c490effaa13892d4fea0fe1970ede2e0a Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Tue, 2 Jan 2018 14:19:48 +0100 -Subject: [PATCH 029/103] x86/kaiser: Check boottime cmdline params - -AMD (and possibly other vendors) are not affected by the leak -KAISER is protecting against. - -Keep the "nopti" for traditional reasons and add pti=<on|off|auto> -like upstream. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 6 ++++ - arch/x86/mm/kaiser.c | 59 ++++++++++++++++++++++++++----------- - 2 files changed, 47 insertions(+), 18 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index f5a95f77..9f04c53 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -3317,6 +3317,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - pt. [PARIDE] - See Documentation/blockdev/paride.txt. - -+ pti= [X86_64] -+ Control KAISER user/kernel address space isolation: -+ on - enable -+ off - disable -+ auto - default setting -+ - pty.legacy_count= - [KNL] Number of legacy pty's. Overwrites compiled-in - default number. -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 87cae72..1840aa0 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -15,6 +15,7 @@ - #include <asm/pgtable.h> - #include <asm/pgalloc.h> - #include <asm/desc.h> -+#include <asm/cmdline.h> - - int kaiser_enabled __read_mostly = 1; - EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ -@@ -263,6 +264,43 @@ static void __init kaiser_init_all_pgds(void) - WARN_ON(__ret); \ - } while (0) - -+void __init kaiser_check_boottime_disable(void) -+{ -+ bool enable = true; -+ char arg[5]; -+ int ret; -+ -+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); -+ if (ret > 0) { -+ if (!strncmp(arg, "on", 2)) -+ goto enable; -+ -+ if (!strncmp(arg, "off", 3)) -+ goto disable; -+ -+ if (!strncmp(arg, "auto", 4)) -+ goto skip; -+ } -+ -+ if (cmdline_find_option_bool(boot_command_line, "nopti")) -+ goto disable; -+ -+skip: -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) -+ goto disable; -+ -+enable: -+ if (enable) -+ setup_force_cpu_cap(X86_FEATURE_KAISER); -+ -+ return; -+ -+disable: -+ pr_info("Kernel/User page tables isolation: disabled\n"); -+ kaiser_enabled = 0; -+ setup_clear_cpu_cap(X86_FEATURE_KAISER); -+} -+ - /* - * If anything in here fails, we will likely die on one of the - * first kernel->user transitions and init will die. 
But, we -@@ -274,12 +312,10 @@ void __init kaiser_init(void) - { - int cpu; - -- if (!kaiser_enabled) { -- setup_clear_cpu_cap(X86_FEATURE_KAISER); -- return; -- } -+ kaiser_check_boottime_disable(); - -- setup_force_cpu_cap(X86_FEATURE_KAISER); -+ if (!kaiser_enabled) -+ return; - - kaiser_init_all_pgds(); - -@@ -423,16 +459,3 @@ void kaiser_flush_tlb_on_return_to_user(void) - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -- --static int __init x86_nokaiser_setup(char *s) --{ -- /* nopti doesn't accept parameters */ -- if (s) -- return -EINVAL; -- -- kaiser_enabled = 0; -- pr_info("Kernel/User page tables isolation: disabled\n"); -- -- return 0; --} --early_param("nopti", x86_nokaiser_setup); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch deleted file mode 100644 index a27e1b16..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 73e4bfd188d510a576ca75964cd7939d97171e1f Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:44 -0800 -Subject: [PATCH 29/42] x86/usercopy: Replace open coded stac/clac with - __uaccess_{begin, end} - -(cherry picked from commit b5c4ae4f35325d520b230bab6eb3310613b72ac1) - -In preparation for converting some __uaccess_begin() instances to -__uacess_begin_nospec(), make sure all 'from user' uaccess paths are -using the _begin(), _end() helpers rather than open-coded stac() and -clac(). - -No functional changes. 
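[Illustration, not part of the original patch: in this kernel __uaccess_begin() and __uaccess_end() are essentially named wrappers around the same stac/clac SMAP toggles, which is why the conversion is purely mechanical. A minimal sketch of the pattern, with stub stac()/clac() so it compiles on its own:]

/* stand-ins for the real inline-asm helpers (the "stac"/"clac" instructions) */
static inline void stac(void) { }
static inline void clac(void) { }

#define __uaccess_begin()	stac()
#define __uaccess_end()		clac()

static unsigned long copy_example(void *to, const void *from, unsigned long n)
{
	(void)to; (void)from;
	__uaccess_begin();	/* was: stac();  allow kernel->user accesses */
	/* ... bulk copy loop would go here ... */
	__uaccess_end();	/* was: clac();  forbid them again */
	return n;
}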
- -Suggested-by: Ingo Molnar <mingo@redhat.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Kees Cook <keescook@chromium.org> -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/lib/usercopy_32.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c -index 3bc7baf..9b5fa0f 100644 ---- a/arch/x86/lib/usercopy_32.c -+++ b/arch/x86/lib/usercopy_32.c -@@ -570,12 +570,12 @@ do { \ - unsigned long __copy_to_user_ll(void __user *to, const void *from, - unsigned long n) - { -- stac(); -+ __uaccess_begin(); - if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); - else - n = __copy_user_intel(to, from, n); -- clac(); -+ __uaccess_end(); - return n; - } - EXPORT_SYMBOL(__copy_to_user_ll); -@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); - unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, - unsigned long n) - { -- stac(); -+ __uaccess_begin(); - #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) - n = __copy_user_intel_nocache(to, from, n); -@@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr - #else - __copy_user(to, from, n); - #endif -- clac(); -+ __uaccess_end(); - return n; - } - EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch deleted file mode 100644 index fefa3aac..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 9d11f29130341345dee37007dd76b9c4e83956a9 Mon Sep 17 00:00:00 2001 -From: Jan Dakinevich <jan.dakinevich@gmail.com> -Date: Fri, 23 Feb 2018 11:42:17 +0100 -Subject: [PATCH 30/33] KVM: VMX: clean up declaration of VPID/EPT invalidation - types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 63f3ac48133a19110c8a3666028dbd9b1bf3dcb3 upstream - -- Remove VMX_EPT_EXTENT_INDIVIDUAL_ADDR, since there is no such type of - EPT invalidation - - - Add missing VPID types names - -Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com> -Tested-by: Ladi Prosek <lprosek@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -[jwang: port to 4.4] -Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/vmx.h | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h -index a002b07..6899cf1 100644 ---- a/arch/x86/include/asm/vmx.h -+++ b/arch/x86/include/asm/vmx.h -@@ -399,10 +399,11 @@ enum vmcs_field { - #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2) - - #define VMX_NR_VPIDS (1 << 16) 
-+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0 - #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 - #define VMX_VPID_EXTENT_ALL_CONTEXT 2 -+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3 - --#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 - #define VMX_EPT_EXTENT_CONTEXT 1 - #define VMX_EPT_EXTENT_GLOBAL 2 - #define VMX_EPT_EXTENT_SHIFT 24 -@@ -419,8 +420,10 @@ enum vmcs_field { - #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) - - #define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ -+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */ - #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ - #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ -+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ - - #define VMX_EPT_DEFAULT_GAW 3 - #define VMX_EPT_MAX_GAW 0x4 --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch deleted file mode 100644 index f0bb8bea..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 1aa1f0422243ae2e5e8b07d7e2c4004544d39727 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Tue, 3 Oct 2017 20:49:04 -0700 -Subject: [PATCH 030/103] kaiser: use ALTERNATIVE instead of - x86_cr3_pcid_noflush - -Now that we're playing the ALTERNATIVE game, use that more efficient -method: instead of user-mapping an extra page, and reading an extra -cacheline each time for x86_cr3_pcid_noflush. - -Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax) -is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs; -but the one line with $63 in looks clearer, so let's stick with that. - -Worried about what happens with an ALTERNATIVE between the jump and -jump label in another ALTERNATIVE? I was, but have checked the -combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64, -and it does a good job. 
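[Illustration, not part of the patch: the "bts $63, %rax" alternative simply sets the CR3 no-flush bit in the register that is about to be written to %cr3. A small C sketch of the equivalent bit operation:]

#include <stdint.h>

#define X86_CR3_PCID_NOFLUSH	(1ULL << 63)	/* the bit that "bts $63, %rax" sets */

/* With PCID enabled, setting bit 63 of the value written to CR3 asks the CPU
 * to keep TLB entries for the target PCID instead of flushing them. */
static inline uint64_t cr3_with_noflush(uint64_t cr3)
{
	return cr3 | X86_CR3_PCID_NOFLUSH;
}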
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 7 ++++--- - arch/x86/include/asm/kaiser.h | 6 +++--- - arch/x86/mm/kaiser.c | 11 +---------- - 3 files changed, 8 insertions(+), 16 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index bbb38ac..d4ba81e 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1084,7 +1084,8 @@ ENTRY(paranoid_entry) - jz 2f - orl $2, %ebx - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -- orq x86_cr3_pcid_noflush, %rax -+ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ -+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID - movq %rax, %cr3 - 2: - #endif -@@ -1342,7 +1343,7 @@ ENTRY(nmi) - /* %rax is saved above, so OK to clobber here */ - ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ -- orq x86_cr3_pcid_noflush, %rax -+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -@@ -1586,7 +1587,7 @@ end_repeat_nmi: - /* %rax is saved above, so OK to clobber here */ - ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ -- orq x86_cr3_pcid_noflush, %rax -+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 96643a9..906150d 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -25,7 +25,8 @@ - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg --orq x86_cr3_pcid_noflush, \reg -+/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */ -+ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID - movq \reg, %cr3 - .endm - -@@ -39,7 +40,7 @@ movq \reg, %cr3 - movq %cr3, \reg - orq PER_CPU_VAR(x86_cr3_pcid_user), \reg - js 9f --/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */ -+/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */ - movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7) - 9: - movq \reg, %cr3 -@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - */ - DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - --extern unsigned long x86_cr3_pcid_noflush; - DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 1840aa0..b8aa9ad 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -31,7 +31,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - * This is also handy because systems that do not support PCIDs - * just end up or'ing a 0 into their CR3, which does no harm. 
- */ --unsigned long x86_cr3_pcid_noflush __read_mostly; - DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - /* -@@ -356,10 +355,6 @@ void __init kaiser_init(void) - kaiser_add_user_map_early(&debug_idt_table, - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL); -- -- kaiser_add_user_map_early(&x86_cr3_pcid_noflush, -- sizeof(x86_cr3_pcid_noflush), -- __PAGE_KERNEL); - } - - /* Add a mapping to the shadow mapping, and synchronize the mappings */ -@@ -433,18 +428,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - - void kaiser_setup_pcid(void) - { -- unsigned long kern_cr3 = 0; - unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; - -- if (this_cpu_has(X86_FEATURE_PCID)) { -- kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; -+ if (this_cpu_has(X86_FEATURE_PCID)) - user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; -- } - /* - * These variables are used by the entry/exit - * code to change PCID and pgd and TLB flushing. - */ -- x86_cr3_pcid_noflush = kern_cr3; - this_cpu_write(x86_cr3_pcid_user, user_cr3); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch deleted file mode 100644 index d4c39c90..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 58645a84abdc201b048cc16d3e1e500884ca452b Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:22 -0400 -Subject: [PATCH 30/93] x86/bugs/intel: Set proper CPU features and setup RDS - -commit 772439717dbf703b39990be58d8d4e3e4ad0598a upstream - -Intel CPUs expose methods to: - - - Detect whether RDS capability is available via CPUID.7.0.EDX[31], - - - The SPEC_CTRL MSR(0x48), bit 2 set to enable RDS. - - - MSR_IA32_ARCH_CAPABILITIES, Bit(4) no need to enable RRS. - -With that in mind if spec_store_bypass_disable=[auto,on] is selected set at -boot-time the SPEC_CTRL MSR to enable RDS if the platform requires it. - -Note that this does not fix the KVM case where the SPEC_CTRL is exposed to -guests which can muck with it, see patch titled : - KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS. 
- -And for the firmware (IBRS to be set), see patch titled: - x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits - -[ tglx: Distangled it from the intel implementation and kept the call order ] - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 6 ++++++ - arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++-- - arch/x86/kernel/cpu/common.c | 10 ++++++---- - arch/x86/kernel/cpu/cpu.h | 3 +++ - arch/x86/kernel/cpu/intel.c | 1 + - 5 files changed, 44 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 0e4da8e..9f014c1 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -40,6 +40,7 @@ - #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ - #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ - #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -+#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ - - #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ - #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ -@@ -61,6 +62,11 @@ - #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a - #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ - #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -+#define ARCH_CAP_RDS_NO (1 << 4) /* -+ * Not susceptible to Speculative Store Bypass -+ * attack, so no Reduced Data Speculation control -+ * required. -+ */ - - #define MSR_IA32_BBL_CR_CTL 0x00000119 - #define MSR_IA32_BBL_CR_CTL3 0x0000011e -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 75146d9..7dd16f4 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -116,7 +116,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; - - void x86_spec_ctrl_set(u64 val) - { -- if (val & ~SPEC_CTRL_IBRS) -+ if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) - WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); - else - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -@@ -443,8 +443,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - break; - } - -- if (mode != SPEC_STORE_BYPASS_NONE) -+ /* -+ * We have three CPU feature flags that are in play here: -+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. -+ * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass -+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation -+ */ -+ if (mode != SPEC_STORE_BYPASS_NONE) { - setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); -+ /* -+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses -+ * a completely different MSR and bit dependent on family. 
-+ */ -+ switch (boot_cpu_data.x86_vendor) { -+ case X86_VENDOR_INTEL: -+ x86_spec_ctrl_base |= SPEC_CTRL_RDS; -+ x86_spec_ctrl_set(SPEC_CTRL_RDS); -+ break; -+ case X86_VENDOR_AMD: -+ break; -+ } -+ } -+ - return mode; - } - -@@ -458,6 +478,12 @@ static void ssb_select_mitigation() - - #undef pr_fmt - -+void x86_spec_ctrl_setup_ap(void) -+{ -+ if (boot_cpu_has(X86_FEATURE_IBRS)) -+ x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); -+} -+ - #ifdef CONFIG_SYSFS - - ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 4f1050a..ab6b3ad 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -903,7 +903,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - { - u64 ia32_cap = 0; - -- if (!x86_match_cpu(cpu_no_spec_store_bypass)) -+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) -+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); -+ -+ if (!x86_match_cpu(cpu_no_spec_store_bypass) && -+ !(ia32_cap & ARCH_CAP_RDS_NO)) - setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - - if (x86_match_cpu(cpu_no_speculation)) -@@ -915,9 +919,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - if (x86_match_cpu(cpu_no_meltdown)) - return; - -- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) -- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); -- - /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) - return; -@@ -1339,6 +1340,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) - #endif - mtrr_ap_init(); - validate_apic_and_package_id(c); -+ x86_spec_ctrl_setup_ap(); - } - - struct msr_range { -diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h -index 2584265..3b19d82 100644 ---- a/arch/x86/kernel/cpu/cpu.h -+++ b/arch/x86/kernel/cpu/cpu.h -@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], - - extern void get_cpu_cap(struct cpuinfo_x86 *c); - extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); -+ -+extern void x86_spec_ctrl_setup_ap(void); -+ - #endif /* ARCH_X86_CPU_H */ -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 8fb1d65..f15aea6 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -154,6 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - setup_clear_cpu_cap(X86_FEATURE_STIBP); - setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); - setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); -+ setup_clear_cpu_cap(X86_FEATURE_RDS); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch deleted file mode 100644 index fab4948d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 268e7abcab638b44ca26107c32bf0c2df0a5b678 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:49 -0800 -Subject: [PATCH 30/42] x86/uaccess: Use __uaccess_begin_nospec() and - uaccess_try_nospec - -(cherry picked from commit 304ec1b050310548db33063e567123fae8fd0301) - -Quoting Linus: - - I do think that it would be a good idea to very expressly document - the fact that it's not that the user access itself is unsafe. 
I do - agree that things like "get_user()" want to be protected, but not - because of any direct bugs or problems with get_user() and friends, - but simply because get_user() is an excellent source of a pointer - that is obviously controlled from a potentially attacking user - space. So it's a prime candidate for then finding _subsequent_ - accesses that can then be used to perturb the cache. - -__uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the -limit check is far away from the user pointer de-reference. In those cases -a barrier_nospec() prevents speculation with a potential pointer to -privileged memory. uaccess_try_nospec covers get_user_try. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Suggested-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: Kees Cook <keescook@chromium.org> -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/uaccess.h | 6 +++--- - arch/x86/include/asm/uaccess_32.h | 12 ++++++------ - arch/x86/include/asm/uaccess_64.h | 12 ++++++------ - arch/x86/lib/usercopy_32.c | 4 ++-- - 4 files changed, 17 insertions(+), 17 deletions(-) - -diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h -index c917703..f80021b 100644 ---- a/arch/x86/include/asm/uaccess.h -+++ b/arch/x86/include/asm/uaccess.h -@@ -428,7 +428,7 @@ do { \ - ({ \ - int __gu_err; \ - __inttype(*(ptr)) __gu_val; \ -- __uaccess_begin(); \ -+ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - __uaccess_end(); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ -@@ -538,7 +538,7 @@ struct __large_struct { unsigned long buf[100]; }; - * get_user_ex(...); - * } get_user_catch(err) - */ --#define get_user_try uaccess_try -+#define get_user_try uaccess_try_nospec - #define get_user_catch(err) uaccess_catch(err) - - #define get_user_ex(x, ptr) do { \ -@@ -573,7 +573,7 @@ extern void __cmpxchg_wrong_size(void) - __typeof__(ptr) __uval = (uval); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ -- __uaccess_begin(); \ -+ __uaccess_begin_nospec(); \ - switch (size) { \ - case 1: \ - { \ -diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h -index 7d3bdd1..d6d2450 100644 ---- a/arch/x86/include/asm/uaccess_32.h -+++ b/arch/x86/include/asm/uaccess_32.h -@@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) - - switch (n) { - case 1: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u8 *)to, from, 1, ret, 1); - __uaccess_end(); - return ret; - case 2: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u16 *)to, from, 2, ret, 2); - __uaccess_end(); - return ret; - case 4: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u32 *)to, from, 4, ret, 4); - __uaccess_end(); - return ret; -@@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, - - switch (n) { - case 1: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u8 *)to, from, 1, ret, 1); 
- __uaccess_end(); - return ret; - case 2: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u16 *)to, from, 2, ret, 2); - __uaccess_end(); - return ret; - case 4: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_size(*(u32 *)to, from, 4, ret, 4); - __uaccess_end(); - return ret; -diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h -index 673059a..6e5cc08 100644 ---- a/arch/x86/include/asm/uaccess_64.h -+++ b/arch/x86/include/asm/uaccess_64.h -@@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) -@@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) - __uaccess_end(); - return ret; - case 16: -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) -diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c -index 9b5fa0f..5c06dbf 100644 ---- a/arch/x86/lib/usercopy_32.c -+++ b/arch/x86/lib/usercopy_32.c -@@ -570,7 +570,7 @@ do { \ - unsigned long __copy_to_user_ll(void __user *to, const void *from, - unsigned long n) - { -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); - else -@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); - unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, - unsigned long n) - { -- __uaccess_begin(); -+ __uaccess_begin_nospec(); - #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) - n = __copy_user_intel_nocache(to, from, n); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch deleted file mode 100644 index e96f0d9b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 1d5388c0b1e6eef66d7999451bb22cddf4cc5546 Mon Sep 17 00:00:00 2001 -From: Jan Dakinevich <jan.dakinevich@gmail.com> -Date: Fri, 23 Feb 2018 11:42:18 +0100 -Subject: [PATCH 31/33] KVM: nVMX: invvpid handling improvements -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit bcdde302b8268ef7dbc4ddbdaffb5b44eafe9a1e upstream - - - Expose all invalidation types to the L1 - - - Reject invvpid instruction, if L1 passed zero vpid value to single - context invalidations - -Signed-off-by: Jan Dakinevich 
<jan.dakinevich@gmail.com> -Tested-by: Ladi Prosek <lprosek@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -[jwang: port to 4.4] -Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 36 ++++++++++++++++++++++++------------ - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 85078c7..f6c0568 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -142,6 +142,12 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); - - #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 - -+#define VMX_VPID_EXTENT_SUPPORTED_MASK \ -+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ -+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ -+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ -+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) -+ - /* - * These 2 parameters are used to config the controls for Pause-Loop Exiting: - * ple_gap: upper bound on the amount of time between two successive -@@ -2836,8 +2842,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) - */ - if (enable_vpid) - vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | -- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | -- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; -+ VMX_VPID_EXTENT_SUPPORTED_MASK; - else - vmx->nested.nested_vmx_vpid_caps = 0; - -@@ -7671,7 +7676,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - -- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; -+ types = (vmx->nested.nested_vmx_vpid_caps & -+ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; - - if (type >= 32 || !(types & (1 << type))) { - nested_vmx_failValid(vcpu, -@@ -7693,21 +7699,27 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) - } - - switch (type) { -+ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: - case VMX_VPID_EXTENT_SINGLE_CONTEXT: -- /* -- * Old versions of KVM use the single-context version so we -- * have to support it; just treat it the same as all-context. 
-- */ -+ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: -+ if (!vpid) { -+ nested_vmx_failValid(vcpu, -+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); -+ skip_emulated_instruction(vcpu); -+ return 1; -+ } -+ break; - case VMX_VPID_EXTENT_ALL_CONTEXT: -- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); -- nested_vmx_succeed(vcpu); - break; - default: -- /* Trap individual address invalidation invvpid calls */ -- BUG_ON(1); -- break; -+ WARN_ON_ONCE(1); -+ skip_emulated_instruction(vcpu); -+ return 1; - } - -+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02); -+ nested_vmx_succeed(vcpu); -+ - skip_emulated_instruction(vcpu); - return 1; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch deleted file mode 100644 index d78da68d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 37261333171ffce6be264a70a6a8db11616b2d6f Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 29 Oct 2017 11:36:19 -0700 -Subject: [PATCH 031/103] kaiser: drop is_atomic arg to kaiser_pagetable_walk() - -I have not observed a might_sleep() warning from setup_fixmap_gdt()'s -use of kaiser_add_mapping() in our tree (why not?), but like upstream -we have not provided a way for that to pass is_atomic true down to -kaiser_pagetable_walk(), and at startup it's far from a likely source -of trouble: so just delete the walk's is_atomic arg and might_sleep(). - -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index b8aa9ad..65ac3fd 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -107,19 +107,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) - * - * Returns a pointer to a PTE on success, or NULL on failure. 
- */ --static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) -+static pte_t *kaiser_pagetable_walk(unsigned long address) - { - pmd_t *pmd; - pud_t *pud; - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); - gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - -- if (is_atomic) { -- gfp &= ~GFP_KERNEL; -- gfp |= __GFP_HIGH | __GFP_ATOMIC; -- } else -- might_sleep(); -- - if (pgd_none(*pgd)) { - WARN_ONCE(1, "All shadow pgds should have been populated"); - return NULL; -@@ -194,7 +188,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, - ret = -EIO; - break; - } -- pte = kaiser_pagetable_walk(address, false); -+ pte = kaiser_pagetable_walk(address); - if (!pte) { - ret = -ENOMEM; - break; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch deleted file mode 100644 index ff00b421..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 9b78406df0ca3d21903d71f41b64a793dad76cfc Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:23 -0400 -Subject: [PATCH 31/93] x86/bugs: Whitelist allowed SPEC_CTRL MSR values - -commit 1115a859f33276fe8afb31c60cf9d8e657872558 upstream - -Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the -future (or in fact use different MSRs for the same functionality). - -As such a run-time mechanism is required to whitelist the appropriate MSR -values. - -[ tglx: Made the variable __ro_after_init ] - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 7dd16f4..b92c469 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -34,6 +34,12 @@ static void __init ssb_select_mitigation(void); - */ - static u64 __ro_after_init x86_spec_ctrl_base; - -+/* -+ * The vendor and possibly platform specific bits which can be modified in -+ * x86_spec_ctrl_base. 
-+ */ -+static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; -+ - void __init check_bugs(void) - { - identify_boot_cpu(); -@@ -116,7 +122,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; - - void x86_spec_ctrl_set(u64 val) - { -- if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) -+ if (val & x86_spec_ctrl_mask) - WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); - else - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -@@ -458,6 +464,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_RDS; -+ x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; - x86_spec_ctrl_set(SPEC_CTRL_RDS); - break; - case X86_VENDOR_AMD: -@@ -481,7 +488,7 @@ static void ssb_select_mitigation() - void x86_spec_ctrl_setup_ap(void) - { - if (boot_cpu_has(X86_FEATURE_IBRS)) -- x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); -+ x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); - } - - #ifdef CONFIG_SYSFS --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch deleted file mode 100644 index c58bff80..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch +++ /dev/null @@ -1,100 +0,0 @@ -From aa9e88541e4443ffd498e0dd1912b2e658a659e6 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:54 -0800 -Subject: [PATCH 31/42] x86/get_user: Use pointer masking to limit speculation - -(cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94) - -Quoting Linus: - - I do think that it would be a good idea to very expressly document - the fact that it's not that the user access itself is unsafe. I do - agree that things like "get_user()" want to be protected, but not - because of any direct bugs or problems with get_user() and friends, - but simply because get_user() is an excellent source of a pointer - that is obviously controlled from a potentially attacking user - space. So it's a prime candidate for then finding _subsequent_ - accesses that can then be used to perturb the cache. - -Unlike the __get_user() case get_user() includes the address limit check -near the pointer de-reference. With that locality the speculation can be -mitigated with pointer narrowing rather than a barrier, i.e. -array_index_nospec(). Where the narrowing is performed by: - - cmp %limit, %ptr - sbb %mask, %mask - and %mask, %ptr - -With respect to speculation the value of %ptr is either less than %limit -or NULL. 
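[Illustration, not from the patch: what the cmp/sbb/and narrowing computes, expressed in plain C. The real sequence stays in assembly precisely so there is no conditional branch for the CPU to mispredict; a compiler is free to emit one for the comparison below.]

#include <stdint.h>

/* mask is all-ones when ptr is below limit and all-zeroes otherwise, so the
 * result is either the original pointer or 0 (NULL) -- matching
 * "cmp %limit, %ptr; sbb %mask, %mask; and %mask, %ptr". */
static inline uintptr_t mask_user_ptr(uintptr_t ptr, uintptr_t limit)
{
	uintptr_t mask = -(uintptr_t)(ptr < limit);
	return ptr & mask;
}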
- -Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: Kees Cook <keescook@chromium.org> -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/lib/getuser.S | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S -index 37b62d4..b12b214 100644 ---- a/arch/x86/lib/getuser.S -+++ b/arch/x86/lib/getuser.S -@@ -39,6 +39,8 @@ ENTRY(__get_user_1) - mov PER_CPU_VAR(current_task), %_ASM_DX - cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ -+ and %_ASM_DX, %_ASM_AX - ASM_STAC - 1: movzbl (%_ASM_AX),%edx - xor %eax,%eax -@@ -53,6 +55,8 @@ ENTRY(__get_user_2) - mov PER_CPU_VAR(current_task), %_ASM_DX - cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ -+ and %_ASM_DX, %_ASM_AX - ASM_STAC - 2: movzwl -1(%_ASM_AX),%edx - xor %eax,%eax -@@ -67,6 +71,8 @@ ENTRY(__get_user_4) - mov PER_CPU_VAR(current_task), %_ASM_DX - cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ -+ and %_ASM_DX, %_ASM_AX - ASM_STAC - 3: movl -3(%_ASM_AX),%edx - xor %eax,%eax -@@ -82,6 +88,8 @@ ENTRY(__get_user_8) - mov PER_CPU_VAR(current_task), %_ASM_DX - cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ -+ and %_ASM_DX, %_ASM_AX - ASM_STAC - 4: movq -7(%_ASM_AX),%rdx - xor %eax,%eax -@@ -93,6 +101,8 @@ ENTRY(__get_user_8) - mov PER_CPU_VAR(current_task), %_ASM_DX - cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user_8 -+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ -+ and %_ASM_DX, %_ASM_AX - ASM_STAC - 4: movl -7(%_ASM_AX),%edx - 5: movl -3(%_ASM_AX),%ecx --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch deleted file mode 100644 index 4f0b4222..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 0ebeae5f6b25b48c0559950e2b7c2f0a1ffd641c Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Thu, 22 Feb 2018 16:43:17 +0100 -Subject: [PATCH 32/33] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit ecb586bd29c99fb4de599dec388658e74388daad upstream. - -Having a paravirt indirect call in the IBRS restore path is not a -good idea, since we are trying to protect from speculative execution -of bogus indirect branch targets. It is also slower, so use -native_wrmsrl() on the vmentry path too. 
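[Toy illustration with hypothetical names, not kernel code: the point of switching to native_wrmsrl() is that the paravirt path dispatches the MSR write through a function pointer, an indirect branch whose target must be predicted, while the native helper lowers to a direct wrmsr instruction:]

#include <stdio.h>
#include <stdint.h>

static void toy_write_msr(uint32_t msr, uint64_t val)	/* "native": direct call */
{
	printf("wrmsr %#x <- %#llx\n", (unsigned)msr, (unsigned long long)val);
}

/* "paravirt": the same operation reached through a pointer (indirect branch) */
static void (*pv_write_msr)(uint32_t, uint64_t) = toy_write_msr;

int main(void)
{
	pv_write_msr(0x48, 0);	/* 0x48 = MSR_IA32_SPEC_CTRL, per the patches above */
	toy_write_msr(0x48, 0);
	return 0;
}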
- -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Reviewed-by: Jim Mattson <jmattson@google.com> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: KarimAllah Ahmed <karahmed@amazon.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: kvm@vger.kernel.org -Cc: stable@vger.kernel.org -Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d -Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 7 ++++--- - arch/x86/kvm/vmx.c | 7 ++++--- - 2 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 4a36977..8d33396 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -44,6 +44,7 @@ - #include <asm/debugreg.h> - #include <asm/kvm_para.h> - #include <asm/irq_remapping.h> -+#include <asm/microcode.h> - #include <asm/nospec-branch.h> - - #include <asm/virtext.h> -@@ -4907,7 +4908,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - * being speculatively taken. - */ - if (svm->spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); -+ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); - - asm volatile ( - "push %%" _ASM_BP "; \n\t" -@@ -5017,10 +5018,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - * save it. - */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); -+ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - if (svm->spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index f6c0568..aa2684a 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -49,6 +49,7 @@ - #include <asm/kexec.h> - #include <asm/apic.h> - #include <asm/irq_remapping.h> -+#include <asm/microcode.h> - #include <asm/nospec-branch.h> - - #include "trace.h" -@@ -8888,7 +8889,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - * being speculatively taken. - */ - if (vmx->spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); -+ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); - - vmx->__launched = vmx->loaded_vmcs->launched; - asm( -@@ -9024,10 +9025,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - * save it. 
- */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); -+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - if (vmx->spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, 0); -+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch deleted file mode 100644 index 7cc0ba33..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 3b5d03bec6defac520ec0d6c5620f0c44392df39 Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sat, 4 Nov 2017 18:23:24 -0700 -Subject: [PATCH 032/103] kaiser: asm/tlbflush.h handle noPGE at lower level - -I found asm/tlbflush.h too twisty, and think it safer not to avoid -__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case, -but instead let it handle kaiser_enabled along with cr3: it can just -use __native_flush_tlb() for that, no harm in re-disabling preemption. - -(This is not the same change as Kirill and Dave have suggested for -upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge -check; cr3 is enough for kaiser, and thought to be cheaper than cr4.) - -Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals() -preference from __native_flush_tlb(): unlike the invpcid_flush_all() -preference in __native_flush_tlb_global(), it's not seen in upstream -4.14, and was recently reported to be surprisingly slow. - -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 27 +++------------------------ - 1 file changed, 3 insertions(+), 24 deletions(-) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 13a74f6..bc6f979 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -153,14 +153,6 @@ static inline void kaiser_flush_tlb_on_return_to_user(void) - - static inline void __native_flush_tlb(void) - { -- if (this_cpu_has(X86_FEATURE_INVPCID)) { -- /* -- * Note, this works with CR4.PCIDE=0 or 1. -- */ -- invpcid_flush_all_nonglobals(); -- return; -- } -- - /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 -@@ -184,11 +176,8 @@ static inline void __native_flush_tlb_global_irq_disabled(void) - /* restore PGE as it was before */ - native_write_cr4(cr4); - } else { -- /* -- * x86_64 microcode update comes this way when CR4.PGE is not -- * enabled, and it's safer for all callers to allow this case. 
-- */ -- native_write_cr3(native_read_cr3()); -+ /* do it with cr3, letting kaiser flush user PCID */ -+ __native_flush_tlb(); - } - } - -@@ -196,12 +185,6 @@ static inline void __native_flush_tlb_global(void) - { - unsigned long flags; - -- if (kaiser_enabled) { -- /* Globals are not used at all */ -- __native_flush_tlb(); -- return; -- } -- - if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes -@@ -257,11 +240,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) - - static inline void __flush_tlb_all(void) - { -- if (boot_cpu_has(X86_FEATURE_PGE)) -- __flush_tlb_global(); -- else -- __flush_tlb(); -- -+ __flush_tlb_global(); - /* - * Note: if we somehow had PCID but not PGE, then this wouldn't work -- - * we'd end up flushing kernel translations for the current ASID but --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch deleted file mode 100644 index a79d655d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch +++ /dev/null @@ -1,200 +0,0 @@ -From 5066a8fdb740b1c31a315ea7da3a58c8208b15eb Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sun, 20 May 2018 20:52:05 +0100 -Subject: [PATCH 32/93] x86/bugs/AMD: Add support to disable RDS on - Fam[15,16,17]h if requested - -commit 764f3c21588a059cd783c6ba0734d4db2d72822d upstream - -AMD does not need the Speculative Store Bypass mitigation to be enabled. - -The parameters for this are already available and can be done via MSR -C001_1020. Each family uses a different bit in that MSR for this. - -[ tglx: Expose the bit mask via a variable and move the actual MSR fiddling - into the bugs code as that's the right thing to do and also required - to prepare for dynamic enable/disable ] - -Suggested-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/nospec-branch.h | 4 ++++ - arch/x86/kernel/cpu/amd.c | 26 ++++++++++++++++++++++++++ - arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++++++++++++- - arch/x86/kernel/cpu/common.c | 4 ++++ - 5 files changed, 61 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 013f3de..8797069 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -205,6 +205,7 @@ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ - #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ -+#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 7b9eacf..3a1541c 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -244,6 +244,10 @@ enum ssb_mitigation { - SPEC_STORE_BYPASS_DISABLE, - }; - -+/* AMD specific Speculative Store Bypass MSR data */ -+extern u64 x86_amd_ls_cfg_base; -+extern u64 x86_amd_ls_cfg_rds_mask; -+ - extern char __indirect_thunk_start[]; - extern char __indirect_thunk_end[]; - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 747f8a2..7551d9ad 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -9,6 +9,7 @@ - #include <asm/processor.h> - #include <asm/apic.h> - #include <asm/cpu.h> -+#include <asm/nospec-branch.h> - #include <asm/smp.h> - #include <asm/pci-direct.h> - #include <asm/delay.h> -@@ -542,6 +543,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) - rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; - } -+ -+ if (c->x86 >= 0x15 && c->x86 <= 0x17) { -+ unsigned int bit; -+ -+ switch (c->x86) { -+ case 0x15: bit = 54; break; -+ case 0x16: bit = 33; break; -+ case 0x17: bit = 10; break; -+ default: return; -+ } -+ /* -+ * Try to cache the base value so further operations can -+ * avoid RMW. If that faults, do not enable RDS. -+ */ -+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { -+ setup_force_cpu_cap(X86_FEATURE_RDS); -+ setup_force_cpu_cap(X86_FEATURE_AMD_RDS); -+ x86_amd_ls_cfg_rds_mask = 1ULL << bit; -+ } -+ } - } - - static void early_init_amd(struct cpuinfo_x86 *c) -@@ -827,6 +848,11 @@ static void init_amd(struct cpuinfo_x86 *c) - /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ - if (!cpu_has(c, X86_FEATURE_XENPV)) - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -+ -+ if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { -+ set_cpu_cap(c, X86_FEATURE_RDS); -+ set_cpu_cap(c, X86_FEATURE_AMD_RDS); -+ } - } - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index b92c469..b3696cc 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -40,6 +40,13 @@ static u64 __ro_after_init x86_spec_ctrl_base; - */ - static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; - -+/* -+ * AMD specific MSR info for Speculative Store Bypass control. -+ * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). -+ */ -+u64 __ro_after_init x86_amd_ls_cfg_base; -+u64 __ro_after_init x86_amd_ls_cfg_rds_mask; -+ - void __init check_bugs(void) - { - identify_boot_cpu(); -@@ -51,7 +58,8 @@ void __init check_bugs(void) - - /* - * Read the SPEC_CTRL MSR to account for reserved bits which may -- * have unknown values. -+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD -+ * init code as it is not enumerated and depends on the family. 
- */ - if (boot_cpu_has(X86_FEATURE_IBRS)) - rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -@@ -153,6 +161,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); - -+static void x86_amd_rds_enable(void) -+{ -+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; -+ -+ if (boot_cpu_has(X86_FEATURE_AMD_RDS)) -+ wrmsrl(MSR_AMD64_LS_CFG, msrval); -+} -+ - #ifdef RETPOLINE - static bool spectre_v2_bad_module; - -@@ -442,6 +458,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - - switch (cmd) { - case SPEC_STORE_BYPASS_CMD_AUTO: -+ /* -+ * AMD platforms by default don't need SSB mitigation. -+ */ -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) -+ break; - case SPEC_STORE_BYPASS_CMD_ON: - mode = SPEC_STORE_BYPASS_DISABLE; - break; -@@ -468,6 +489,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - x86_spec_ctrl_set(SPEC_CTRL_RDS); - break; - case X86_VENDOR_AMD: -+ x86_amd_rds_enable(); - break; - } - } -@@ -489,6 +511,9 @@ void x86_spec_ctrl_setup_ap(void) - { - if (boot_cpu_has(X86_FEATURE_IBRS)) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); -+ -+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) -+ x86_amd_rds_enable(); - } - - #ifdef CONFIG_SYSFS -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index ab6b3ad..beb1da8 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -895,6 +895,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_CENTAUR, 5, }, - { X86_VENDOR_INTEL, 5, }, - { X86_VENDOR_NSC, 5, }, -+ { X86_VENDOR_AMD, 0x12, }, -+ { X86_VENDOR_AMD, 0x11, }, -+ { X86_VENDOR_AMD, 0x10, }, -+ { X86_VENDOR_AMD, 0xf, }, - { X86_VENDOR_ANY, 4, }, - {} - }; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch deleted file mode 100644 index 8acd9616..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 0c89c81045ecacb413a4cd61ec5187f7aa688074 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:02:59 -0800 -Subject: [PATCH 32/42] x86/syscall: Sanitize syscall table de-references under - speculation - -(cherry picked from commit 2fbd7af5af8665d18bcefae3e9700be07e22b681) - -The syscall table base is a user controlled function pointer in kernel -space. Use array_index_nospec() to prevent any out of bounds speculation. - -While retpoline prevents speculating into a userspace directed target it -does not stop the pointer de-reference, the concern is leaking memory -relative to the syscall table base, by observing instruction cache -behavior. 
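The sanitization described above reduces to a branch-free mask: when the index passed its bounds check the mask is all ones, and when a mispredicted check lets an out-of-range index through speculatively the mask is zero, so the clamped index cannot reach attacker-chosen memory. A standalone user-space sketch of that mask trick (illustration only, not the kernel's array_index_nospec() implementation):

    #include <stdio.h>

    /* All-ones when idx < size, all-zeros otherwise, computed without a
     * branch so a mispredicted bounds check cannot steer the later load. */
    static unsigned long index_mask_nospec(unsigned long idx, unsigned long size)
    {
            return ~(long)(idx | (size - idx - 1)) >> (sizeof(long) * 8 - 1);
    }

    int main(void)
    {
            unsigned long table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
            unsigned long idx = 3;                     /* treat as user-controlled */

            if (idx < 8) {                             /* architectural check */
                    idx &= index_mask_nospec(idx, 8);  /* speculative clamp */
                    printf("%lu\n", table[idx]);
            }
            return 0;
    }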
- -Reported-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Andy Lutomirski <luto@kernel.org> -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/common.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c -index bd1d102..b0cd306 100644 ---- a/arch/x86/entry/common.c -+++ b/arch/x86/entry/common.c -@@ -20,6 +20,7 @@ - #include <linux/export.h> - #include <linux/context_tracking.h> - #include <linux/user-return-notifier.h> -+#include <linux/nospec.h> - #include <linux/uprobes.h> - - #include <asm/desc.h> -@@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) - * regs->orig_ax, which changes the behavior of some syscalls. - */ - if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { -- regs->ax = sys_call_table[nr & __SYSCALL_MASK]( -+ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); -+ regs->ax = sys_call_table[nr]( - regs->di, regs->si, regs->dx, - regs->r10, regs->r8, regs->r9); - } -@@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) - } - - if (likely(nr < IA32_NR_syscalls)) { -+ nr = array_index_nospec(nr, IA32_NR_syscalls); - /* - * It's possible that a 32-bit syscall implementation - * takes a 64-bit parameter but nonetheless assumes that --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch deleted file mode 100644 index 95086730..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 885a241a441e144391884136534657f8502b2a48 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Thu, 22 Feb 2018 16:43:18 +0100 -Subject: [PATCH 33/33] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by - marking the RDMSR path as unlikely() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 946fbbc13dce68902f64515b610eeb2a6c3d7a64 upstream. - -vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving -branch hints to the compiler can actually make a substantial cycle -difference by keeping the fast path contiguous in memory. - -With this optimization, the retpoline-guest/retpoline-host case is -about 50 cycles faster. 
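The hint itself is just __builtin_expect underneath; marking the RDMSR path as cold lets the compiler keep the fast path contiguous. A minimal sketch of that pattern (the helper below is a made-up stand-in, not the KVM code):

    #define unlikely(x) __builtin_expect(!!(x), 0)

    static long slow_rdmsr_path(void)          /* hypothetical stand-in */
    {
            return -1;
    }

    long vcpu_exit_fast_path(int msr_not_intercepted)
    {
            if (unlikely(msr_not_intercepted))
                    return slow_rdmsr_path();  /* cold, moved out of line */
            return 0;                          /* hot, straight-line path */
    }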
- -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Reviewed-by: Jim Mattson <jmattson@google.com> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: KarimAllah Ahmed <karahmed@amazon.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: kvm@vger.kernel.org -Cc: stable@vger.kernel.org -Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 2 +- - arch/x86/kvm/vmx.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 8d33396..b82bb66 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -5017,7 +5017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ -- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - if (svm->spec_ctrl) -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index aa2684a..3c3558b 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -9024,7 +9024,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ -- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - if (vmx->spec_ctrl) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch deleted file mode 100644 index b7c611b2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch +++ /dev/null @@ -1,93 +0,0 @@ -From bb144d27d0915eb3370d1d640d598ac047ce960e Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sat, 4 Nov 2017 18:43:06 -0700 -Subject: [PATCH 033/103] kaiser: kaiser_flush_tlb_on_return_to_user() check - PCID - -Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID -check, instead of each caller doing it inline first: nobody needs -to optimize for the noPCID case, it's clearer this way, and better -suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines -by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes. 
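The BUILD_BUG_ON() mentioned above turns the "this constant is currently zero" assumption into a compile-time failure if it ever stops holding. A minimal sketch of the pattern (macro and constant names are illustrative, not the kernel's exact definitions):

    /* Compilation fails (negative array size) when cond is non-zero;
     * C11 _Static_assert expresses the same idea more directly. */
    #define BUILD_BUG_ON_SKETCH(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

    #define CR3_KERN_FLUSH_BITS 0UL   /* hypothetical "known to be zero" constant */

    static void load_new_cr3_sketch(void)
    {
            BUILD_BUG_ON_SKETCH(CR3_KERN_FLUSH_BITS);
            /* ... safe to omit OR-ing those bits into the new CR3 value ... */
    }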
- -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 4 ++-- - arch/x86/mm/kaiser.c | 6 +++--- - arch/x86/mm/tlb.c | 8 ++++---- - 3 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index bc6f979..8db339a 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -159,7 +159,7 @@ static inline void __native_flush_tlb(void) - * back: - */ - preempt_disable(); -- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled) - kaiser_flush_tlb_on_return_to_user(); - native_write_cr3(native_read_cr3()); - preempt_enable(); -@@ -218,7 +218,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) - */ - - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled) - kaiser_flush_tlb_on_return_to_user(); - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - return; -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 65ac3fd..8600663 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -435,12 +435,12 @@ void kaiser_setup_pcid(void) - - /* - * Make a note that this cpu will need to flush USER tlb on return to user. -- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: -- * if cpu does not, then the NOFLUSH bit will never have been set. -+ * If cpu does not have PCID, then the NOFLUSH bit will never have been set. - */ - void kaiser_flush_tlb_on_return_to_user(void) - { -- this_cpu_write(x86_cr3_pcid_user, -+ if (this_cpu_has(X86_FEATURE_PCID)) -+ this_cpu_write(x86_cr3_pcid_user, - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index fde44bb..e81f8bb 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -41,7 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) - { - unsigned long new_mm_cr3 = __pa(pgdir); - -- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { -+ if (kaiser_enabled) { - /* - * We reuse the same PCID for different tasks, so we must - * flush all the entries for the PCID out when we change tasks. -@@ -52,10 +52,10 @@ static void load_new_mm_cr3(pgd_t *pgdir) - * do it here, but can only be used if X86_FEATURE_INVPCID is - * available - and many machines support pcid without invpcid. - * -- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0; -- * but keep that line in there in case something changes. -+ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it -+ * would be needed in the write_cr3() below - if PCIDs enabled. 
- */ -- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; -+ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH); - kaiser_flush_tlb_on_return_to_user(); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch deleted file mode 100644 index d9334b8e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch +++ /dev/null @@ -1,57 +0,0 @@ -From daf0f36d9103ecacecf426f868c8608e7e3edd95 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:03:05 -0800 -Subject: [PATCH 33/42] vfs, fdtable: Prevent bounds-check bypass via - speculative execution - -(cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507) - -'fd' is a user controlled value that is used as a data dependency to -read from the 'fdt->fd' array. In order to avoid potential leaks of -kernel memory values, block speculative execution of the instruction -stream that could issue reads based on an invalid 'file *' returned from -__fcheck_files. - -Co-developed-by: Elena Reshetova <elena.reshetova@intel.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/fdtable.h | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h -index 6e84b2cae..442b54a 100644 ---- a/include/linux/fdtable.h -+++ b/include/linux/fdtable.h -@@ -9,6 +9,7 @@ - #include <linux/compiler.h> - #include <linux/spinlock.h> - #include <linux/rcupdate.h> -+#include <linux/nospec.h> - #include <linux/types.h> - #include <linux/init.h> - #include <linux/fs.h> -@@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i - { - struct fdtable *fdt = rcu_dereference_raw(files->fdt); - -- if (fd < fdt->max_fds) -+ if (fd < fdt->max_fds) { -+ fd = array_index_nospec(fd, fdt->max_fds); - return rcu_dereference_raw(fdt->fd[fd]); -+ } - return NULL; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch deleted file mode 100644 index 743d2a90..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 8f8f17abbbabcff7ebf353b62bbcfb414f83d77e Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 25 Apr 2018 22:04:25 -0400 -Subject: [PATCH 33/93] x86/KVM/VMX: Expose SPEC_CTRL Bit(2) to the guest -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit da39556f66f5cfe8f9c989206974f1cb16ca5d7c upstream - -Expose the CPUID.7.EDX[31] bit to the guest, and also guard against various -combinations of SPEC_CTRL MSR 
values. - -The handling of the MSR (to take into account the host value of SPEC_CTRL -Bit(2)) is taken care of in patch: - - KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> - -[dwmw2: Handle 4.9 guest CPUID differences, rename - guest_cpu_has_ibrs() → guest_cpu_has_spec_ctrl()] -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 2 +- - arch/x86/kvm/cpuid.h | 4 ++-- - arch/x86/kvm/svm.c | 4 ++-- - arch/x86/kvm/vmx.c | 6 +++--- - 4 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index bcebe84..237e926 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 7.0.edx*/ - const u32 kvm_cpuid_7_0_edx_x86_features = -- F(SPEC_CTRL) | F(ARCH_CAPABILITIES); -+ F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); - - /* all calls to cpuid_count() should be made on the same cpu */ - get_cpu(); -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 841e80d..39dd457 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -163,7 +163,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) - return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); - } - --static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) -+static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) - { - struct kvm_cpuid_entry2 *best; - -@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) - if (best && (best->ebx & bit(X86_FEATURE_IBRS))) - return true; - best = kvm_find_cpuid_entry(vcpu, 7, 0); -- return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); -+ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); - } - - static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index a07579f..43736dd 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -3540,7 +3540,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && -- !guest_cpuid_has_ibrs(vcpu)) -+ !guest_cpuid_has_spec_ctrl(vcpu)) - return 1; - - msr_info->data = svm->spec_ctrl; -@@ -3631,7 +3631,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) - break; - case MSR_IA32_SPEC_CTRL: - if (!msr->host_initiated && -- !guest_cpuid_has_ibrs(vcpu)) -+ !guest_cpuid_has_spec_ctrl(vcpu)) - return 1; - - /* The STIBP bit doesn't fault even if it's not advertised */ -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index c386d13..3210add 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -3017,7 +3017,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && -- !guest_cpuid_has_ibrs(vcpu)) -+ !guest_cpuid_has_spec_ctrl(vcpu)) - return 1; - - msr_info->data = to_vmx(vcpu)->spec_ctrl; -@@ -3129,11 +3129,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && -- !guest_cpuid_has_ibrs(vcpu)) -+ !guest_cpuid_has_spec_ctrl(vcpu)) - return 1; - - /* 
The STIBP bit doesn't fault even if it's not advertised */ -- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) -+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) - return 1; - - vmx->spec_ctrl = data; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch deleted file mode 100644 index df356798..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-paravirt-Dont-patch-flush_tlb_single.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 476529dd595a39d26d9f3c7f1a6526a2fc1bb49f Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 4 Dec 2017 15:07:30 +0100 -Subject: [PATCH 034/103] x86/paravirt: Dont patch flush_tlb_single - -commit a035795499ca1c2bd1928808d1a156eda1420383 upstream - -native_flush_tlb_single() will be changed with the upcoming -PAGE_TABLE_ISOLATION feature. This requires to have more code in -there than INVLPG. - -Remove the paravirt patching for it. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -Acked-by: Peter Zijlstra <peterz@infradead.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Borislav Petkov <bpetkov@suse.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Laight <David.Laight@aculab.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: Eduardo Valentin <eduval@amazon.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Will Deacon <will.deacon@arm.com> -Cc: aliguori@amazon.com -Cc: daniel.gruss@iaik.tugraz.at -Cc: hughd@google.com -Cc: keescook@google.com -Cc: linux-mm@kvack.org -Cc: michael.schwarz@iaik.tugraz.at -Cc: moritz.lipp@iaik.tugraz.at -Cc: richard.fellner@student.tugraz.at -Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Acked-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/paravirt_patch_64.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c -index bb3840c..ee43b36 100644 ---- a/arch/x86/kernel/paravirt_patch_64.c -+++ b/arch/x86/kernel/paravirt_patch_64.c -@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); - DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); - DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); - DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); --DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); - DEF_NATIVE(pv_cpu_ops, clts, "clts"); - DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); - -@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - PATCH_SITE(pv_mmu_ops, read_cr3); - PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_cpu_ops, clts); -- PATCH_SITE(pv_mmu_ops, flush_tlb_single); - PATCH_SITE(pv_cpu_ops, wbinvd); - #if defined(CONFIG_PARAVIRT_SPINLOCKS) - case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch 
b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch deleted file mode 100644 index 8703f68f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 793cff3e2e196a3287441de5c10c969d031ae64c Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Mon, 29 Jan 2018 17:03:21 -0800 -Subject: [PATCH 34/42] x86/spectre: Report get_user mitigation for spectre_v1 - -(cherry picked from commit edfbae53dab8348fca778531be9f4855d2ca0360) - -Reflect the presence of get_user(), __get_user(), and 'syscall' protections -in sysfs. The expectation is that new and better tooling will allow the -kernel to grow more usages of array_index_nospec(), for now, only claim -mitigation for __user pointer de-references. - -Reported-by: Jiri Slaby <jslaby@suse.cz> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: linux-arch@vger.kernel.org -Cc: kernel-hardening@lists.openwall.com -Cc: gregkh@linuxfoundation.org -Cc: torvalds@linux-foundation.org -Cc: alan@linux.intel.com -Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 51624c6..d4658e0 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -296,7 +296,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, - { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); -- return sprintf(buf, "Vulnerable\n"); -+ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); - } - - ssize_t cpu_show_spectre_v2(struct device *dev, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch deleted file mode 100644 index 36224a56..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch +++ /dev/null @@ -1,141 +0,0 @@ -From b8380a76b18fa5522368b50c284530fc6e1b1992 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 29 Apr 2018 15:01:37 +0200 -Subject: [PATCH 34/93] x86/speculation: Create spec-ctrl.h to avoid include - hell - -commit 28a2775217b17208811fa43a9e96bd1fdf417b86 upstream - -Having everything in nospec-branch.h creates a hell of dependencies when -adding the prctl based switching mechanism. Move everything which is not -required in nospec-branch.h to spec-ctrl.h and fix up the includes in the -relevant files. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 14 -------------- - arch/x86/include/asm/spec-ctrl.h | 21 +++++++++++++++++++++ - arch/x86/kernel/cpu/amd.c | 2 +- - arch/x86/kernel/cpu/bugs.c | 2 +- - arch/x86/kvm/svm.c | 2 +- - arch/x86/kvm/vmx.c | 2 +- - 6 files changed, 25 insertions(+), 18 deletions(-) - create mode 100644 arch/x86/include/asm/spec-ctrl.h - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 3a1541c..1119f14 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -228,26 +228,12 @@ enum spectre_v2_mitigation { - extern void x86_spec_ctrl_set(u64); - extern u64 x86_spec_ctrl_get_default(void); - --/* -- * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR -- * the guest has, while on VMEXIT we restore the host view. This -- * would be easier if SPEC_CTRL were architecturally maskable or -- * shadowable for guests but this is not (currently) the case. -- * Takes the guest view of SPEC_CTRL MSR as a parameter. -- */ --extern void x86_spec_ctrl_set_guest(u64); --extern void x86_spec_ctrl_restore_host(u64); -- - /* The Speculative Store Bypass disable variants */ - enum ssb_mitigation { - SPEC_STORE_BYPASS_NONE, - SPEC_STORE_BYPASS_DISABLE, - }; - --/* AMD specific Speculative Store Bypass MSR data */ --extern u64 x86_amd_ls_cfg_base; --extern u64 x86_amd_ls_cfg_rds_mask; -- - extern char __indirect_thunk_start[]; - extern char __indirect_thunk_end[]; - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -new file mode 100644 -index 0000000..3ad6442 ---- /dev/null -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _ASM_X86_SPECCTRL_H_ -+#define _ASM_X86_SPECCTRL_H_ -+ -+#include <asm/nospec-branch.h> -+ -+/* -+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR -+ * the guest has, while on VMEXIT we restore the host view. This -+ * would be easier if SPEC_CTRL were architecturally maskable or -+ * shadowable for guests but this is not (currently) the case. -+ * Takes the guest view of SPEC_CTRL MSR as a parameter. 
-+ */ -+extern void x86_spec_ctrl_set_guest(u64); -+extern void x86_spec_ctrl_restore_host(u64); -+ -+/* AMD specific Speculative Store Bypass MSR data */ -+extern u64 x86_amd_ls_cfg_base; -+extern u64 x86_amd_ls_cfg_rds_mask; -+ -+#endif -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 7551d9ad..a176c81 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -9,7 +9,7 @@ - #include <asm/processor.h> - #include <asm/apic.h> - #include <asm/cpu.h> --#include <asm/nospec-branch.h> -+#include <asm/spec-ctrl.h> - #include <asm/smp.h> - #include <asm/pci-direct.h> - #include <asm/delay.h> -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index b3696cc..46d01fd 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -12,7 +12,7 @@ - #include <linux/cpu.h> - #include <linux/module.h> - --#include <asm/nospec-branch.h> -+#include <asm/spec-ctrl.h> - #include <asm/cmdline.h> - #include <asm/bugs.h> - #include <asm/processor.h> -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 43736dd..47779f5 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -45,7 +45,7 @@ - #include <asm/kvm_para.h> - #include <asm/irq_remapping.h> - #include <asm/microcode.h> --#include <asm/nospec-branch.h> -+#include <asm/spec-ctrl.h> - - #include <asm/virtext.h> - #include "trace.h" -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 3210add..17199dc 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -50,7 +50,7 @@ - #include <asm/apic.h> - #include <asm/irq_remapping.h> - #include <asm/microcode.h> --#include <asm/nospec-branch.h> -+#include <asm/spec-ctrl.h> - - #include "trace.h" - #include "pmu.h" --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch deleted file mode 100644 index 0e546ac2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-kaiser-Reenable-PARAVIRT.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 293cec66a45710c094145c83df18808e43f6dd35 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Tue, 2 Jan 2018 14:19:49 +0100 -Subject: [PATCH 035/103] x86/kaiser: Reenable PARAVIRT - -Now that the required bits have been addressed, reenable -PARAVIRT. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - security/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/security/Kconfig b/security/Kconfig -index d8ae933..fd2ceeb 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -34,7 +34,7 @@ config SECURITY - config KAISER - bool "Remove the kernel mapping in user mode" - default y -- depends on X86_64 && SMP && !PARAVIRT -+ depends on X86_64 && SMP - help - This enforces a strict kernel and user space isolation, in order - to close hardware side channels on kernel address information. 
--- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch deleted file mode 100644 index bcbf8f92..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch +++ /dev/null @@ -1,125 +0,0 @@ -From ac5c35e60743b4260df777cc4ac1e877c2999b1d Mon Sep 17 00:00:00 2001 -From: Kyle Huey <me@kylehuey.com> -Date: Tue, 14 Feb 2017 00:11:02 -0800 -Subject: [PATCH 35/93] x86/process: Optimize TIF checks in __switch_to_xtra() - -commit af8b3cd3934ec60f4c2a420d19a9d416554f140b upstream - -Help the compiler to avoid reevaluating the thread flags for each checked -bit by reordering the bit checks and providing an explicit xor for -evaluation. - -With default defconfigs for each arch, - -x86_64: arch/x86/kernel/process.o -text data bss dec hex -3056 8577 16 11649 2d81 Before -3024 8577 16 11617 2d61 After - -i386: arch/x86/kernel/process.o -text data bss dec hex -2957 8673 8 11638 2d76 Before -2925 8673 8 11606 2d56 After - -Originally-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Kyle Huey <khuey@kylehuey.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Andy Lutomirski <luto@kernel.org> -Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - -[dwmw2: backported to make TIF_RDS handling simpler. - No deferred TR reload.] -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/process.c | 54 +++++++++++++++++++++++++++-------------------- - 1 file changed, 31 insertions(+), 23 deletions(-) - -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index a55b320..0e1999e 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -192,48 +192,56 @@ int set_tsc_mode(unsigned int val) - return 0; - } - -+static inline void switch_to_bitmap(struct tss_struct *tss, -+ struct thread_struct *prev, -+ struct thread_struct *next, -+ unsigned long tifp, unsigned long tifn) -+{ -+ if (tifn & _TIF_IO_BITMAP) { -+ /* -+ * Copy the relevant range of the IO bitmap. 
-+ * Normally this is 128 bytes or less: -+ */ -+ memcpy(tss->io_bitmap, next->io_bitmap_ptr, -+ max(prev->io_bitmap_max, next->io_bitmap_max)); -+ } else if (tifp & _TIF_IO_BITMAP) { -+ /* -+ * Clear any possible leftover bits: -+ */ -+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); -+ } -+} -+ - void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) - { - struct thread_struct *prev, *next; -+ unsigned long tifp, tifn; - - prev = &prev_p->thread; - next = &next_p->thread; - -- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ -- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { -+ tifn = READ_ONCE(task_thread_info(next_p)->flags); -+ tifp = READ_ONCE(task_thread_info(prev_p)->flags); -+ switch_to_bitmap(tss, prev, next, tifp, tifn); -+ -+ propagate_user_return_notify(prev_p, next_p); -+ -+ if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; -- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) -+ if (tifn & _TIF_BLOCKSTEP) - debugctl |= DEBUGCTLMSR_BTF; -- - update_debugctlmsr(debugctl); - } - -- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ -- test_tsk_thread_flag(next_p, TIF_NOTSC)) { -- /* prev and next are different */ -- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) -+ if ((tifp ^ tifn) & _TIF_NOTSC) { -+ if (tifn & _TIF_NOTSC) - hard_disable_TSC(); - else - hard_enable_TSC(); - } -- -- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { -- /* -- * Copy the relevant range of the IO bitmap. -- * Normally this is 128 bytes or less: -- */ -- memcpy(tss->io_bitmap, next->io_bitmap_ptr, -- max(prev->io_bitmap_max, next->io_bitmap_max)); -- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { -- /* -- * Clear any possible leftover bits: -- */ -- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); -- } -- propagate_user_return_notify(prev_p, next_p); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch deleted file mode 100644 index 6308fc6d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 5f49c69f0110c99880f0d85cf96e7cc60acd4987 Mon Sep 17 00:00:00 2001 -From: Colin Ian King <colin.king@canonical.com> -Date: Tue, 30 Jan 2018 19:32:18 +0000 -Subject: [PATCH 35/42] x86/spectre: Fix spelling mistake: "vunerable"-> - "vulnerable" - -(cherry picked from commit e698dcdfcda41efd0984de539767b4cddd235f1e) - -Trivial fix to spelling mistake in pr_err error message text. 
- -Signed-off-by: Colin Ian King <colin.king@canonical.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: kernel-janitors@vger.kernel.org -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@suse.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index d4658e0..aec7daf 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -102,7 +102,7 @@ bool retpoline_module_ok(bool has_retpoline) - if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) - return true; - -- pr_err("System may be vunerable to spectre v2\n"); -+ pr_err("System may be vulnerable to spectre v2\n"); - spectre_v2_bad_module = true; - return false; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch deleted file mode 100644 index 7dcd8729..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-kaiser-disabled-on-Xen-PV.patch +++ /dev/null @@ -1,44 +0,0 @@ -From e935ed6506f0753343bfc8adfa6f96922737af28 Mon Sep 17 00:00:00 2001 -From: Jiri Kosina <jkosina@suse.cz> -Date: Tue, 2 Jan 2018 14:19:49 +0100 -Subject: [PATCH 036/103] kaiser: disabled on Xen PV - -Kaiser cannot be used on paravirtualized MMUs (namely reading and writing CR3). -This does not work with KAISER as the CR3 switch from and to user space PGD -would require to map the whole XEN_PV machinery into both. - -More importantly, enabling KAISER on Xen PV doesn't make too much sense, as PV -guests use distinct %cr3 values for kernel and user already. 
- -Signed-off-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 8600663..2768854 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -263,6 +263,9 @@ void __init kaiser_check_boottime_disable(void) - char arg[5]; - int ret; - -+ if (boot_cpu_has(X86_FEATURE_XENPV)) -+ goto silent_disable; -+ - ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); - if (ret > 0) { - if (!strncmp(arg, "on", 2)) -@@ -290,6 +293,8 @@ void __init kaiser_check_boottime_disable(void) - - disable: - pr_info("Kernel/User page tables isolation: disabled\n"); -+ -+silent_disable: - kaiser_enabled = 0; - setup_clear_cpu_cap(X86_FEATURE_KAISER); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch deleted file mode 100644 index 54039e5f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 230aaaad00ca4c1e2c350ce30188d03417a170fe Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Tue, 30 Jan 2018 14:30:23 +0000 -Subject: [PATCH 36/42] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature - bits on Intel - -(cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b) - -Despite the fact that all the other code there seems to be doing it, just -using set_cpu_cap() in early_intel_init() doesn't actually work. - -For CPUs with PKU support, setup_pku() calls get_cpu_cap() after -c->c_init() has set those feature bits. That resets those bits back to what -was queried from the hardware. - -Turning the bits off for bad microcode is easy to fix. That can just use -setup_clear_cpu_cap() to force them off for all CPUs. - -I was less keen on forcing the feature bits *on* that way, just in case -of inconsistencies. I appreciate that the kernel is going to get this -utterly wrong if CPU features are not consistent, because it has already -applied alternatives by the time secondary CPUs are brought up. - -But at least if setup_force_cpu_cap() isn't being used, we might have a -chance of *detecting* the lack of the corresponding bit and either -panicking or refusing to bring the offending CPU online. - -So ensure that the appropriate feature bits are set within get_cpu_cap() -regardless of how many extra times it's called. 
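The underlying fix is to make the derived feature bits a pure function of the freshly read hardware bits, so calling the query routine again can never lose them. A toy sketch of that idempotent derivation (flag names invented for illustration):

    #define HW_SPEC_CTRL  (1u << 0)   /* bit enumerated by the CPU */
    #define KF_IBRS       (1u << 1)   /* generic kernel-visible flags */
    #define KF_IBPB       (1u << 2)

    /* Safe to call any number of times: the derived bits are recomputed
     * from the hardware bits on every read rather than set only once. */
    static unsigned int read_caps(unsigned int hw_bits)
    {
            unsigned int caps = hw_bits;

            if (caps & HW_SPEC_CTRL)
                    caps |= KF_IBRS | KF_IBPB;
            return caps;
    }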
- -Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: karahmed@amazon.de -Cc: peterz@infradead.org -Cc: bp@alien8.de -Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ - arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- - 2 files changed, 29 insertions(+), 19 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index cfa026f..60e537d 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) - } - } - -+static void init_speculation_control(struct cpuinfo_x86 *c) -+{ -+ /* -+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, -+ * and they also have a different bit for STIBP support. Also, -+ * a hypervisor might have set the individual AMD bits even on -+ * Intel CPUs, for finer-grained selection of what's available. -+ * -+ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware -+ * features, which are visible in /proc/cpuinfo and used by the -+ * kernel. So set those accordingly from the Intel bits. -+ */ -+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { -+ set_cpu_cap(c, X86_FEATURE_IBRS); -+ set_cpu_cap(c, X86_FEATURE_IBPB); -+ } -+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) -+ set_cpu_cap(c, X86_FEATURE_STIBP); -+} -+ - void get_cpu_cap(struct cpuinfo_x86 *c) - { - u32 eax, ebx, ecx, edx; -@@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) - c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); - - init_scattered_cpuid_features(c); -+ init_speculation_control(c); - } - - static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 2e257f8..4097b43 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -140,28 +140,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } - -- /* -- * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, -- * and they also have a different bit for STIBP support. Also, -- * a hypervisor might have set the individual AMD bits even on -- * Intel CPUs, for finer-grained selection of what's available. 
-- */ -- if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { -- set_cpu_cap(c, X86_FEATURE_IBRS); -- set_cpu_cap(c, X86_FEATURE_IBPB); -- } -- if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) -- set_cpu_cap(c, X86_FEATURE_STIBP); -- - /* Now if any of them are set, check the blacklist and clear the lot */ -- if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || -+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || -+ cpu_has(c, X86_FEATURE_INTEL_STIBP) || -+ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || - cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); -- clear_cpu_cap(c, X86_FEATURE_IBRS); -- clear_cpu_cap(c, X86_FEATURE_IBPB); -- clear_cpu_cap(c, X86_FEATURE_STIBP); -- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); -- clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); -+ setup_clear_cpu_cap(X86_FEATURE_IBRS); -+ setup_clear_cpu_cap(X86_FEATURE_IBPB); -+ setup_clear_cpu_cap(X86_FEATURE_STIBP); -+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); -+ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch deleted file mode 100644 index 9fd2ab23..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 19f795b97249d2e81ea918644577ab9669704c28 Mon Sep 17 00:00:00 2001 -From: Kyle Huey <me@kylehuey.com> -Date: Tue, 14 Feb 2017 00:11:03 -0800 -Subject: [PATCH 36/93] x86/process: Correct and optimize TIF_BLOCKSTEP switch - -commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream - -The debug control MSR is "highly magical" as the blockstep bit can be -cleared by hardware under not well documented circumstances. - -So a task switch relying on the bit set by the previous task (according to -the previous tasks thread flags) can trip over this and not update the flag -for the next task. - -To fix this its required to handle DEBUGCTLMSR_BTF when either the previous -or the next or both tasks have the TIF_BLOCKSTEP flag set. - -While at it avoid branching within the TIF_BLOCKSTEP case and evaluating -boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR. 
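Both thread-flag changes above rest on the same shape: read the previous and next flag words once, then use XOR (or OR) on the snapshots to decide what work the switch actually needs, instead of re-testing each task's flags for every bit. A stripped-down sketch (flag values and helpers are placeholders, not the kernel's):

    #define FLAG_NOTSC      (1ul << 0)
    #define FLAG_BLOCKSTEP  (1ul << 1)

    static void toggle_tsc_disable(void) { /* hypothetical */ }
    static void set_blockstep(int on)    { (void)on; /* hypothetical */ }

    static void switch_extra(unsigned long tifp, unsigned long tifn)
    {
            unsigned long changed = tifp ^ tifn;       /* bits that differ */

            if (changed & FLAG_NOTSC)
                    toggle_tsc_disable();
            if ((tifp | tifn) & FLAG_BLOCKSTEP)        /* either task uses it */
                    set_blockstep(!!(tifn & FLAG_BLOCKSTEP));
    }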
- -x86_64: arch/x86/kernel/process.o -text data bss dec hex -3024 8577 16 11617 2d61 Before -3008 8577 16 11601 2d51 After - -i386: No change - -[ tglx: Made the shift value explicit, use a local variable to make the -code readable and massaged changelog] - -Originally-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Kyle Huey <khuey@kylehuey.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Andy Lutomirski <luto@kernel.org> -Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 1 + - arch/x86/kernel/process.c | 12 +++++++----- - 2 files changed, 8 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 9f014c1..4027c33 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -141,6 +141,7 @@ - - /* DEBUGCTLMSR bits (others vary by model): */ - #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ -+#define DEBUGCTLMSR_BTF_SHIFT 1 - #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ - #define DEBUGCTLMSR_TR (1UL << 6) - #define DEBUGCTLMSR_BTS (1UL << 7) -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 0e1999e..496eef6 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -227,13 +227,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - - propagate_user_return_notify(prev_p, next_p); - -- if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { -- unsigned long debugctl = get_debugctlmsr(); -+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && -+ arch_has_block_step()) { -+ unsigned long debugctl, msk; - -+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl &= ~DEBUGCTLMSR_BTF; -- if (tifn & _TIF_BLOCKSTEP) -- debugctl |= DEBUGCTLMSR_BTF; -- update_debugctlmsr(debugctl); -+ msk = tifn & _TIF_BLOCKSTEP; -+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; -+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - } - - if ((tifp ^ tifn) & _TIF_NOTSC) { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch deleted file mode 100644 index 77e19632..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-kaiser-Move-feature-detection-up.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 4ce3d405a42c5799b762102a5f136159d5d7b5f8 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Mon, 25 Dec 2017 13:57:16 +0100 -Subject: [PATCH 037/103] x86/kaiser: Move feature detection up - -... before the first use of kaiser_enabled as otherwise funky -things happen: - - about to get started... 
- (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000] - (XEN) Pagetable walk from ffff88022a449090: - (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e - (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff - (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08 - entry.o#create_bounce_frame+0x135/0x14d - (XEN) Domain 0 (vcpu#0) crashed on cpu#0: - (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]---- - (XEN) CPU: 0 - (XEN) RIP: e033:[<ffffffff81007460>] - (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0) - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kaiser.h | 2 ++ - arch/x86/kernel/setup.c | 7 +++++++ - arch/x86/mm/kaiser.c | 2 -- - 3 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 906150d..b5e46aa 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; - - extern int kaiser_enabled; -+extern void __init kaiser_check_boottime_disable(void); - #else - #define kaiser_enabled 0 -+static inline void __init kaiser_check_boottime_disable(void) {} - #endif /* CONFIG_KAISER */ - - /* -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index 9c337b0..545a95a 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -114,6 +114,7 @@ - #include <asm/microcode.h> - #include <asm/mmu_context.h> - #include <asm/kaslr.h> -+#include <asm/kaiser.h> - - /* - * max_low_pfn_mapped: highest direct mapped pfn under 4GB -@@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p) - */ - init_hypervisor_platform(); - -+ /* -+ * This needs to happen right after XENPV is set on xen and -+ * kaiser_enabled is checked below in cleanup_highmap(). -+ */ -+ kaiser_check_boottime_disable(); -+ - x86_init.resources.probe_roms(); - - /* after parse_early_param, so could debug it */ -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 2768854..d43f369 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -310,8 +310,6 @@ void __init kaiser_init(void) - { - int cpu; - -- kaiser_check_boottime_disable(); -- - if (!kaiser_enabled) - return; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch deleted file mode 100644 index e5a210ab..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch +++ /dev/null @@ -1,112 +0,0 @@ -From b72b69b9696975c9279441e4998ceca506280dec Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 14 Feb 2017 00:11:04 -0800 -Subject: [PATCH 37/93] x86/process: Optimize TIF_NOTSC switch - -commit 5a920155e388ec22a22e0532fb695b9215c9b34d upstream - -Provide and use a toggle helper instead of doing it with a branch. 
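Because the caller only reaches the helper when the bit is known to differ between the outgoing and incoming task, an unconditional XOR on the cached register value replaces the set-or-clear branch. A minimal sketch of that shape (the shadow variable and write helper are illustrative):

    static unsigned long cr4_shadow;              /* cached register value */

    static void write_cr4_hw(unsigned long v)     /* hypothetical register write */
    {
            (void)v;
    }

    static void cr4_toggle_bits_sketch(unsigned long mask)
    {
            cr4_shadow ^= mask;       /* flip, with no "is it currently set?" branch */
            write_cr4_hw(cr4_shadow);
    }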
- -x86_64: arch/x86/kernel/process.o -text data bss dec hex -3008 8577 16 11601 2d51 Before -2976 8577 16 11569 2d31 After - -i386: arch/x86/kernel/process.o -text data bss dec hex -2925 8673 8 11606 2d56 Before -2893 8673 8 11574 2d36 After - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Andy Lutomirski <luto@kernel.org> -Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/tlbflush.h | 10 ++++++++++ - arch/x86/kernel/process.c | 22 ++++------------------ - 2 files changed, 14 insertions(+), 18 deletions(-) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 99185a0..686a58d 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask) - } - } - -+static inline void cr4_toggle_bits(unsigned long mask) -+{ -+ unsigned long cr4; -+ -+ cr4 = this_cpu_read(cpu_tlbstate.cr4); -+ cr4 ^= mask; -+ this_cpu_write(cpu_tlbstate.cr4, cr4); -+ __write_cr4(cr4); -+} -+ - /* Read the CR4 shadow. */ - static inline unsigned long cr4_read_shadow(void) - { -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 496eef6..b7e3822 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -134,11 +134,6 @@ void flush_thread(void) - fpu__clear(&tsk->thread.fpu); - } - --static void hard_disable_TSC(void) --{ -- cr4_set_bits(X86_CR4_TSD); --} -- - void disable_TSC(void) - { - preempt_disable(); -@@ -147,15 +142,10 @@ void disable_TSC(void) - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ -- hard_disable_TSC(); -+ cr4_set_bits(X86_CR4_TSD); - preempt_enable(); - } - --static void hard_enable_TSC(void) --{ -- cr4_clear_bits(X86_CR4_TSD); --} -- - static void enable_TSC(void) - { - preempt_disable(); -@@ -164,7 +154,7 @@ static void enable_TSC(void) - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ -- hard_enable_TSC(); -+ cr4_clear_bits(X86_CR4_TSD); - preempt_enable(); - } - -@@ -238,12 +228,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - } - -- if ((tifp ^ tifn) & _TIF_NOTSC) { -- if (tifn & _TIF_NOTSC) -- hard_disable_TSC(); -- else -- hard_enable_TSC(); -- } -+ if ((tifp ^ tifn) & _TIF_NOTSC) -+ cr4_toggle_bits(X86_CR4_TSD); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch deleted file mode 100644 index 846ec86f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 72e87893e6f14922dcd6231a7676bac67154dae8 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 1 Feb 2018 11:27:20 +0000 -Subject: [PATCH 37/42] x86/retpoline: Avoid retpolines for built-in __init - functions - -(cherry picked from commit 66f793099a636862a71c59d4a6ba91387b155e0c) - -There's no point in building init code with retpolines, since it runs before -any potentially hostile userspace does. 
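The opt-out is expressed as a function attribute guarded by the same RETPOLINE/MODULE conditions the build already knows about; a short sketch of how such an attribute gets applied (the example function is invented, the macro mirrors the deleted patch):

    #if defined(RETPOLINE) && !defined(MODULE)
    #define __noretpoline __attribute__((indirect_branch("keep")))
    #else
    #define __noretpoline
    #endif

    /* Built-in init-only code: emit the indirect call directly instead of
     * routing it through a retpoline thunk. */
    static void __noretpoline early_setup(void (*hook)(void))
    {
            hook();
    }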
And before the retpoline is actually -ALTERNATIVEd into place, for much of it. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: karahmed@amazon.de -Cc: peterz@infradead.org -Cc: bp@alien8.de -Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/init.h | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/include/linux/init.h b/include/linux/init.h -index e30104c..8e346d1 100644 ---- a/include/linux/init.h -+++ b/include/linux/init.h -@@ -4,6 +4,13 @@ - #include <linux/compiler.h> - #include <linux/types.h> - -+/* Built-in __init functions needn't be compiled with retpoline */ -+#if defined(RETPOLINE) && !defined(MODULE) -+#define __noretpoline __attribute__((indirect_branch("keep"))) -+#else -+#define __noretpoline -+#endif -+ - /* These macros are used to mark some functions or - * initialized data (doesn't apply to uninitialized data) - * as `initialization' functions. The kernel can take this -@@ -39,7 +46,7 @@ - - /* These are for everybody (although not all archs will actually - discard it in modules) */ --#define __init __section(.init.text) __cold notrace __latent_entropy -+#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline - #define __initdata __section(.init.data) - #define __initconst __section(.init.rodata) - #define __exitdata __section(.exit.data) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch deleted file mode 100644 index 23caec1c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch +++ /dev/null @@ -1,359 +0,0 @@ -From 5e40b997d86f563b8ebe8a17019fca81af241bfb Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Wed, 3 Jan 2018 10:17:35 -0800 -Subject: [PATCH 038/103] KPTI: Rename to PAGE_TABLE_ISOLATION - -This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION. 
- -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/boot/compressed/misc.h | 2 +- - arch/x86/entry/entry_64.S | 12 ++++++------ - arch/x86/events/intel/ds.c | 4 ++-- - arch/x86/include/asm/cpufeatures.h | 2 +- - arch/x86/include/asm/kaiser.h | 12 ++++++------ - arch/x86/include/asm/pgtable.h | 4 ++-- - arch/x86/include/asm/pgtable_64.h | 4 ++-- - arch/x86/include/asm/pgtable_types.h | 2 +- - arch/x86/include/asm/tlbflush.h | 2 +- - arch/x86/kernel/head_64.S | 2 +- - arch/x86/mm/Makefile | 2 +- - arch/x86/mm/kaslr.c | 2 +- - include/linux/kaiser.h | 6 +++--- - include/linux/percpu-defs.h | 2 +- - security/Kconfig | 2 +- - tools/arch/x86/include/asm/cpufeatures.h | 2 +- - 16 files changed, 31 insertions(+), 31 deletions(-) - -diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h -index cd80024..4f4c42a 100644 ---- a/arch/x86/boot/compressed/misc.h -+++ b/arch/x86/boot/compressed/misc.h -@@ -9,7 +9,7 @@ - */ - #undef CONFIG_PARAVIRT - #undef CONFIG_PARAVIRT_SPINLOCKS --#undef CONFIG_KAISER -+#undef CONFIG_PAGE_TABLE_ISOLATION - #undef CONFIG_KASAN - - #include <linux/linkage.h> -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index d4ba81e..5bb9b02 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1071,7 +1071,7 @@ ENTRY(paranoid_entry) - SWAPGS - xorl %ebx, %ebx - 1: --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * We might have come in between a swapgs and a SWITCH_KERNEL_CR3 - * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit. -@@ -1111,7 +1111,7 @@ ENTRY(paranoid_exit) - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF_DEBUG - TRACE_IRQS_IRETQ_DEBUG --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ - testl $2, %ebx /* SWITCH_USER_CR3 needed? */ - jz paranoid_exit_no_switch -@@ -1338,7 +1338,7 @@ ENTRY(nmi) - - movq %rsp, %rdi - movq $-1, %rsi --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ - ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER -@@ -1352,7 +1352,7 @@ ENTRY(nmi) - #endif - call do_nmi - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Unconditionally restore CR3. I know we return to - * kernel code that needs user CR3, but do we ever return -@@ -1582,7 +1582,7 @@ end_repeat_nmi: - 1: - movq %rsp, %rdi - movq $-1, %rsi --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ - ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER -@@ -1598,7 +1598,7 @@ end_repeat_nmi: - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - call do_nmi - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Unconditionally restore CR3. 
We might be returning to - * kernel code that needs user CR3, like just just before -diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c -index c2e4ae2..f97d8b4 100644 ---- a/arch/x86/events/intel/ds.c -+++ b/arch/x86/events/intel/ds.c -@@ -274,7 +274,7 @@ static DEFINE_PER_CPU(void *, insn_buffer); - - static void *dsalloc(size_t size, gfp_t flags, int node) - { --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - unsigned int order = get_order(size); - struct page *page; - unsigned long addr; -@@ -295,7 +295,7 @@ static void *dsalloc(size_t size, gfp_t flags, int node) - - static void dsfree(const void *buffer, size_t size) - { --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - if (!buffer) - return; - kaiser_remove_mapping((unsigned long)buffer, size); -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 20271d6..454a37a 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -199,7 +199,7 @@ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ --#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index b5e46aa..802bbbd 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -20,7 +20,7 @@ - #define KAISER_SHADOW_PGD_OFFSET 0x1000 - - #ifdef __ASSEMBLY__ --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg -@@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - 8: - .endm - --#else /* CONFIG_KAISER */ -+#else /* CONFIG_PAGE_TABLE_ISOLATION */ - - .macro SWITCH_KERNEL_CR3 - .endm -@@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax - .macro SWITCH_KERNEL_CR3_NO_STACK - .endm - --#endif /* CONFIG_KAISER */ -+#endif /* CONFIG_PAGE_TABLE_ISOLATION */ - - #else /* __ASSEMBLY__ */ - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Upon kernel/user mode switch, it may happen that the address - * space has to be switched before the registers have been -@@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime_disable(void); - #else - #define kaiser_enabled 0 - static inline void __init kaiser_check_boottime_disable(void) {} --#endif /* CONFIG_KAISER */ -+#endif /* CONFIG_PAGE_TABLE_ISOLATION */ - - /* -- * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, -+ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set, - * so as to build with tests on kaiser_enabled instead of #ifdefs. 
- */ - -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 217e83a..2536f90 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -18,7 +18,7 @@ - #ifndef __ASSEMBLY__ - #include <asm/x86_init.h> - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - extern int kaiser_enabled; - #else - #define kaiser_enabled 0 -@@ -920,7 +920,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, - static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - { - memcpy(dst, src, count * sizeof(pgd_t)); --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - if (kaiser_enabled) { - /* Clone the shadow pgd part as well */ - memcpy(native_get_shadow_pgd(dst), -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index cf68b5c..ce97c8c6 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_t *pud) - native_set_pud(pud, native_make_pud(0)); - } - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); - - static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) -@@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - BUILD_BUG_ON(1); - return NULL; - } --#endif /* CONFIG_KAISER */ -+#endif /* CONFIG_PAGE_TABLE_ISOLATION */ - - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) - { -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index f0d9a1a..f1c8ac4 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -144,7 +144,7 @@ - #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) - #define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) - --#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) -+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64) - /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */ - #define X86_CR3_PCID_ASID_USER (_AC(0x80,UL)) - -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 8db339a..183af59 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -137,7 +137,7 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - * Declare a couple of kaiser interfaces here for convenience, - * to avoid the need for asm/kaiser.h in unexpected places. - */ --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - extern int kaiser_enabled; - extern void kaiser_setup_pcid(void); - extern void kaiser_flush_tlb_on_return_to_user(void); -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index d04479b..67cd7c1 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -405,7 +405,7 @@ GLOBAL(early_recursion_flag) - .balign PAGE_SIZE; \ - GLOBAL(name) - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Each PGD needs to be 8k long and 8k aligned. 
We do not - * ever go out to userspace with these, so we do not -diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile -index c505569..c548b46 100644 ---- a/arch/x86/mm/Makefile -+++ b/arch/x86/mm/Makefile -@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o - obj-$(CONFIG_X86_INTEL_MPX) += mpx.o - obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o - obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o --obj-$(CONFIG_KAISER) += kaiser.o -+obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o -diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c -index 9284ec1..319183d 100644 ---- a/arch/x86/mm/kaslr.c -+++ b/arch/x86/mm/kaslr.c -@@ -189,6 +189,6 @@ void __meminit init_trampoline(void) - *pud_tramp = *pud; - } - -- /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */ -+ /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */ - trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); - } -diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h -index 4a4d6d9..58c55b1 100644 ---- a/include/linux/kaiser.h -+++ b/include/linux/kaiser.h -@@ -1,7 +1,7 @@ - #ifndef _LINUX_KAISER_H - #define _LINUX_KAISER_H - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - #include <asm/kaiser.h> - - static inline int kaiser_map_thread_stack(void *stack) -@@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_stack(void *stack) - #else - - /* -- * These stubs are used whenever CONFIG_KAISER is off, which -+ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which - * includes architectures that support KAISER, but have it disabled. - */ - -@@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_stack(void *stack) - { - } - --#endif /* !CONFIG_KAISER */ -+#endif /* !CONFIG_PAGE_TABLE_ISOLATION */ - #endif /* _LINUX_KAISER_H */ -diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h -index cfe13cb..8902f23 100644 ---- a/include/linux/percpu-defs.h -+++ b/include/linux/percpu-defs.h -@@ -35,7 +35,7 @@ - - #endif - --#ifdef CONFIG_KAISER -+#ifdef CONFIG_PAGE_TABLE_ISOLATION - #define USER_MAPPED_SECTION "..user_mapped" - #else - #define USER_MAPPED_SECTION "" -diff --git a/security/Kconfig b/security/Kconfig -index fd2ceeb..32f36b4 100644 ---- a/security/Kconfig -+++ b/security/Kconfig -@@ -31,7 +31,7 @@ config SECURITY - - If you are unsure how to answer this question, answer N. - --config KAISER -+config PAGE_TABLE_ISOLATION - bool "Remove the kernel mapping in user mode" - default y - depends on X86_64 && SMP -diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h -index 67c93d9..f79669a 100644 ---- a/tools/arch/x86/include/asm/cpufeatures.h -+++ b/tools/arch/x86/include/asm/cpufeatures.h -@@ -198,7 +198,7 @@ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... 
*/ --#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch deleted file mode 100644 index 86badf1b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch +++ /dev/null @@ -1,229 +0,0 @@ -From 4cac5cffd142a19a03aceb9037302e10fe04d566 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 29 Apr 2018 15:21:42 +0200 -Subject: [PATCH 38/93] x86/process: Allow runtime control of Speculative Store - Bypass - -commit 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c upstream - -The Speculative Store Bypass vulnerability can be mitigated with the -Reduced Data Speculation (RDS) feature. To allow finer grained control of -this eventually expensive mitigation a per task mitigation control is -required. - -Add a new TIF_RDS flag and put it into the group of TIF flags which are -evaluated for mismatch in switch_to(). If these bits differ in the previous -and the next task, then the slow path function __switch_to_xtra() is -invoked. Implement the TIF_RDS dependent mitigation control in the slow -path. - -If the prctl for controlling Speculative Store Bypass is disabled or no -task uses the prctl then there is no overhead in the switch_to() fast -path. - -Update the KVM related speculation control functions to take TID_RDS into -account as well. - -Based on a patch from Tim Chen. Completely rewritten. 
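A worked illustration of the flag-to-MSR translation this patch introduces, re-created in userspace from the constants visible in the hunks below — TIF_RDS is thread-flag bit 5, SPEC_CTRL_RDS is MSR bit 2 — so the conversion is a single branch-free right shift:

    #include <assert.h>
    #include <stdint.h>

    #define TIF_RDS             5   /* thread flag bit, per thread_info.h hunk */
    #define SPEC_CTRL_RDS_SHIFT 2   /* MSR bit, per msr-index.h hunk */

    /* Move the TIF_RDS bit down into the SPEC_CTRL_RDS position. */
    static uint64_t rds_tif_to_spec_ctrl(uint64_t tifn)
    {
            return (tifn & (1ULL << TIF_RDS)) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT);
    }

    int main(void)
    {
            assert(rds_tif_to_spec_ctrl(1ULL << TIF_RDS) ==
                   (1ULL << SPEC_CTRL_RDS_SHIFT));
            assert(rds_tif_to_spec_ctrl(0) == 0);
            return 0;
    }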
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 3 ++- - arch/x86/include/asm/spec-ctrl.h | 17 +++++++++++++++++ - arch/x86/include/asm/thread_info.h | 6 ++++-- - arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++++----- - arch/x86/kernel/process.c | 22 ++++++++++++++++++++++ - 5 files changed, 66 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 4027c33..7ad3ed9 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -40,7 +40,8 @@ - #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ - #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ - #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ --#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ -+#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ -+#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ - - #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ - #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 3ad6442..45ef00a 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -2,6 +2,7 @@ - #ifndef _ASM_X86_SPECCTRL_H_ - #define _ASM_X86_SPECCTRL_H_ - -+#include <linux/thread_info.h> - #include <asm/nospec-branch.h> - - /* -@@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64); - extern u64 x86_amd_ls_cfg_base; - extern u64 x86_amd_ls_cfg_rds_mask; - -+/* The Intel SPEC CTRL MSR base value cache */ -+extern u64 x86_spec_ctrl_base; -+ -+static inline u64 rds_tif_to_spec_ctrl(u64 tifn) -+{ -+ BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); -+ return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); -+} -+ -+static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) -+{ -+ return (tifn & _TIF_RDS) ? 
x86_amd_ls_cfg_rds_mask : 0ULL; -+} -+ -+extern void speculative_store_bypass_update(void); -+ - #endif -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 89978b9..661afac 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -83,6 +83,7 @@ struct thread_info { - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ - #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -+#define TIF_RDS 5 /* Reduced data speculation */ - #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ - #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ - #define TIF_SECCOMP 8 /* secure computing */ -@@ -104,8 +105,9 @@ struct thread_info { - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) --#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -+#define _TIF_RDS (1 << TIF_RDS) - #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) - #define _TIF_SECCOMP (1 << TIF_SECCOMP) -@@ -139,7 +141,7 @@ struct thread_info { - - /* flags to check in __switch_to() */ - #define _TIF_WORK_CTXSW \ -- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) -+ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) - - #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) - #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 46d01fd..4f09576 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -32,7 +32,7 @@ static void __init ssb_select_mitigation(void); - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. 
- */ --static u64 __ro_after_init x86_spec_ctrl_base; -+u64 __ro_after_init x86_spec_ctrl_base; - - /* - * The vendor and possibly platform specific bits which can be modified in -@@ -139,25 +139,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); - - u64 x86_spec_ctrl_get_default(void) - { -- return x86_spec_ctrl_base; -+ u64 msrval = x86_spec_ctrl_base; -+ -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ return msrval; - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - - void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) - { -+ u64 host = x86_spec_ctrl_base; -+ - if (!boot_cpu_has(X86_FEATURE_IBRS)) - return; -- if (x86_spec_ctrl_base != guest_spec_ctrl) -+ -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ -+ if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); - - void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) - { -+ u64 host = x86_spec_ctrl_base; -+ - if (!boot_cpu_has(X86_FEATURE_IBRS)) - return; -- if (x86_spec_ctrl_base != guest_spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -+ -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ -+ if (host != guest_spec_ctrl) -+ wrmsrl(MSR_IA32_SPEC_CTRL, host); - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); - -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index b7e3822..9c48e18 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -33,6 +33,7 @@ - #include <asm/mce.h> - #include <asm/vm86.h> - #include <asm/switch_to.h> -+#include <asm/spec-ctrl.h> - - /* - * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, -@@ -202,6 +203,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss, - } - } - -+static __always_inline void __speculative_store_bypass_update(unsigned long tifn) -+{ -+ u64 msr; -+ -+ if (static_cpu_has(X86_FEATURE_AMD_RDS)) { -+ msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); -+ wrmsrl(MSR_AMD64_LS_CFG, msr); -+ } else { -+ msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); -+ wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+ } -+} -+ -+void speculative_store_bypass_update(void) -+{ -+ __speculative_store_bypass_update(current_thread_info()->flags); -+} -+ - void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) - { -@@ -230,6 +249,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - - if ((tifp ^ tifn) & _TIF_NOTSC) - cr4_toggle_bits(X86_CR4_TSD); -+ -+ if ((tifp ^ tifn) & _TIF_RDS) -+ __speculative_store_bypass_update(tifn); - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch deleted file mode 100644 index ad179306..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 825c7a1a9545787191c7dec21823a4b854dd8172 Mon Sep 17 00:00:00 2001 -From: KarimAllah Ahmed <karahmed@amazon.de> -Date: Thu, 1 Feb 2018 11:27:21 +0000 -Subject: [PATCH 38/42] x86/spectre: Simplify spectre_v2 command line parsing - -(cherry picked from commit 9005c6834c0ffdfe46afa76656bd9276cca864f6) - -[dwmw2: Use ARRAY_SIZE] - -Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: peterz@infradead.org -Cc: bp@alien8.de -Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 86 ++++++++++++++++++++++++++++++---------------- - 1 file changed, 56 insertions(+), 30 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index aec7daf..957ad44 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -118,13 +118,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } - static void __init spec2_print_if_insecure(const char *reason) - { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -- pr_info("%s\n", reason); -+ pr_info("%s selected on command line.\n", reason); - } - - static void __init spec2_print_if_secure(const char *reason) - { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -- pr_info("%s\n", reason); -+ pr_info("%s selected on command line.\n", reason); - } - - static inline bool retp_compiler(void) -@@ -139,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) - return len == arglen && !strncmp(arg, opt, len); - } - -+static const struct { -+ const char *option; -+ enum spectre_v2_mitigation_cmd cmd; -+ bool secure; -+} mitigation_options[] = { -+ { "off", SPECTRE_V2_CMD_NONE, false }, -+ { "on", SPECTRE_V2_CMD_FORCE, true }, -+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, -+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, -+ { "retpoline,generic", 
SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, -+ { "auto", SPECTRE_V2_CMD_AUTO, false }, -+}; -+ - static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - { - char arg[20]; -- int ret; -- -- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, -- sizeof(arg)); -- if (ret > 0) { -- if (match_option(arg, ret, "off")) { -- goto disable; -- } else if (match_option(arg, ret, "on")) { -- spec2_print_if_secure("force enabled on command line."); -- return SPECTRE_V2_CMD_FORCE; -- } else if (match_option(arg, ret, "retpoline")) { -- spec2_print_if_insecure("retpoline selected on command line."); -- return SPECTRE_V2_CMD_RETPOLINE; -- } else if (match_option(arg, ret, "retpoline,amd")) { -- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { -- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); -- return SPECTRE_V2_CMD_AUTO; -- } -- spec2_print_if_insecure("AMD retpoline selected on command line."); -- return SPECTRE_V2_CMD_RETPOLINE_AMD; -- } else if (match_option(arg, ret, "retpoline,generic")) { -- spec2_print_if_insecure("generic retpoline selected on command line."); -- return SPECTRE_V2_CMD_RETPOLINE_GENERIC; -- } else if (match_option(arg, ret, "auto")) { -+ int ret, i; -+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; -+ -+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) -+ return SPECTRE_V2_CMD_NONE; -+ else { -+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, -+ sizeof(arg)); -+ if (ret < 0) -+ return SPECTRE_V2_CMD_AUTO; -+ -+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { -+ if (!match_option(arg, ret, mitigation_options[i].option)) -+ continue; -+ cmd = mitigation_options[i].cmd; -+ break; -+ } -+ -+ if (i >= ARRAY_SIZE(mitigation_options)) { -+ pr_err("unknown option (%s). Switching to AUTO select\n", -+ mitigation_options[i].option); - return SPECTRE_V2_CMD_AUTO; - } - } - -- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) -+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE || -+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || -+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && -+ !IS_ENABLED(CONFIG_RETPOLINE)) { -+ pr_err("%s selected but not compiled in. Switching to AUTO select\n", -+ mitigation_options[i].option); - return SPECTRE_V2_CMD_AUTO; --disable: -- spec2_print_if_insecure("disabled on command line."); -- return SPECTRE_V2_CMD_NONE; -+ } -+ -+ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && -+ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { -+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); -+ return SPECTRE_V2_CMD_AUTO; -+ } -+ -+ if (mitigation_options[i].secure) -+ spec2_print_if_secure(mitigation_options[i].option); -+ else -+ spec2_print_if_insecure(mitigation_options[i].option); -+ -+ return cmd; - } - - /* Check for Skylake-like CPUs (for RSB handling) */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch deleted file mode 100644 index 016ebb4e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-KPTI-Report-when-enabled.patch +++ /dev/null @@ -1,48 +0,0 @@ -From e09e4eba09f13bd94283ce92d0a246ec3a97d7e7 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Wed, 3 Jan 2018 10:18:01 -0800 -Subject: [PATCH 039/103] KPTI: Report when enabled - -Make sure dmesg reports when KPTI is enabled. 
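The mechanism, per the kaiser.c hunk below, is the kernel's pr_fmt() prefixing idiom; a standalone re-creation, with printf standing in for printk:

    #include <stdio.h>

    /* Redefining pr_fmt before any pr_* call prefixes every message in
     * the file at compile time, so the short pr_info("enabled\n") below
     * emits the full, searchable dmesg line. */
    #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
    #define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

    int main(void)
    {
            pr_info("enabled\n");
            return 0;
    }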
- -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index d43f369..b6b0f3a 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -10,6 +10,9 @@ - #include <linux/mm.h> - #include <linux/uaccess.h> - -+#undef pr_fmt -+#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt -+ - #include <asm/kaiser.h> - #include <asm/tlbflush.h> /* to verify its kaiser declarations */ - #include <asm/pgtable.h> -@@ -292,7 +295,7 @@ void __init kaiser_check_boottime_disable(void) - return; - - disable: -- pr_info("Kernel/User page tables isolation: disabled\n"); -+ pr_info("disabled\n"); - - silent_disable: - kaiser_enabled = 0; -@@ -352,6 +355,8 @@ void __init kaiser_init(void) - kaiser_add_user_map_early(&debug_idt_table, - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL); -+ -+ pr_info("enabled\n"); - } - - /* Add a mapping to the shadow mapping, and synchronize the mappings */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch deleted file mode 100644 index a53ec46b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-pti-Mark-constant-arrays-as-__initconst.patch +++ /dev/null @@ -1,55 +0,0 @@ -From a89a8bf00b6ad57d89f9d42ae682f7367fcd0d27 Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann <arnd@arndb.de> -Date: Fri, 2 Feb 2018 22:39:23 +0100 -Subject: [PATCH 39/42] x86/pti: Mark constant arrays as __initconst - -(cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e) - -I'm seeing build failures from the two newly introduced arrays that -are marked 'const' and '__initdata', which are mutually exclusive: - -arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' -arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' - -The correct annotation is __initconst. 
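The conflict is mechanical: __initdata places an object in the writable .init.data section, and GCC refuses to mix a const-qualified object into a section that already holds non-const ones, while __initconst selects the read-only .init.rodata section instead. A minimal sketch with the attributes spelled out — the table contents are made up:

    /* Compile with -c: the const array lands in .init.rodata, matching
     * its qualification; swapping in INITDATA here would put a const
     * object into the writable section, the reported conflict. */
    #define INITDATA  __attribute__((__section__(".init.data")))
    #define INITCONST __attribute__((__section__(".init.rodata")))

    static const int cpu_model_table[] INITCONST = { 6, 15, 23 };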
- -Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@suse.de> -Cc: Thomas Garnier <thgarnie@google.com> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 60e537d..08e89ed 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) - #endif - } - --static const __initdata struct x86_cpu_id cpu_no_speculation[] = { -+static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, -@@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { - {} - }; - --static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { -+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - {} - }; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch deleted file mode 100644 index d1cb5dcd..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch +++ /dev/null @@ -1,222 +0,0 @@ -From 3495e18cce0a77cb974173998dfecbf22c9df984 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 29 Apr 2018 15:26:40 +0200 -Subject: [PATCH 39/93] x86/speculation: Add prctl for Speculative Store Bypass - mitigation - -commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream - -Add prctl based control for Speculative Store Bypass mitigation and make it -the default mitigation for Intel and AMD. - -Andi Kleen provided the following rationale (slightly redacted): - - There are multiple levels of impact of Speculative Store Bypass: - - 1) JITed sandbox. - It cannot invoke system calls, but can do PRIME+PROBE and may have call - interfaces to other code - - 2) Native code process. - No protection inside the process at this level. - - 3) Kernel. - - 4) Between processes. - - The prctl tries to protect against case (1) doing attacks. - - If the untrusted code can do random system calls then control is already - lost in a much worse way. So there needs to be system call protection in - some way (using a JIT not allowing them or seccomp). Or rather if the - process can subvert its environment somehow to do the prctl it can already - execute arbitrary code, which is much worse than SSB. - - To put it differently, the point of the prctl is to not allow JITed code - to read data it shouldn't read from its JITed sandbox. If it already has - escaped its sandbox then it can already read everything it wants in its - address space, and do much worse. 
- - The ability to control Speculative Store Bypass allows to enable the - protection selectively without affecting overall system performance. - -Based on an initial patch from Tim Chen. Completely rewritten. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 6 ++- - arch/x86/include/asm/nospec-branch.h | 1 + - arch/x86/kernel/cpu/bugs.c | 83 +++++++++++++++++++++++++++++++----- - 3 files changed, 79 insertions(+), 11 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 348ca9d..80811df 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -3990,7 +3990,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - off - Unconditionally enable Speculative Store Bypass - auto - Kernel detects whether the CPU model contains an - implementation of Speculative Store Bypass and -- picks the most appropriate mitigation -+ picks the most appropriate mitigation. -+ prctl - Control Speculative Store Bypass per thread -+ via prctl. Speculative Store Bypass is enabled -+ for a process by default. The state of the control -+ is inherited on fork. - - Not specifying this option is equivalent to - spec_store_bypass_disable=auto. -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 1119f14..71ad014 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -232,6 +232,7 @@ extern u64 x86_spec_ctrl_get_default(void); - enum ssb_mitigation { - SPEC_STORE_BYPASS_NONE, - SPEC_STORE_BYPASS_DISABLE, -+ SPEC_STORE_BYPASS_PRCTL, - }; - - extern char __indirect_thunk_start[]; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 4f09576..b7d9adf 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -11,6 +11,8 @@ - #include <linux/utsname.h> - #include <linux/cpu.h> - #include <linux/module.h> -+#include <linux/nospec.h> -+#include <linux/prctl.h> - - #include <asm/spec-ctrl.h> - #include <asm/cmdline.h> -@@ -411,20 +413,23 @@ enum ssb_mitigation_cmd { - SPEC_STORE_BYPASS_CMD_NONE, - SPEC_STORE_BYPASS_CMD_AUTO, - SPEC_STORE_BYPASS_CMD_ON, -+ SPEC_STORE_BYPASS_CMD_PRCTL, - }; - - static const char *ssb_strings[] = { - [SPEC_STORE_BYPASS_NONE] = "Vulnerable", -- [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" -+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", -+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" - }; - - static const struct { - const char *option; - enum ssb_mitigation_cmd cmd; - } ssb_mitigation_options[] = { -- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ -- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ -- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ -+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ -+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ - }; - - static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) -@@ -474,14 
+479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - - switch (cmd) { - case SPEC_STORE_BYPASS_CMD_AUTO: -- /* -- * AMD platforms by default don't need SSB mitigation. -- */ -- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) -- break; -+ /* Choose prctl as the default mode */ -+ mode = SPEC_STORE_BYPASS_PRCTL; -+ break; - case SPEC_STORE_BYPASS_CMD_ON: - mode = SPEC_STORE_BYPASS_DISABLE; - break; -+ case SPEC_STORE_BYPASS_CMD_PRCTL: -+ mode = SPEC_STORE_BYPASS_PRCTL; -+ break; - case SPEC_STORE_BYPASS_CMD_NONE: - break; - } -@@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass - * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation - */ -- if (mode != SPEC_STORE_BYPASS_NONE) { -+ if (mode == SPEC_STORE_BYPASS_DISABLE) { - setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); - /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses -@@ -523,6 +529,63 @@ static void ssb_select_mitigation() - - #undef pr_fmt - -+static int ssb_prctl_set(unsigned long ctrl) -+{ -+ bool rds = !!test_tsk_thread_flag(current, TIF_RDS); -+ -+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) -+ return -ENXIO; -+ -+ if (ctrl == PR_SPEC_ENABLE) -+ clear_tsk_thread_flag(current, TIF_RDS); -+ else -+ set_tsk_thread_flag(current, TIF_RDS); -+ -+ if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) -+ speculative_store_bypass_update(); -+ -+ return 0; -+} -+ -+static int ssb_prctl_get(void) -+{ -+ switch (ssb_mode) { -+ case SPEC_STORE_BYPASS_DISABLE: -+ return PR_SPEC_DISABLE; -+ case SPEC_STORE_BYPASS_PRCTL: -+ if (test_tsk_thread_flag(current, TIF_RDS)) -+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE; -+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE; -+ default: -+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) -+ return PR_SPEC_ENABLE; -+ return PR_SPEC_NOT_AFFECTED; -+ } -+} -+ -+int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) -+{ -+ if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) -+ return -ERANGE; -+ -+ switch (which) { -+ case PR_SPEC_STORE_BYPASS: -+ return ssb_prctl_set(ctrl); -+ default: -+ return -ENODEV; -+ } -+} -+ -+int arch_prctl_spec_ctrl_get(unsigned long which) -+{ -+ switch (which) { -+ case PR_SPEC_STORE_BYPASS: -+ return ssb_prctl_get(); -+ default: -+ return -ENODEV; -+ } -+} -+ - void x86_spec_ctrl_setup_ap(void) - { - if (boot_cpu_has(X86_FEATURE_IBRS)) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch deleted file mode 100644 index c6511915..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-kaiser-Set-_PAGE_NX-only-if-supported.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 71ce3e3c495897125681f2adfe13033aff1a3a58 Mon Sep 17 00:00:00 2001 -From: Guenter Roeck <groeck@chromium.org> -Date: Thu, 4 Jan 2018 13:41:55 -0800 -Subject: [PATCH 040/103] kaiser: Set _PAGE_NX only if supported - -This resolves a crash if loaded under qemu + haxm under windows. -See https://www.spinics.net/lists/kernel/msg2689835.html for details. -Here is a boot log (the log is from chromeos-4.4, but Tao Wu says that -the same log is also seen with vanilla v4.4.110-rc1). 
- -[ 0.712750] Freeing unused kernel memory: 552K -[ 0.721821] init: Corrupted page table at address 57b029b332e0 -[ 0.722761] PGD 80000000bb238067 PUD bc36a067 PMD bc369067 PTE 45d2067 -[ 0.722761] Bad pagetable: 000b [#1] PREEMPT SMP -[ 0.722761] Modules linked in: -[ 0.722761] CPU: 1 PID: 1 Comm: init Not tainted 4.4.96 #31 -[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS -rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 -[ 0.722761] task: ffff8800bc290000 ti: ffff8800bc28c000 task.ti: ffff8800bc28c000 -[ 0.722761] RIP: 0010:[<ffffffff83f4129e>] [<ffffffff83f4129e>] __clear_user+0x42/0x67 -[ 0.722761] RSP: 0000:ffff8800bc28fcf8 EFLAGS: 00010202 -[ 0.722761] RAX: 0000000000000000 RBX: 00000000000001a4 RCX: 00000000000001a4 -[ 0.722761] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000057b029b332e0 -[ 0.722761] RBP: ffff8800bc28fd08 R08: ffff8800bc290000 R09: ffff8800bb2f4000 -[ 0.722761] R10: ffff8800bc290000 R11: ffff8800bb2f4000 R12: 000057b029b332e0 -[ 0.722761] R13: 0000000000000000 R14: 000057b029b33340 R15: ffff8800bb1e2a00 -[ 0.722761] FS: 0000000000000000(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 -[ 0.722761] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b -[ 0.722761] CR2: 000057b029b332e0 CR3: 00000000bb2f8000 CR4: 00000000000006e0 -[ 0.722761] Stack: -[ 0.722761] 000057b029b332e0 ffff8800bb95fa80 ffff8800bc28fd18 ffffffff83f4120c -[ 0.722761] ffff8800bc28fe18 ffffffff83e9e7a1 ffff8800bc28fd68 0000000000000000 -[ 0.722761] ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 -[ 0.722761] Call Trace: -[ 0.722761] [<ffffffff83f4120c>] clear_user+0x2e/0x30 -[ 0.722761] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7 -[ 0.722761] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c -[ 0.722761] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98 -[ 0.722761] [<ffffffff844febe0>] ? rest_init+0x87/0x87 -[ 0.722761] [<ffffffff83de40be>] do_execve+0x23/0x25 -[ 0.722761] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d -[ 0.722761] [<ffffffff844fec4d>] kernel_init+0x6d/0xda -[ 0.722761] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70 -[ 0.722761] [<ffffffff844febe0>] ? 
rest_init+0x87/0x87 -[ 0.722761] Code: 86 84 be 12 00 00 00 e8 87 0d e8 ff 66 66 90 48 89 d8 48 c1 -eb 03 4c 89 e7 83 e0 07 48 89 d9 be 08 00 00 00 31 d2 48 85 c9 74 0a <48> 89 17 -48 01 f7 ff c9 75 f6 48 89 c1 85 c9 74 09 88 17 48 ff -[ 0.722761] RIP [<ffffffff83f4129e>] __clear_user+0x42/0x67 -[ 0.722761] RSP <ffff8800bc28fcf8> -[ 0.722761] ---[ end trace def703879b4ff090 ]--- -[ 0.722761] BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/v4.4/kernel/locking/rwsem.c:21 -[ 0.722761] in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: init -[ 0.722761] CPU: 1 PID: 1 Comm: init Tainted: G D 4.4.96 #31 -[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 -[ 0.722761] 0000000000000086 dcb5d76098c89836 ffff8800bc28fa30 ffffffff83f34004 -[ 0.722761] ffffffff84839dc2 0000000000000015 ffff8800bc28fa40 ffffffff83d57dc9 -[ 0.722761] ffff8800bc28fa68 ffffffff83d57e6a ffffffff84a53640 0000000000000000 -[ 0.722761] Call Trace: -[ 0.722761] [<ffffffff83f34004>] dump_stack+0x4d/0x63 -[ 0.722761] [<ffffffff83d57dc9>] ___might_sleep+0x13a/0x13c -[ 0.722761] [<ffffffff83d57e6a>] __might_sleep+0x9f/0xa6 -[ 0.722761] [<ffffffff84502788>] down_read+0x20/0x31 -[ 0.722761] [<ffffffff83cc5d9b>] __blocking_notifier_call_chain+0x35/0x63 -[ 0.722761] [<ffffffff83cc5ddd>] blocking_notifier_call_chain+0x14/0x16 -[ 0.800374] usb 1-1: new full-speed USB device number 2 using uhci_hcd -[ 0.722761] [<ffffffff83cefe97>] profile_task_exit+0x1a/0x1c -[ 0.802309] [<ffffffff83cac84e>] do_exit+0x39/0xe7f -[ 0.802309] [<ffffffff83ce5938>] ? vprintk_default+0x1d/0x1f -[ 0.802309] [<ffffffff83d7bb95>] ? printk+0x57/0x73 -[ 0.802309] [<ffffffff83c46e25>] oops_end+0x80/0x85 -[ 0.802309] [<ffffffff83c7b747>] pgtable_bad+0x8a/0x95 -[ 0.802309] [<ffffffff83ca7f4a>] __do_page_fault+0x8c/0x352 -[ 0.802309] [<ffffffff83eefba5>] ? file_has_perm+0xc4/0xe5 -[ 0.802309] [<ffffffff83ca821c>] do_page_fault+0xc/0xe -[ 0.802309] [<ffffffff84507682>] page_fault+0x22/0x30 -[ 0.802309] [<ffffffff83f4129e>] ? __clear_user+0x42/0x67 -[ 0.802309] [<ffffffff83f4127f>] ? __clear_user+0x23/0x67 -[ 0.802309] [<ffffffff83f4120c>] clear_user+0x2e/0x30 -[ 0.802309] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7 -[ 0.802309] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c -[ 0.802309] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98 -[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87 -[ 0.802309] [<ffffffff83de40be>] do_execve+0x23/0x25 -[ 0.802309] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d -[ 0.802309] [<ffffffff844fec4d>] kernel_init+0x6d/0xda -[ 0.802309] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70 -[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87 -[ 0.830559] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 -[ 0.830559] -[ 0.831305] Kernel Offset: 0x2c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) -[ 0.831305] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 - -The crash part of this problem may be solved with the following patch -(thanks to Hugh for the hint). There is still another problem, though - -with this patch applied, the qemu session aborts with "VCPU Shutdown -request", whatever that means. 
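The fix, visible in the hunk below, is a one-line feature test before setting the bit. For orientation, the capability being tested is the NX/XD bit — CPUID leaf 0x80000001, EDX bit 20 — which the qemu + haxm combination apparently does not expose; a userspace probe, illustrative only:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int a, b, c, d;

            /* Extended leaf 0x80000001: EDX bit 20 advertises NX. */
            if (__get_cpuid(0x80000001, &a, &b, &c, &d) && (d & (1u << 20)))
                    puts("NX present: safe to set _PAGE_NX in the shadow pgd");
            else
                    puts("no NX: leave _PAGE_NX clear, as the fix does");
            return 0;
    }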
- -Cc: lepton <ytht.net@gmail.com> -Signed-off-by: Guenter Roeck <groeck@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index b6b0f3a..d8376b4 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -413,7 +413,8 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - * get out to userspace running on the kernel CR3, - * userspace will crash instead of running. - */ -- pgd.pgd |= _PAGE_NX; -+ if (__supported_pte_mask & _PAGE_NX) -+ pgd.pgd |= _PAGE_NX; - } - } else if (!pgd.pgd) { - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch deleted file mode 100644 index 973e9188..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch +++ /dev/null @@ -1,92 +0,0 @@ -From a3cb1b4823957921fa7a58e51bc8ee3e880bf1c5 Mon Sep 17 00:00:00 2001 -From: Will Deacon <will.deacon@arm.com> -Date: Mon, 5 Feb 2018 14:16:06 +0000 -Subject: [PATCH 40/93] nospec: Move array_index_nospec() parameter checking - into separate macro - -commit 8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 upstream. - -For architectures providing their own implementation of -array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to -complain about out-of-range parameters using WARN_ON() results in a mess -of mutually-dependent include files. - -Rather than unpick the dependencies, simply have the core code in nospec.h -perform the checking for us. - -Signed-off-by: Will Deacon <will.deacon@arm.com> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.deacon@arm.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/nospec.h | 36 +++++++++++++++++++++--------------- - 1 file changed, 21 insertions(+), 15 deletions(-) - -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index b99bced..fbc98e2 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -20,20 +20,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) - { - /* -- * Warn developers about inappropriate array_index_nospec() usage. -- * -- * Even if the CPU speculates past the WARN_ONCE branch, the -- * sign bit of @index is taken into account when generating the -- * mask. -- * -- * This warning is compiled out when the compiler can infer that -- * @index and @size are less than LONG_MAX. -- */ -- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, -- "array_index_nospec() limited to range of [0, LONG_MAX]\n")) -- return 0; -- -- /* - * Always calculate and emit the mask even if the compiler - * thinks the mask is not needed. The compiler does not take - * into account the value of @index under speculation. -@@ -44,6 +30,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - #endif - - /* -+ * Warn developers about inappropriate array_index_nospec() usage. 
-+ * -+ * Even if the CPU speculates past the WARN_ONCE branch, the -+ * sign bit of @index is taken into account when generating the -+ * mask. -+ * -+ * This warning is compiled out when the compiler can infer that -+ * @index and @size are less than LONG_MAX. -+ */ -+#define array_index_mask_nospec_check(index, size) \ -+({ \ -+ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ -+ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ -+ _mask = 0; \ -+ else \ -+ _mask = array_index_mask_nospec(index, size); \ -+ _mask; \ -+}) -+ -+/* - * array_index_nospec - sanitize an array index after a bounds check - * - * For a code sequence like: -@@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - ({ \ - typeof(index) _i = (index); \ - typeof(size) _s = (size); \ -- unsigned long _mask = array_index_mask_nospec(_i, _s); \ -+ unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ - \ - BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ - BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch deleted file mode 100644 index 4e57ccfc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 95f8f24919bd97bf372f5edbf9a25d5d358c4596 Mon Sep 17 00:00:00 2001 -From: Darren Kenny <darren.kenny@oracle.com> -Date: Fri, 2 Feb 2018 19:12:20 +0000 -Subject: [PATCH 40/42] x86/speculation: Fix typo IBRS_ATT, which should be - IBRS_ALL - -(cherry picked from commit af189c95a371b59f493dbe0f50c0a09724868881) - -Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") -Signed-off-by: Darren Kenny <darren.kenny@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Masami Hiramatsu <mhiramat@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index df4ecec..300cc15 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future -- * CPUs with IBRS_ATT *might* it be avoided. -+ * CPUs with IBRS_ALL *might* it be avoided. 
- */ - static inline void vmexit_fill_RSB(void) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch deleted file mode 100644 index cc925f0f..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-kaiser-Set-_PAGE_NX-only-if-supported.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 2627b29df65208ad9615fb761e37df13e3328e8c Mon Sep 17 00:00:00 2001 -From: Lepton Wu <ytht.net@gmail.com> -Date: Fri, 12 Jan 2018 13:42:56 -0800 -Subject: [PATCH 041/103] kaiser: Set _PAGE_NX only if supported - -This finally resolve crash if loaded under qemu + haxm. Haitao Shan pointed -out that the reason of that crash is that NX bit get set for page tables. -It seems we missed checking if _PAGE_NX is supported in kaiser_add_user_map - -Link: https://www.spinics.net/lists/kernel/msg2689835.html - -Reviewed-by: Guenter Roeck <groeck@chromium.org> -Signed-off-by: Lepton Wu <ytht.net@gmail.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/mm/kaiser.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index d8376b4..42a5307 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -184,6 +184,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, - * requires that not to be #defined to 0): so mask it off here. - */ - flags &= ~_PAGE_GLOBAL; -+ if (!(__supported_pte_mask & _PAGE_NX)) -+ flags &= ~_PAGE_NX; - - for (; address < end_addr; address += PAGE_SIZE) { - target_address = get_pa_from_mapping(address); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch deleted file mode 100644 index 48dd7bd7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 0f31ea4b42fd0a593d539e2278b1baa35a31a122 Mon Sep 17 00:00:00 2001 -From: Rasmus Villemoes <linux@rasmusvillemoes.dk> -Date: Fri, 16 Feb 2018 13:20:48 -0800 -Subject: [PATCH 41/93] nospec: Allow index argument to have const-qualified - type - -commit b98c6a160a057d5686a8c54c79cc6c8c94a7d0c8 upstream. - -The last expression in a statement expression need not be a bare -variable, quoting gcc docs - - The last thing in the compound statement should be an expression - followed by a semicolon; the value of this subexpression serves as the - value of the entire construct. - -and we already use that in e.g. the min/max macros which end with a -ternary expression. - -This way, we can allow index to have const-qualified type, which will in -some cases avoid the need for introducing a local copy of index of -non-const qualified type. That, in turn, can prevent readers not -familiar with the internals of array_index_nospec from wondering about -the seemingly redundant extra variable, and I think that's worthwhile -considering how confusing the whole _nospec business is. - -The expression _i&_mask has type unsigned long (since that is the type -of _mask, and the BUILD_BUG_ONs guarantee that _i will get promoted to -that), so in order not to change the type of the whole expression, add -a cast back to typeof(_i). 
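The two GNU C mechanics the message leans on — a ({ ... }) statement expression yielding its final expression, and typeof() preserving the argument's possibly const-qualified type — in a self-contained sketch; the mask computation here is a plain ternary stand-in, not the kernel's branchless array_index_mask_nospec():

    #include <stdio.h>

    #define clamp_index(index, size)                                \
    ({                                                              \
            typeof(index) _i = (index);                             \
            unsigned long _mask =                                   \
                    ((unsigned long)_i < (size)) ? ~0UL : 0UL;      \
            /* Final expression = value of the whole block, cast    \
             * back to the index's own type, qualifiers and all. */ \
            (typeof(_i)) (_i & _mask);                              \
    })

    int main(void)
    {
            const unsigned int idx = 3;             /* const works too */
            printf("%u\n", clamp_index(idx, 8u));   /* 3 */
            printf("%u\n", clamp_index(9u, 8u));    /* 0 */
            return 0;
    }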
- -Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Acked-by: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Will Deacon <will.deacon@arm.com> -Cc: linux-arch@vger.kernel.org -Cc: stable@vger.kernel.org -Link: http://lkml.kernel.org/r/151881604837.17395.10812767547837568328.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/nospec.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index fbc98e2..132e3f5 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -72,7 +72,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ - BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ - \ -- _i &= _mask; \ -- _i; \ -+ (typeof(_i)) (_i & _mask); \ - }) - #endif /* _LINUX_NOSPEC_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch deleted file mode 100644 index 1f502096..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-x86-microcode-Do-the-family-check-first.patch +++ /dev/null @@ -1,94 +0,0 @@ -From e614d84ae1ca7bad08645003fb3195a80fbdaae1 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Thu, 12 Oct 2017 13:23:16 +0200 -Subject: [PATCH 41/42] x86/microcode: Do the family check first - -commit 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 upstream with adjustments. - -On CPUs like AMD's Geode, for example, we shouldn't even try to load -microcode because they do not support the modern microcode loading -interface. - -However, we do the family check *after* the other checks whether the -loader has been disabled on the command line or whether we're running in -a guest. - -So move the family checks first in order to exit early if we're being -loaded on an unsupported family. - -Reported-and-tested-by: Sven Glodowski <glodi1@arcor.de> -Signed-off-by: Borislav Petkov <bp@suse.de> -Cc: <stable@vger.kernel.org> # 4.11.. 
-Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://bugzilla.suse.com/show_bug.cgi?id=1061396 -Link: http://lkml.kernel.org/r/20171012112316.977-1-bp@alien8.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/microcode/core.c | 27 ++++++++++++++++++--------- - 1 file changed, 18 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c -index dc0b9f8..0afaf00 100644 ---- a/arch/x86/kernel/cpu/microcode/core.c -+++ b/arch/x86/kernel/cpu/microcode/core.c -@@ -86,9 +86,6 @@ static bool __init check_loader_disabled_bsp(void) - bool *res = &dis_ucode_ldr; - #endif - -- if (!have_cpuid_p()) -- return *res; -- - a = 1; - c = 0; - native_cpuid(&a, &b, &c, &d); -@@ -130,8 +127,9 @@ void __init load_ucode_bsp(void) - { - int vendor; - unsigned int family; -+ bool intel = true; - -- if (check_loader_disabled_bsp()) -+ if (!have_cpuid_p()) - return; - - vendor = x86_cpuid_vendor(); -@@ -139,16 +137,27 @@ void __init load_ucode_bsp(void) - - switch (vendor) { - case X86_VENDOR_INTEL: -- if (family >= 6) -- load_ucode_intel_bsp(); -+ if (family < 6) -+ return; - break; -+ - case X86_VENDOR_AMD: -- if (family >= 0x10) -- load_ucode_amd_bsp(family); -+ if (family < 0x10) -+ return; -+ intel = false; - break; -+ - default: -- break; -+ return; - } -+ -+ if (check_loader_disabled_bsp()) -+ return; -+ -+ if (intel) -+ load_ucode_intel_bsp(); -+ else -+ load_ucode_amd_bsp(family); - } - - static bool check_loader_disabled_ap(void) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-adjust-insn_aux_data-when-patching-insns.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-adjust-insn_aux_data-when-patching-insns.patch deleted file mode 100644 index f6ce7d86..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-bpf-adjust-insn_aux_data-when-patching-insns.patch +++ /dev/null @@ -1,103 +0,0 @@ -From cf7ad5027cb0764ea5276a432a9a3a402d2a2034 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Fri, 22 Dec 2017 16:29:02 +0100 -Subject: [PATCH 042/103] bpf: adjust insn_aux_data when patching insns - -From: Alexei Starovoitov <ast@fb.com> - -[ Upstream commit 8041902dae5299c1f194ba42d14383f734631009 ] - -convert_ctx_accesses() replaces single bpf instruction with a set of -instructions. Adjust corresponding insn_aux_data while patching. -It's needed to make sure subsequent 'for(all insn)' loops -have matching insn and insn_aux_data. - -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Acked-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 39 insertions(+), 5 deletions(-) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 85d1c94..66ee0c4 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -3017,6 +3017,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) - insn->src_reg = 0; - } - -+/* single env->prog->insni[off] instruction was replaced with the range -+ * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying -+ * [0, off) and [off, end) to new locations, so the patched range stays zero -+ */ -+static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, -+ u32 off, u32 cnt) -+{ -+ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; -+ -+ if (cnt == 1) -+ return 0; -+ new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); -+ if (!new_data) -+ return -ENOMEM; -+ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); -+ memcpy(new_data + off + cnt - 1, old_data + off, -+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); -+ env->insn_aux_data = new_data; -+ vfree(old_data); -+ return 0; -+} -+ -+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, -+ const struct bpf_insn *patch, u32 len) -+{ -+ struct bpf_prog *new_prog; -+ -+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len); -+ if (!new_prog) -+ return NULL; -+ if (adjust_insn_aux_data(env, new_prog->len, off, len)) -+ return NULL; -+ return new_prog; -+} -+ - /* convert load instructions that access fields of 'struct __sk_buff' - * into sequence of instructions that access fields of 'struct sk_buff' - */ -@@ -3036,10 +3071,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) - verbose("bpf verifier is misconfigured\n"); - return -EINVAL; - } else if (cnt) { -- new_prog = bpf_patch_insn_single(env->prog, 0, -- insn_buf, cnt); -+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); - if (!new_prog) - return -ENOMEM; -+ - env->prog = new_prog; - delta += cnt - 1; - } -@@ -3060,7 +3095,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) - else - continue; - -- if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) -+ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) - continue; - - cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, -@@ -3070,8 +3105,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) - return -EINVAL; - } - -- new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, -- cnt); -+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); - if (!new_prog) - return -ENOMEM; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch deleted file mode 100644 index d74a2ba7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch +++ /dev/null @@ -1,85 +0,0 @@ -From ae4a53f80d78b49ff776956f133cb59344aa10e9 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Fri, 16 Feb 2018 13:20:42 -0800 -Subject: [PATCH 42/93] nospec: Kill array_index_nospec_mask_check() - -commit 1d91c1d2c80cb70e2e553845e278b87a960c04da upstream. - -There are multiple problems with the dynamic sanity checking in -array_index_nospec_mask_check(): - -* It causes unnecessary overhead in the 32-bit case since integer sized - @index values will no longer cause the check to be compiled away like - in the 64-bit case. - -* In the 32-bit case it may trigger with user controllable input when - the expectation is that should only trigger during development of new - kernel enabling. - -* The macro reuses the input parameter in multiple locations which is - broken if someone passes an expression like 'index++' to - array_index_nospec(). 
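
The third point is the classic macro double-evaluation hazard. A small stand-alone illustration (BAD_MIN() is a deliberately broken example, not kernel code): because the argument text is expanded twice, an argument carrying a side effect has that side effect applied twice.

    #include <stdio.h>

    #define BAD_MIN(a, b) ((a) < (b) ? (a) : (b))   /* evaluates 'a' twice */

    int main(void)
    {
            int index = 3;
            int v = BAD_MIN(index++, 10);   /* index++ expands twice */

            /* prints v=4 index=5; a real function would give v=3 index=4 */
            printf("v=%d index=%d\n", v, index);
            return 0;
    }
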
- -Reported-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Will Deacon <will.deacon@arm.com> -Cc: linux-arch@vger.kernel.org -Link: http://lkml.kernel.org/r/151881604278.17395.6605847763178076520.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/nospec.h | 22 +--------------------- - 1 file changed, 1 insertion(+), 21 deletions(-) - -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index 132e3f5..172a19d 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -30,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - #endif - - /* -- * Warn developers about inappropriate array_index_nospec() usage. -- * -- * Even if the CPU speculates past the WARN_ONCE branch, the -- * sign bit of @index is taken into account when generating the -- * mask. -- * -- * This warning is compiled out when the compiler can infer that -- * @index and @size are less than LONG_MAX. -- */ --#define array_index_mask_nospec_check(index, size) \ --({ \ -- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ -- "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ -- _mask = 0; \ -- else \ -- _mask = array_index_mask_nospec(index, size); \ -- _mask; \ --}) -- --/* - * array_index_nospec - sanitize an array index after a bounds check - * - * For a code sequence like: -@@ -67,7 +47,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - ({ \ - typeof(index) _i = (index); \ - typeof(size) _s = (size); \ -- unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ -+ unsigned long _mask = array_index_mask_nospec(_i, _s); \ - \ - BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ - BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-move-fixup_bpf_calls-function.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-move-fixup_bpf_calls-function.patch deleted file mode 100644 index 3d479ca1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-bpf-move-fixup_bpf_calls-function.patch +++ /dev/null @@ -1,169 +0,0 @@ -From cf435cf5fbbd46f6e6ea1fa6f4175f2f1915af6d Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@fb.com> -Date: Wed, 15 Mar 2017 18:26:39 -0700 -Subject: [PATCH 043/103] bpf: move fixup_bpf_calls() function - -commit e245c5c6a5656e4d61aa7bb08e9694fd6e5b2b9d upstream. - -no functional change. -move fixup_bpf_calls() to verifier.c -it's being refactored in the next patch - -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Acked-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Cc: Jiri Slaby <jslaby@suse.cz> -[backported to 4.9 - gregkh] -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/syscall.c | 54 --------------------------------------------------- - kernel/bpf/verifier.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 54 insertions(+), 54 deletions(-) - -diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c -index 237f3d6..6ae783b 100644 ---- a/kernel/bpf/syscall.c -+++ b/kernel/bpf/syscall.c -@@ -539,57 +539,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) - list_add(&tl->list_node, &bpf_prog_types); - } - --/* fixup insn->imm field of bpf_call instructions: -- * if (insn->imm == BPF_FUNC_map_lookup_elem) -- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; -- * else if (insn->imm == BPF_FUNC_map_update_elem) -- * insn->imm = bpf_map_update_elem - __bpf_call_base; -- * else ... -- * -- * this function is called after eBPF program passed verification -- */ --static void fixup_bpf_calls(struct bpf_prog *prog) --{ -- const struct bpf_func_proto *fn; -- int i; -- -- for (i = 0; i < prog->len; i++) { -- struct bpf_insn *insn = &prog->insnsi[i]; -- -- if (insn->code == (BPF_JMP | BPF_CALL)) { -- /* we reach here when program has bpf_call instructions -- * and it passed bpf_check(), means that -- * ops->get_func_proto must have been supplied, check it -- */ -- BUG_ON(!prog->aux->ops->get_func_proto); -- -- if (insn->imm == BPF_FUNC_get_route_realm) -- prog->dst_needed = 1; -- if (insn->imm == BPF_FUNC_get_prandom_u32) -- bpf_user_rnd_init_once(); -- if (insn->imm == BPF_FUNC_tail_call) { -- /* mark bpf_tail_call as different opcode -- * to avoid conditional branch in -- * interpeter for every normal call -- * and to prevent accidental JITing by -- * JIT compiler that doesn't support -- * bpf_tail_call yet -- */ -- insn->imm = 0; -- insn->code |= BPF_X; -- continue; -- } -- -- fn = prog->aux->ops->get_func_proto(insn->imm); -- /* all functions that have prototype and verifier allowed -- * programs to call them, must be real in-kernel functions -- */ -- BUG_ON(!fn->func); -- insn->imm = fn->func - __bpf_call_base; -- } -- } --} -- - /* drop refcnt on maps used by eBPF program and free auxilary data */ - static void free_used_maps(struct bpf_prog_aux *aux) - { -@@ -782,9 +731,6 @@ static int bpf_prog_load(union bpf_attr *attr) - if (err < 0) - goto free_used_maps; - -- /* fixup BPF_CALL->imm field */ -- fixup_bpf_calls(prog); -- - /* eBPF program is ready to be JITed */ - prog = bpf_prog_select_runtime(prog, &err); - if (err < 0) -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 66ee0c4..1176556 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -3119,6 +3119,57 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) - return 0; - } - -+/* fixup insn->imm field of bpf_call instructions: -+ * if (insn->imm == BPF_FUNC_map_lookup_elem) -+ * insn->imm = bpf_map_lookup_elem - __bpf_call_base; -+ * else if (insn->imm == BPF_FUNC_map_update_elem) -+ * insn->imm = bpf_map_update_elem - __bpf_call_base; -+ * else ... 
-+ * -+ * this function is called after eBPF program passed verification -+ */ -+static void fixup_bpf_calls(struct bpf_prog *prog) -+{ -+ const struct bpf_func_proto *fn; -+ int i; -+ -+ for (i = 0; i < prog->len; i++) { -+ struct bpf_insn *insn = &prog->insnsi[i]; -+ -+ if (insn->code == (BPF_JMP | BPF_CALL)) { -+ /* we reach here when program has bpf_call instructions -+ * and it passed bpf_check(), means that -+ * ops->get_func_proto must have been supplied, check it -+ */ -+ BUG_ON(!prog->aux->ops->get_func_proto); -+ -+ if (insn->imm == BPF_FUNC_get_route_realm) -+ prog->dst_needed = 1; -+ if (insn->imm == BPF_FUNC_get_prandom_u32) -+ bpf_user_rnd_init_once(); -+ if (insn->imm == BPF_FUNC_tail_call) { -+ /* mark bpf_tail_call as different opcode -+ * to avoid conditional branch in -+ * interpeter for every normal call -+ * and to prevent accidental JITing by -+ * JIT compiler that doesn't support -+ * bpf_tail_call yet -+ */ -+ insn->imm = 0; -+ insn->code |= BPF_X; -+ continue; -+ } -+ -+ fn = prog->aux->ops->get_func_proto(insn->imm); -+ /* all functions that have prototype and verifier allowed -+ * programs to call them, must be real in-kernel functions -+ */ -+ BUG_ON(!fn->func); -+ insn->imm = fn->func - __bpf_call_base; -+ } -+ } -+} -+ - static void free_states(struct bpf_verifier_env *env) - { - struct bpf_verifier_state_list *sl, *sln; -@@ -3217,6 +3268,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) - /* program is valid, convert *(u32*)(ctx + off) accesses */ - ret = convert_ctx_accesses(env); - -+ if (ret == 0) -+ fixup_bpf_calls(env->prog); -+ - if (log_level && log_len >= log_size - 1) { - BUG_ON(log_len >= log_size); - /* verifier log exceeded user supplied buffer */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch deleted file mode 100644 index 33ce3dd7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 3997af07cbe06033b93bffe163982e30f86d4ac7 Mon Sep 17 00:00:00 2001 -From: Dan Williams <dan.j.williams@intel.com> -Date: Fri, 16 Feb 2018 13:20:54 -0800 -Subject: [PATCH 43/93] nospec: Include <asm/barrier.h> dependency - -commit eb6174f6d1be16b19cfa43dac296bfed003ce1a6 upstream. - -The nospec.h header expects the per-architecture header file -<asm/barrier.h> to optionally define array_index_mask_nospec(). Include -that dependency to prevent inadvertent fallback to the default -array_index_mask_nospec() implementation. - -The default implementation may not provide a full mitigation -on architectures that perform data value speculation. 
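
For reference, a branchless mask of this kind can be computed as in the sketch below, modeled on the kernel's generic helper but simplified to plain C: sizeof(long) * 8 stands in for BITS_PER_LONG, the arithmetic right shift of a negative value is a GCC/Clang behavior the kernel relies on, and the formula is valid while index and size stay below LONG_MAX.

    #include <stdio.h>

    /* ~0UL when index < size, 0 otherwise, with no branch to mispredict */
    static unsigned long mask_nospec(unsigned long index, unsigned long size)
    {
            /*
             * (index | (size - 1 - index)) has its sign bit set only
             * when index >= size; ~ flips it and the arithmetic right
             * shift smears it across the whole word.
             */
            return ~(long)(index | (size - 1UL - index)) >> (sizeof(long) * 8 - 1);
    }

    int main(void)
    {
            printf("%lx\n", mask_nospec(3, 8));     /* all ones */
            printf("%lx\n", mask_nospec(9, 8));     /* 0 */
            return 0;
    }

An architecture that speculates on data values would override this in <asm/barrier.h> with a stronger, barrier-based sequence, which is exactly why the explicit include matters.
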
- -Reported-by: Christian Borntraeger <borntraeger@de.ibm.com> -Signed-off-by: Dan Williams <dan.j.williams@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Woodhouse <dwmw2@infradead.org> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Will Deacon <will.deacon@arm.com> -Cc: linux-arch@vger.kernel.org -Link: http://lkml.kernel.org/r/151881605404.17395.1341935530792574707.stgit@dwillia2-desk3.amr.corp.intel.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/nospec.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index 172a19d..e791ebc 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -5,6 +5,7 @@ - - #ifndef _LINUX_NOSPEC_H - #define _LINUX_NOSPEC_H -+#include <asm/barrier.h> - - /** - * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-refactor-fixup_bpf_calls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-refactor-fixup_bpf_calls.patch deleted file mode 100644 index bd75a23a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-bpf-refactor-fixup_bpf_calls.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 9db9c08ce79c7ca4b0038faf03e31078f0b5dd4c Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@fb.com> -Date: Wed, 15 Mar 2017 18:26:40 -0700 -Subject: [PATCH 044/103] bpf: refactor fixup_bpf_calls() - -commit 79741b3bdec01a8628368fbcfccc7d189ed606cb upstream. - -reduce indent and make it iterate over instructions similar to -convert_ctx_accesses(). Also convert hard BUG_ON into soft verifier error. - -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Acked-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: David S. Miller <davem@davemloft.net> -Cc: Jiri Slaby <jslaby@suse.cz> -[Backported to 4.9.y - gregkh] -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++--------------------------- - 1 file changed, 34 insertions(+), 39 deletions(-) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 1176556..3f24718 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -3119,55 +3119,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) - return 0; - } - --/* fixup insn->imm field of bpf_call instructions: -- * if (insn->imm == BPF_FUNC_map_lookup_elem) -- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; -- * else if (insn->imm == BPF_FUNC_map_update_elem) -- * insn->imm = bpf_map_update_elem - __bpf_call_base; -- * else ... 
-+/* fixup insn->imm field of bpf_call instructions - * - * this function is called after eBPF program passed verification - */ --static void fixup_bpf_calls(struct bpf_prog *prog) -+static int fixup_bpf_calls(struct bpf_verifier_env *env) - { -+ struct bpf_prog *prog = env->prog; -+ struct bpf_insn *insn = prog->insnsi; - const struct bpf_func_proto *fn; -+ const int insn_cnt = prog->len; - int i; - -- for (i = 0; i < prog->len; i++) { -- struct bpf_insn *insn = &prog->insnsi[i]; -+ for (i = 0; i < insn_cnt; i++, insn++) { -+ if (insn->code != (BPF_JMP | BPF_CALL)) -+ continue; - -- if (insn->code == (BPF_JMP | BPF_CALL)) { -- /* we reach here when program has bpf_call instructions -- * and it passed bpf_check(), means that -- * ops->get_func_proto must have been supplied, check it -- */ -- BUG_ON(!prog->aux->ops->get_func_proto); -- -- if (insn->imm == BPF_FUNC_get_route_realm) -- prog->dst_needed = 1; -- if (insn->imm == BPF_FUNC_get_prandom_u32) -- bpf_user_rnd_init_once(); -- if (insn->imm == BPF_FUNC_tail_call) { -- /* mark bpf_tail_call as different opcode -- * to avoid conditional branch in -- * interpeter for every normal call -- * and to prevent accidental JITing by -- * JIT compiler that doesn't support -- * bpf_tail_call yet -- */ -- insn->imm = 0; -- insn->code |= BPF_X; -- continue; -- } -+ if (insn->imm == BPF_FUNC_get_route_realm) -+ prog->dst_needed = 1; -+ if (insn->imm == BPF_FUNC_get_prandom_u32) -+ bpf_user_rnd_init_once(); -+ if (insn->imm == BPF_FUNC_tail_call) { -+ /* mark bpf_tail_call as different opcode to avoid -+ * conditional branch in the interpeter for every normal -+ * call and to prevent accidental JITing by JIT compiler -+ * that doesn't support bpf_tail_call yet -+ */ -+ insn->imm = 0; -+ insn->code |= BPF_X; -+ continue; -+ } - -- fn = prog->aux->ops->get_func_proto(insn->imm); -- /* all functions that have prototype and verifier allowed -- * programs to call them, must be real in-kernel functions -- */ -- BUG_ON(!fn->func); -- insn->imm = fn->func - __bpf_call_base; -+ fn = prog->aux->ops->get_func_proto(insn->imm); -+ /* all functions that have prototype and verifier allowed -+ * programs to call them, must be real in-kernel functions -+ */ -+ if (!fn->func) { -+ verbose("kernel subsystem misconfigured func %d\n", -+ insn->imm); -+ return -EFAULT; - } -+ insn->imm = fn->func - __bpf_call_base; - } -+ -+ return 0; - } - - static void free_states(struct bpf_verifier_env *env) -@@ -3269,7 +3264,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) - ret = convert_ctx_accesses(env); - - if (ret == 0) -- fixup_bpf_calls(env->prog); -+ ret = fixup_bpf_calls(env); - - if (log_level && log_len >= log_size - 1) { - BUG_ON(log_len >= log_size); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch deleted file mode 100644 index 1baf848c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch +++ /dev/null @@ -1,239 +0,0 @@ -From 93715f38b4419faa4f84a9bb536f11d89c5c7427 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 29 Apr 2018 15:20:11 +0200 -Subject: [PATCH 44/93] prctl: Add speculation control prctls - -commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream - -Add two new prctls to control aspects of speculation related vulnerabilites -and their mitigations to provide finer grained control over 
performance -impacting mitigations. - -PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature -which is selected with arg2 of prctl(2). The return value uses bit 0-2 with -the following meaning: - -Bit Define Description -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled - -If all bits are 0 the CPU is not affected by the speculation misfeature. - -If PR_SPEC_PRCTL is set, then the per task control of the mitigation is -available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation -misfeature will fail. - -PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which -is selected by arg2 of prctl(2) per task. arg3 is used to hand in the -control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. - -The common return values are: - -EINVAL prctl is not implemented by the architecture or the unused prctl() - arguments are not 0 -ENODEV arg2 is selecting a not supported speculation misfeature - -PR_SET_SPECULATION_CTRL has these additional return values: - -ERANGE arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE -ENXIO prctl control of the selected speculation misfeature is disabled - -The first supported controlable speculation misfeature is -PR_SPEC_STORE_BYPASS. Add the define so this can be shared between -architectures. - -Based on an initial patch from Tim Chen and mostly rewritten. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/spec_ctrl.txt | 86 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/nospec.h | 5 +++ - include/uapi/linux/prctl.h | 11 ++++++ - kernel/sys.c | 22 ++++++++++++ - 4 files changed, 124 insertions(+) - create mode 100644 Documentation/spec_ctrl.txt - -diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt -new file mode 100644 -index 0000000..ddbebcd ---- /dev/null -+++ b/Documentation/spec_ctrl.txt -@@ -0,0 +1,86 @@ -+=================== -+Speculation Control -+=================== -+ -+Quite some CPUs have speculation related misfeatures which are in fact -+vulnerabilites causing data leaks in various forms even accross privilege -+domains. -+ -+The kernel provides mitigation for such vulnerabilities in various -+forms. Some of these mitigations are compile time configurable and some on -+the kernel command line. -+ -+There is also a class of mitigations which are very expensive, but they can -+be restricted to a certain set of processes or tasks in controlled -+environments. The mechanism to control these mitigations is via -+:manpage:`prctl(2)`. -+ -+There are two prctl options which are related to this: -+ -+ * PR_GET_SPECULATION_CTRL -+ -+ * PR_SET_SPECULATION_CTRL -+ -+PR_GET_SPECULATION_CTRL -+----------------------- -+ -+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature -+which is selected with arg2 of prctl(2). 
The return value uses bits 0-2 with -+the following meaning: -+ -+==== ================ =================================================== -+Bit Define Description -+==== ================ =================================================== -+0 PR_SPEC_PRCTL Mitigation can be controlled per task by -+ PR_SET_SPECULATION_CTRL -+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is -+ disabled -+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is -+ enabled -+==== ================ =================================================== -+ -+If all bits are 0 the CPU is not affected by the speculation misfeature. -+ -+If PR_SPEC_PRCTL is set, then the per task control of the mitigation is -+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation -+misfeature will fail. -+ -+PR_SET_SPECULATION_CTRL -+----------------------- -+PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which -+is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand -+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. -+ -+Common error codes -+------------------ -+======= ================================================================= -+Value Meaning -+======= ================================================================= -+EINVAL The prctl is not implemented by the architecture or unused -+ prctl(2) arguments are not 0 -+ -+ENODEV arg2 is selecting a not supported speculation misfeature -+======= ================================================================= -+ -+PR_SET_SPECULATION_CTRL error codes -+----------------------------------- -+======= ================================================================= -+Value Meaning -+======= ================================================================= -+0 Success -+ -+ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor -+ PR_SPEC_DISABLE -+ -+ENXIO Control of the selected speculation misfeature is not possible. -+ See PR_GET_SPECULATION_CTRL. 
-+======= ================================================================= -+ -+Speculation misfeature controls -+------------------------------- -+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass -+ -+ Invocations: -+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); -+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); -+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index e791ebc..700bb8a 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - \ - (typeof(_i)) (_i & _mask); \ - }) -+ -+/* Speculation control prctl */ -+int arch_prctl_spec_ctrl_get(unsigned long which); -+int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); -+ - #endif /* _LINUX_NOSPEC_H */ -diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h -index a8d0759..3b316be 100644 ---- a/include/uapi/linux/prctl.h -+++ b/include/uapi/linux/prctl.h -@@ -197,4 +197,15 @@ struct prctl_mm_map { - # define PR_CAP_AMBIENT_LOWER 3 - # define PR_CAP_AMBIENT_CLEAR_ALL 4 - -+/* Per task speculation control */ -+#define PR_GET_SPECULATION_CTRL 52 -+#define PR_SET_SPECULATION_CTRL 53 -+/* Speculation control variants */ -+# define PR_SPEC_STORE_BYPASS 0 -+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ -+# define PR_SPEC_NOT_AFFECTED 0 -+# define PR_SPEC_PRCTL (1UL << 0) -+# define PR_SPEC_ENABLE (1UL << 1) -+# define PR_SPEC_DISABLE (1UL << 2) -+ - #endif /* _LINUX_PRCTL_H */ -diff --git a/kernel/sys.c b/kernel/sys.c -index 89d5be4..312c985 100644 ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -53,6 +53,8 @@ - #include <linux/uidgid.h> - #include <linux/cred.h> - -+#include <linux/nospec.h> -+ - #include <linux/kmsg_dump.h> - /* Move somewhere else to avoid recompiling? 
*/ - #include <generated/utsrelease.h> -@@ -2072,6 +2074,16 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) - } - #endif - -+int __weak arch_prctl_spec_ctrl_get(unsigned long which) -+{ -+ return -EINVAL; -+} -+ -+int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) -+{ -+ return -EINVAL; -+} -+ - SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, - unsigned long, arg4, unsigned long, arg5) - { -@@ -2270,6 +2282,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, - case PR_GET_FP_MODE: - error = GET_FP_MODE(me); - break; -+ case PR_GET_SPECULATION_CTRL: -+ if (arg3 || arg4 || arg5) -+ return -EINVAL; -+ error = arch_prctl_spec_ctrl_get(arg2); -+ break; -+ case PR_SET_SPECULATION_CTRL: -+ if (arg4 || arg5) -+ return -EINVAL; -+ error = arch_prctl_spec_ctrl_set(arg2, arg3); -+ break; - default: - error = -EINVAL; - break; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-prevent-out-of-bounds-speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-prevent-out-of-bounds-speculation.patch deleted file mode 100644 index 8f746ad1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-bpf-prevent-out-of-bounds-speculation.patch +++ /dev/null @@ -1,274 +0,0 @@ -From cc702f102892308be35e4f0dc52519a9c62c3fdc Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@kernel.org> -Date: Sun, 7 Jan 2018 17:33:02 -0800 -Subject: [PATCH 045/103] bpf: prevent out-of-bounds speculation - -commit b2157399cc9898260d6031c5bfe45fe137c1fbe7 upstream. - -Under speculation, CPUs may mis-predict branches in bounds checks. Thus, -memory accesses under a bounds check may be speculated even if the -bounds check fails, providing a primitive for building a side channel. - -To avoid leaking kernel data round up array-based maps and mask the index -after bounds check, so speculated load with out of bounds index will load -either valid value from the array or zero from the padded area. - -Unconditionally mask index for all array types even when max_entries -are not rounded to power of 2 for root user. -When map is created by unpriv user generate a sequence of bpf insns -that includes AND operation to make sure that JITed code includes -the same 'index & index_mask' operation. - -If prog_array map is created by unpriv user replace - bpf_tail_call(ctx, map, index); -with - if (index >= max_entries) { - index &= map->index_mask; - bpf_tail_call(ctx, map, index); - } -(along with roundup to power 2) to prevent out-of-bounds speculation. -There is secondary redundant 'if (index >= max_entries)' in the interpreter -and in all JITs, but they can be optimized later if necessary. - -Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) -cannot be used by unpriv, so no changes there. - -That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on -all architectures with and without JIT. - -v2->v3: -Daniel noticed that attack potentially can be crafted via syscall commands -without loading the program, so add masking to those paths as well. 
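
In plain C the masking pattern reads roughly like the sketch below; struct toy_array and toy_lookup() are illustrative stand-ins for bpf_array and the array-map lookup, assuming the allocation has been rounded up to the power-of-two size implied by index_mask:

    #include <stdio.h>

    struct toy_array {
            unsigned int max_entries;       /* user-visible bound (6) */
            unsigned int index_mask;        /* roundup_pow_of_two(6) - 1 = 7 */
            unsigned long value[8];         /* padded to mask + 1 slots */
    };

    static unsigned long toy_lookup(const struct toy_array *a, unsigned int index)
    {
            if (index >= a->max_entries)
                    return 0;
            /*
             * Even if the branch above is mis-speculated, the AND keeps
             * the load inside value[]: it hits a valid or zero-padded
             * slot instead of attacker-chosen out-of-bounds memory.
             */
            return a->value[index & a->index_mask];
    }

    int main(void)
    {
            struct toy_array a = {
                    .max_entries = 6, .index_mask = 7,
                    .value = { 10, 11, 12, 13, 14, 15 },
            };

            printf("%lu\n", toy_lookup(&a, 2));     /* 12 */
            printf("%lu\n", toy_lookup(&a, 100));   /* 0, fails bounds check */
            return 0;
    }
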
- -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Acked-by: John Fastabend <john.fastabend@gmail.com> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Cc: Jiri Slaby <jslaby@suse.cz> -[ Backported to 4.9 - gregkh ] -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/bpf.h | 2 ++ - include/linux/bpf_verifier.h | 6 +++++- - kernel/bpf/arraymap.c | 31 ++++++++++++++++++++++--------- - kernel/bpf/verifier.c | 42 +++++++++++++++++++++++++++++++++++++++--- - 4 files changed, 68 insertions(+), 13 deletions(-) - -diff --git a/include/linux/bpf.h b/include/linux/bpf.h -index c201017..0dbb21b 100644 ---- a/include/linux/bpf.h -+++ b/include/linux/bpf.h -@@ -43,6 +43,7 @@ struct bpf_map { - u32 max_entries; - u32 map_flags; - u32 pages; -+ bool unpriv_array; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; -@@ -189,6 +190,7 @@ struct bpf_prog_aux { - struct bpf_array { - struct bpf_map map; - u32 elem_size; -+ u32 index_mask; - /* 'ownership' of prog_array is claimed by the first program that - * is going to use this map or by the first program which FD is stored - * in the map to make sure that all callers and callees have the same -diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h -index a13b031..2edf8de 100644 ---- a/include/linux/bpf_verifier.h -+++ b/include/linux/bpf_verifier.h -@@ -66,7 +66,11 @@ struct bpf_verifier_state_list { - }; - - struct bpf_insn_aux_data { -- enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ -+ union { -+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ -+ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ -+ }; -+ bool seen; /* this insn was processed by the verifier */ - }; - - #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ -diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c -index a2ac051..eeb7f1b 100644 ---- a/kernel/bpf/arraymap.c -+++ b/kernel/bpf/arraymap.c -@@ -47,9 +47,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) - static struct bpf_map *array_map_alloc(union bpf_attr *attr) - { - bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; -+ u32 elem_size, index_mask, max_entries; -+ bool unpriv = !capable(CAP_SYS_ADMIN); - struct bpf_array *array; - u64 array_size; -- u32 elem_size; - - /* check sanity of attributes */ - if (attr->max_entries == 0 || attr->key_size != 4 || -@@ -64,11 +65,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) - - elem_size = round_up(attr->value_size, 8); - -+ max_entries = attr->max_entries; -+ index_mask = roundup_pow_of_two(max_entries) - 1; -+ -+ if (unpriv) -+ /* round up array size to nearest power of 2, -+ * since cpu will speculate within index_mask limits -+ */ -+ max_entries = index_mask + 1; -+ - array_size = sizeof(*array); - if (percpu) -- array_size += (u64) attr->max_entries * sizeof(void *); -+ array_size += (u64) max_entries * sizeof(void *); - else -- array_size += (u64) attr->max_entries * elem_size; -+ array_size += (u64) max_entries * elem_size; - - /* make sure there is no u32 overflow later in round_up() */ - if (array_size >= U32_MAX - PAGE_SIZE) -@@ -82,6 +92,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) - if (!array) - return ERR_PTR(-ENOMEM); - } -+ array->index_mask = index_mask; -+ array->map.unpriv_array = unpriv; - - /* copy mandatory map attributes */ - array->map.map_type = attr->map_type; -@@ -115,7 +127,7 @@ static void 
*array_map_lookup_elem(struct bpf_map *map, void *key) - if (unlikely(index >= array->map.max_entries)) - return NULL; - -- return array->value + array->elem_size * index; -+ return array->value + array->elem_size * (index & array->index_mask); - } - - /* Called from eBPF program */ -@@ -127,7 +139,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) - if (unlikely(index >= array->map.max_entries)) - return NULL; - -- return this_cpu_ptr(array->pptrs[index]); -+ return this_cpu_ptr(array->pptrs[index & array->index_mask]); - } - - int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) -@@ -147,7 +159,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) - */ - size = round_up(map->value_size, 8); - rcu_read_lock(); -- pptr = array->pptrs[index]; -+ pptr = array->pptrs[index & array->index_mask]; - for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); - off += size; -@@ -195,10 +207,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, - return -EEXIST; - - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) -- memcpy(this_cpu_ptr(array->pptrs[index]), -+ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), - value, map->value_size); - else -- memcpy(array->value + array->elem_size * index, -+ memcpy(array->value + -+ array->elem_size * (index & array->index_mask), - value, map->value_size); - return 0; - } -@@ -232,7 +245,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, - */ - size = round_up(map->value_size, 8); - rcu_read_lock(); -- pptr = array->pptrs[index]; -+ pptr = array->pptrs[index & array->index_mask]; - for_each_possible_cpu(cpu) { - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); - off += size; -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 3f24718..787b851 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -1165,7 +1165,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) - } - } - --static int check_call(struct bpf_verifier_env *env, int func_id) -+static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) - { - struct bpf_verifier_state *state = &env->cur_state; - const struct bpf_func_proto *fn = NULL; -@@ -1216,6 +1216,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) - err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); - if (err) - return err; -+ if (func_id == BPF_FUNC_tail_call) { -+ if (meta.map_ptr == NULL) { -+ verbose("verifier bug\n"); -+ return -EINVAL; -+ } -+ env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; -+ } - err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); - if (err) - return err; -@@ -2799,7 +2806,7 @@ static int do_check(struct bpf_verifier_env *env) - return -EINVAL; - } - -- err = check_call(env, insn->imm); -+ err = check_call(env, insn->imm, insn_idx); - if (err) - return err; - -@@ -3129,7 +3136,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) - struct bpf_insn *insn = prog->insnsi; - const struct bpf_func_proto *fn; - const int insn_cnt = prog->len; -- int i; -+ struct bpf_insn insn_buf[16]; -+ struct bpf_prog *new_prog; -+ struct bpf_map *map_ptr; -+ int i, cnt, delta = 0; -+ - - for (i = 0; i < insn_cnt; i++, insn++) { - if (insn->code != (BPF_JMP | BPF_CALL)) -@@ -3147,6 +3158,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) - */ - insn->imm = 0; - insn->code |= BPF_X; -+ -+ /* instead of changing every JIT dealing with 
tail_call -+ * emit two extra insns: -+ * if (index >= max_entries) goto out; -+ * index &= array->index_mask; -+ * to avoid out-of-bounds cpu speculation -+ */ -+ map_ptr = env->insn_aux_data[i + delta].map_ptr; -+ if (!map_ptr->unpriv_array) -+ continue; -+ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, -+ map_ptr->max_entries, 2); -+ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, -+ container_of(map_ptr, -+ struct bpf_array, -+ map)->index_mask); -+ insn_buf[2] = *insn; -+ cnt = 3; -+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); -+ if (!new_prog) -+ return -ENOMEM; -+ -+ delta += cnt - 1; -+ env->prog = prog = new_prog; -+ insn = new_prog->insnsi + i + delta; - continue; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch deleted file mode 100644 index 5c1e6d48..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch +++ /dev/null @@ -1,162 +0,0 @@ -From e2a9a40a2a4fbebc999eacc678c2af449db5af11 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Tue, 1 May 2018 15:19:04 -0700 -Subject: [PATCH 45/93] nospec: Allow getting/setting on non-current task - -commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream - -Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than -current. - -This is needed both for /proc/$pid/status queries and for seccomp (since -thread-syncing can trigger seccomp in non-current threads). - -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++----------- - include/linux/nospec.h | 7 +++++-- - kernel/sys.c | 9 +++++---- - 3 files changed, 26 insertions(+), 17 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index b7d9adf..3760931 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -529,31 +529,35 @@ static void ssb_select_mitigation() - - #undef pr_fmt - --static int ssb_prctl_set(unsigned long ctrl) -+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - { -- bool rds = !!test_tsk_thread_flag(current, TIF_RDS); -+ bool rds = !!test_tsk_thread_flag(task, TIF_RDS); - - if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) - return -ENXIO; - - if (ctrl == PR_SPEC_ENABLE) -- clear_tsk_thread_flag(current, TIF_RDS); -+ clear_tsk_thread_flag(task, TIF_RDS); - else -- set_tsk_thread_flag(current, TIF_RDS); -+ set_tsk_thread_flag(task, TIF_RDS); - -- if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) -+ /* -+ * If being set on non-current task, delay setting the CPU -+ * mitigation until it is next scheduled. 
-+ */ -+ if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) - speculative_store_bypass_update(); - - return 0; - } - --static int ssb_prctl_get(void) -+static int ssb_prctl_get(struct task_struct *task) - { - switch (ssb_mode) { - case SPEC_STORE_BYPASS_DISABLE: - return PR_SPEC_DISABLE; - case SPEC_STORE_BYPASS_PRCTL: -- if (test_tsk_thread_flag(current, TIF_RDS)) -+ if (test_tsk_thread_flag(task, TIF_RDS)) - return PR_SPEC_PRCTL | PR_SPEC_DISABLE; - return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - default: -@@ -563,24 +567,25 @@ static int ssb_prctl_get(void) - } - } - --int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) -+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, -+ unsigned long ctrl) - { - if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) - return -ERANGE; - - switch (which) { - case PR_SPEC_STORE_BYPASS: -- return ssb_prctl_set(ctrl); -+ return ssb_prctl_set(task, ctrl); - default: - return -ENODEV; - } - } - --int arch_prctl_spec_ctrl_get(unsigned long which) -+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) - { - switch (which) { - case PR_SPEC_STORE_BYPASS: -- return ssb_prctl_get(); -+ return ssb_prctl_get(task); - default: - return -ENODEV; - } -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index 700bb8a..a908c95 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -7,6 +7,8 @@ - #define _LINUX_NOSPEC_H - #include <asm/barrier.h> - -+struct task_struct; -+ - /** - * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise - * @index: array element index -@@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - }) - - /* Speculation control prctl */ --int arch_prctl_spec_ctrl_get(unsigned long which); --int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); -+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); -+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, -+ unsigned long ctrl); - - #endif /* _LINUX_NOSPEC_H */ -diff --git a/kernel/sys.c b/kernel/sys.c -index 312c985..143cd63 100644 ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -2074,12 +2074,13 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) - } - #endif - --int __weak arch_prctl_spec_ctrl_get(unsigned long which) -+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) - { - return -EINVAL; - } - --int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) -+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, -+ unsigned long ctrl) - { - return -EINVAL; - } -@@ -2285,12 +2286,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, - case PR_GET_SPECULATION_CTRL: - if (arg3 || arg4 || arg5) - return -EINVAL; -- error = arch_prctl_spec_ctrl_get(arg2); -+ error = arch_prctl_spec_ctrl_get(me, arg2); - break; - case PR_SET_SPECULATION_CTRL: - if (arg4 || arg5) - return -EINVAL; -- error = arch_prctl_spec_ctrl_set(arg2, arg3); -+ error = arch_prctl_spec_ctrl_set(me, arg2, arg3); - break; - default: - error = -EINVAL; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch deleted file mode 100644 index ef530214..00000000 --- 
a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 5583fb3990982a52cfdcc0d1bb0cc991bee429b6 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Wed, 10 Jan 2018 23:25:05 +0100 -Subject: [PATCH 046/103] bpf, array: fix overflow in max_entries and undefined - behavior in index_mask - -commit bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 upstream. - -syzkaller tried to alloc a map with 0xfffffffd entries out of a userns, -and thus unprivileged. With the recently added logic in b2157399cc98 -("bpf: prevent out-of-bounds speculation") we round this up to the next -power of two value for max_entries for unprivileged such that we can -apply proper masking into potentially zeroed out map slots. - -However, this will generate an index_mask of 0xffffffff, and therefore -a + 1 will let this overflow into new max_entries of 0. This will pass -allocation, etc, and later on map access we still enforce on the original -attr->max_entries value which was 0xfffffffd, therefore triggering GPF -all over the place. Thus bail out on overflow in such case. - -Moreover, on 32 bit archs roundup_pow_of_two() can also not be used, -since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit -space is undefined. Therefore, do this by hand in a 64 bit variable. - -This fixes all the issues triggered by syzkaller's reproducers. - -Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") -Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com -Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com -Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com -Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com -Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/arraymap.c | 18 +++++++++++++++--- - 1 file changed, 15 insertions(+), 3 deletions(-) - -diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c -index eeb7f1b..c6c0b62 100644 ---- a/kernel/bpf/arraymap.c -+++ b/kernel/bpf/arraymap.c -@@ -50,7 +50,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) - u32 elem_size, index_mask, max_entries; - bool unpriv = !capable(CAP_SYS_ADMIN); - struct bpf_array *array; -- u64 array_size; -+ u64 array_size, mask64; - - /* check sanity of attributes */ - if (attr->max_entries == 0 || attr->key_size != 4 || -@@ -66,13 +66,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) - elem_size = round_up(attr->value_size, 8); - - max_entries = attr->max_entries; -- index_mask = roundup_pow_of_two(max_entries) - 1; - -- if (unpriv) -+ /* On 32 bit archs roundup_pow_of_two() with max_entries that has -+ * upper most bit set in u32 space is undefined behavior due to -+ * resulting 1U << 32, so do it manually here in u64 space. -+ */ -+ mask64 = fls_long(max_entries - 1); -+ mask64 = 1ULL << mask64; -+ mask64 -= 1; -+ -+ index_mask = mask64; -+ if (unpriv) { - /* round up array size to nearest power of 2, - * since cpu will speculate within index_mask limits - */ - max_entries = index_mask + 1; -+ /* Check for overflows. 
*/ -+ if (max_entries < attr->max_entries) -+ return ERR_PTR(-E2BIG); -+ } - - array_size = sizeof(*array); - if (percpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch deleted file mode 100644 index f2e083bc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 6dbf11655572182e63051b8ef4e61a07fb4901c0 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Thu, 3 May 2018 15:03:30 -0700 -Subject: [PATCH 46/93] x86/bugs: Make boot modes __ro_after_init - -commit f9544b2b076ca90d887c5ae5d74fab4c21bb7c13 upstream - -There's no reason for these to be changed after boot. - -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 3760931..65114d2 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -128,7 +128,8 @@ static const char *spectre_v2_strings[] = { - #undef pr_fmt - #define pr_fmt(fmt) "Spectre V2 : " fmt - --static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = -+ SPECTRE_V2_NONE; - - void x86_spec_ctrl_set(u64 val) - { -@@ -406,7 +407,7 @@ static void __init spectre_v2_select_mitigation(void) - #undef pr_fmt - #define pr_fmt(fmt) "Speculative Store Bypass: " fmt - --static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; -+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; - - /* The kernel command line selection */ - enum ssb_mitigation_cmd { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch deleted file mode 100644 index 6f74166c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 10f154142e83fdb4e9d107e0f72b01864e69e108 Mon Sep 17 00:00:00 2001 -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 14 Sep 2017 11:42:17 +0200 -Subject: [PATCH 47/93] fs/proc: Report eip/esp in /prod/PID/stat for - coredumping - -commit fd7d56270b526ca3ed0c224362e3c64a0f86687a upstream. - -Commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in -/proc/PID/stat") stopped reporting eip/esp because it is -racy and dangerous for executing tasks. The comment adds: - - As far as I know, there are no use programs that make any - material use of these fields, so just get rid of them. - -However, existing userspace core-dump-handler applications (for -example, minicoredumper) are using these fields since they -provide an excellent cross-platform interface to these valuable -pointers. So that commit introduced a user space visible -regression. - -Partially revert the change and make the readout possible for -tasks with the proper permissions and only if the target task -has the PF_DUMPCORE flag set. 
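
A hedged user-space sketch of how a core-dump handler might consume the re-enabled values: per proc(5), kstkesp and kstkeip are fields 29 and 30 of /proc/PID/stat, and with this patch they read back non-zero only while the target task is core dumping (PF_DUMPCORE) and the reader is permitted to inspect it.

    #include <stdio.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
            char path[64], buf[1024];
            unsigned long esp, eip;
            char *p;
            FILE *f;
            int i;

            snprintf(path, sizeof(path), "/proc/%s/stat",
                     argc > 1 ? argv[1] : "self");
            f = fopen(path, "r");
            if (!f)
                    return 1;
            if (!fgets(buf, sizeof(buf), f)) {
                    fclose(f);
                    return 1;
            }
            fclose(f);

            /* comm (field 2) may contain spaces: restart after its ')' */
            p = strrchr(buf, ')');
            /* walk to the space preceding field 29 (kstkesp) */
            for (i = 3; i <= 29 && p; i++)
                    p = strchr(p + 1, ' ');
            if (!p || sscanf(p, " %lu %lu", &esp, &eip) != 2)
                    return 1;

            printf("esp=%#lx eip=%#lx\n", esp, eip);
            return 0;
    }

For a task that is not core dumping (or without permission) both values print as 0, matching the behavior this patch preserves.
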
- -Fixes: 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in> /proc/PID/stat") -Reported-by: Marco Felsch <marco.felsch@preh.de> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Andy Lutomirski <luto@kernel.org> -Cc: Tycho Andersen <tycho.andersen@canonical.com> -Cc: Kees Cook <keescook@chromium.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Al Viro <viro@zeniv.linux.org.uk> -Cc: Linux API <linux-api@vger.kernel.org> -Cc: Andrew Morton <akpm@linux-foundation.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Link: http://lkml.kernel.org/r/87poatfwg6.fsf@linutronix.de -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - fs/proc/array.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/fs/proc/array.c b/fs/proc/array.c -index 81818ad..c932ec4 100644 ---- a/fs/proc/array.c -+++ b/fs/proc/array.c -@@ -60,6 +60,7 @@ - #include <linux/tty.h> - #include <linux/string.h> - #include <linux/mman.h> -+#include <linux/sched.h> - #include <linux/proc_fs.h> - #include <linux/ioport.h> - #include <linux/uaccess.h> -@@ -416,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, - * esp and eip are intentionally zeroed out. There is no - * non-racy way to read them without freezing the task. - * Programs that need reliable values can use ptrace(2). -+ * -+ * The only exception is if the task is core dumping because -+ * a program is not able to use ptrace(2) in that case. It is -+ * safe because the task has stopped executing permanently. - */ -+ if (permitted && (task->flags & PF_DUMPCORE)) { -+ eip = KSTK_EIP(task); -+ esp = KSTK_ESP(task); -+ } - } - - get_task_comm(tcomm, task); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-Documentation-Add-PTI-description.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-Documentation-Add-PTI-description.patch deleted file mode 100644 index bd399062..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-x86-Documentation-Add-PTI-description.patch +++ /dev/null @@ -1,267 +0,0 @@ -From 3a2bc0721f7a7cb408570b01508a581ef69a2aac Mon Sep 17 00:00:00 2001 -From: Dave Hansen <dave.hansen@linux.intel.com> -Date: Fri, 5 Jan 2018 09:44:36 -0800 -Subject: [PATCH 047/103] x86/Documentation: Add PTI description - -commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream. - -Add some details about how PTI works, what some of the downsides -are, and how to debug it when things go wrong. - -Also document the kernel parameter: 'pti/nopti'. 
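
As a quick runtime check, trees that also carry the sysfs CPU-vulnerabilities backports report whether PTI ended up enabled on a given boot; a minimal reader, assuming /sys/devices/system/cpu/vulnerabilities/meltdown is present on the running kernel:

    #include <stdio.h>

    int main(void)
    {
            char line[128];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/meltdown", "r");

            if (!f) {
                    puts("vulnerabilities interface not available");
                    return 1;
            }
            if (fgets(line, sizeof(line), f))
                    printf("meltdown: %s", line);   /* e.g. "Mitigation: PTI" */
            fclose(f);
            return 0;
    }
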
- -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Randy Dunlap <rdunlap@infradead.org> -Reviewed-by: Kees Cook <keescook@chromium.org> -Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at> -Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at> -Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at> -Cc: Richard Fellner <richard.fellner@student.tugraz.at> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Hugh Dickins <hughd@google.com> -Cc: Andi Lutomirsky <luto@kernel.org> -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 21 ++-- - Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 200 insertions(+), 7 deletions(-) - create mode 100644 Documentation/x86/pti.txt - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 9f04c53..3d53778 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2753,8 +2753,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - - nojitter [IA-64] Disables jitter checking for ITC timers. - -- nopti [X86-64] Disable KAISER isolation of kernel from user. -- - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver - - no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page -@@ -3317,11 +3315,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - pt. [PARIDE] - See Documentation/blockdev/paride.txt. - -- pti= [X86_64] -- Control KAISER user/kernel address space isolation: -- on - enable -- off - disable -- auto - default setting -+ pti= [X86_64] Control Page Table Isolation of user and -+ kernel address spaces. Disabling this feature -+ removes hardening, but improves performance of -+ system calls and interrupts. -+ -+ on - unconditionally enable -+ off - unconditionally disable -+ auto - kernel detects whether your CPU model is -+ vulnerable to issues that PTI mitigates -+ -+ Not specifying this option is equivalent to pti=auto. -+ -+ nopti [X86_64] -+ Equivalent to pti=off - - pty.legacy_count= - [KNL] Number of legacy pty's. Overwrites compiled-in -diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt -new file mode 100644 -index 0000000..d11eff6 ---- /dev/null -+++ b/Documentation/x86/pti.txt -@@ -0,0 +1,186 @@ -+Overview -+======== -+ -+Page Table Isolation (pti, previously known as KAISER[1]) is a -+countermeasure against attacks on the shared user/kernel address -+space such as the "Meltdown" approach[2]. -+ -+To mitigate this class of attacks, we create an independent set of -+page tables for use only when running userspace applications. When -+the kernel is entered via syscalls, interrupts or exceptions, the -+page tables are switched to the full "kernel" copy. When the system -+switches back to user mode, the user copy is used again. -+ -+The userspace page tables contain only a minimal amount of kernel -+data: only what is needed to enter/exit the kernel such as the -+entry/exit functions themselves and the interrupt descriptor table -+(IDT). There are a few strictly unnecessary things that get mapped -+such as the first C function when entering an interrupt (see -+comments in pti.c). 
-+ -+This approach helps to ensure that side-channel attacks leveraging -+the paging structures do not function when PTI is enabled. It can be -+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. -+Once enabled at compile-time, it can be disabled at boot with the -+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). -+ -+Page Table Management -+===================== -+ -+When PTI is enabled, the kernel manages two sets of page tables. -+The first set is very similar to the single set which is present in -+kernels without PTI. This includes a complete mapping of userspace -+that the kernel can use for things like copy_to_user(). -+ -+Although _complete_, the user portion of the kernel page tables is -+crippled by setting the NX bit in the top level. This ensures -+that any missed kernel->user CR3 switch will immediately crash -+userspace upon executing its first instruction. -+ -+The userspace page tables map only the kernel data needed to enter -+and exit the kernel. This data is entirely contained in the 'struct -+cpu_entry_area' structure which is placed in the fixmap which gives -+each CPU's copy of the area a compile-time-fixed virtual address. -+ -+For new userspace mappings, the kernel makes the entries in its -+page tables like normal. The only difference is when the kernel -+makes entries in the top (PGD) level. In addition to setting the -+entry in the main kernel PGD, a copy of the entry is made in the -+userspace page tables' PGD. -+ -+This sharing at the PGD level also inherently shares all the lower -+layers of the page tables. This leaves a single, shared set of -+userspace page tables to manage. One PTE to lock, one set of -+accessed bits, dirty bits, etc... -+ -+Overhead -+======== -+ -+Protection against side-channel attacks is important. But, -+this protection comes at a cost: -+ -+1. Increased Memory Use -+ a. Each process now needs an order-1 PGD instead of order-0. -+ (Consumes an additional 4k per process). -+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB -+ aligned so that it can be mapped by setting a single PMD -+ entry. This consumes nearly 2MB of RAM once the kernel -+ is decompressed, but no space in the kernel image itself. -+ -+2. Runtime Cost -+ a. CR3 manipulation to switch between the page table copies -+ must be done at interrupt, syscall, and exception entry -+ and exit (it can be skipped when the kernel is interrupted, -+ though.) Moves to CR3 are on the order of a hundred -+ cycles, and are required at every entry and exit. -+ b. A "trampoline" must be used for SYSCALL entry. This -+ trampoline depends on a smaller set of resources than the -+ non-PTI SYSCALL entry code, so requires mapping fewer -+ things into the userspace page tables. The downside is -+ that stacks must be switched at entry time. -+ d. Global pages are disabled for all kernel structures not -+ mapped into both kernel and userspace page tables. This -+ feature of the MMU allows different processes to share TLB -+ entries mapping the kernel. Losing the feature means more -+ TLB misses after a context switch. The actual loss of -+ performance is very small, however, never exceeding 1%. -+ d. Process Context IDentifiers (PCID) is a CPU feature that -+ allows us to skip flushing the entire TLB when switching page -+ tables by setting a special bit in CR3 when the page tables -+ are changed. This makes switching the page tables (at context -+ switch, or kernel entry/exit) cheaper. 
But, on systems with -+ PCID support, the context switch code must flush both the user -+ and kernel entries out of the TLB. The user PCID TLB flush is -+ deferred until the exit to userspace, minimizing the cost. -+ See intel.com/sdm for the gory PCID/INVPCID details. -+ e. The userspace page tables must be populated for each new -+ process. Even without PTI, the shared kernel mappings -+ are created by copying top-level (PGD) entries into each -+ new process. But, with PTI, there are now *two* kernel -+ mappings: one in the kernel page tables that maps everything -+ and one for the entry/exit structures. At fork(), we need to -+ copy both. -+ f. In addition to the fork()-time copying, there must also -+ be an update to the userspace PGD any time a set_pgd() is done -+ on a PGD used to map userspace. This ensures that the kernel -+ and userspace copies always map the same userspace -+ memory. -+ g. On systems without PCID support, each CR3 write flushes -+ the entire TLB. That means that each syscall, interrupt -+ or exception flushes the TLB. -+ h. INVPCID is a TLB-flushing instruction which allows flushing -+ of TLB entries for non-current PCIDs. Some systems support -+ PCIDs, but do not support INVPCID. On these systems, addresses -+ can only be flushed from the TLB for the current PCID. When -+ flushing a kernel address, we need to flush all PCIDs, so a -+ single kernel address flush will require a TLB-flushing CR3 -+ write upon the next use of every PCID. -+ -+Possible Future Work -+==================== -+1. We can be more careful about not actually writing to CR3 -+ unless its value is actually changed. -+2. Allow PTI to be enabled/disabled at runtime in addition to the -+ boot-time switching. -+ -+Testing -+======== -+ -+To test stability of PTI, the following test procedure is recommended, -+ideally doing all of these in parallel: -+ -+1. Set CONFIG_DEBUG_ENTRY=y -+2. Run several copies of all of the tools/testing/selftests/x86/ tests -+ (excluding MPX and protection_keys) in a loop on multiple CPUs for -+ several minutes. These tests frequently uncover corner cases in the -+ kernel entry code. In general, old kernels might cause these tests -+ themselves to crash, but they should never crash the kernel. -+3. Run the 'perf' tool in a mode (top or record) that generates many -+ frequent performance monitoring non-maskable interrupts (see "NMI" -+ in /proc/interrupts). This exercises the NMI entry/exit code which -+ is known to trigger bugs in code paths that did not expect to be -+ interrupted, including nested NMIs. Using "-c" boosts the rate of -+ NMIs, and using two -c with separate counters encourages nested NMIs -+ and less deterministic behavior. -+ -+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done -+ -+4. Launch a KVM virtual machine. -+5. Run 32-bit binaries on systems supporting the SYSCALL instruction. -+ This has been a lightly-tested code path and needs extra scrutiny. -+ -+Debugging -+========= -+ -+Bugs in PTI cause a few different signatures of crashes -+that are worth noting here. -+ -+ * Failures of the selftests/x86 code. Usually a bug in one of the -+ more obscure corners of entry_64.S -+ * Crashes in early boot, especially around CPU bringup. Bugs -+ in the trampoline code or mappings cause these. -+ * Crashes at the first interrupt. Caused by bugs in entry_64.S, -+ like screwing up a page table switch. Also caused by -+ incorrectly mapping the IRQ handler entry code. -+ * Crashes at the first NMI. 
The NMI code is separate from main -+ interrupt handlers and can have bugs that do not affect -+ normal interrupts. Also caused by incorrectly mapping NMI -+ code. NMIs that interrupt the entry code must be very -+ careful and can be the cause of crashes that show up when -+ running perf. -+ * Kernel crashes at the first exit to userspace. entry_64.S -+ bugs, or failing to map some of the exit code. -+ * Crashes at first interrupt that interrupts userspace. The paths -+ in entry_64.S that return to userspace are sometimes separate -+ from the ones that return to the kernel. -+ * Double faults: overflowing the kernel stack because of page -+ faults upon page faults. Caused by touching non-pti-mapped -+ data in the entry code, or forgetting to switch to kernel -+ CR3 before calling into C functions which are not pti-mapped. -+ * Userspace segfaults early in boot, sometimes manifesting -+ as mount(8) failing to mount the rootfs. These have -+ tended to be TLB invalidation issues. Usually invalidating -+ the wrong PCID, or otherwise missing an invalidation. -+ -+1. https://gruss.cc/files/kaiser.pdf -+2. https://meltdownattack.com/meltdown.pdf --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch deleted file mode 100644 index 30c8de50..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 476f6e1404b0b16c48ae53249ffb362a16bf376c Mon Sep 17 00:00:00 2001 -From: Alexey Dobriyan <adobriyan@gmail.com> -Date: Thu, 18 Jan 2018 16:34:05 -0800 -Subject: [PATCH 48/93] proc: fix coredump vs read /proc/*/stat race - -commit 8bb2ee192e482c5d500df9f2b1b26a560bd3026f upstream. - -do_task_stat() accesses IP and SP of a task without bumping reference -count of a stack (which became an entity with independent lifetime at -some point). 
- -Steps to reproduce: - - #include <stdio.h> - #include <sys/types.h> - #include <sys/stat.h> - #include <fcntl.h> - #include <sys/time.h> - #include <sys/resource.h> - #include <unistd.h> - #include <sys/wait.h> - - int main(void) - { - setrlimit(RLIMIT_CORE, &(struct rlimit){}); - - while (1) { - char buf[64]; - char buf2[4096]; - pid_t pid; - int fd; - - pid = fork(); - if (pid == 0) { - *(volatile int *)0 = 0; - } - - snprintf(buf, sizeof(buf), "/proc/%u/stat", pid); - fd = open(buf, O_RDONLY); - read(fd, buf2, sizeof(buf2)); - close(fd); - - waitpid(pid, NULL, 0); - } - return 0; - } - - BUG: unable to handle kernel paging request at 0000000000003fd8 - IP: do_task_stat+0x8b4/0xaf0 - PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0 - Oops: 0000 [#1] PREEMPT SMP PTI - CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 - RIP: 0010:do_task_stat+0x8b4/0xaf0 - Call Trace: - proc_single_show+0x43/0x70 - seq_read+0xe6/0x3b0 - __vfs_read+0x1e/0x120 - vfs_read+0x84/0x110 - SyS_read+0x3d/0xa0 - entry_SYSCALL_64_fastpath+0x13/0x6c - RIP: 0033:0x7f4d7928cba0 - RSP: 002b:00007ffddb245158 EFLAGS: 00000246 - Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24 - RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8 - CR2: 0000000000003fd8 - -John Ogness said: for my tests I added an else case to verify that the -race is hit and correctly mitigated. - -Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2 -Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> -Reported-by: "Kohli, Gaurav" <gkohli@codeaurora.org> -Tested-by: John Ogness <john.ogness@linutronix.de> -Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> -Cc: Ingo Molnar <mingo@elte.hu> -Cc: Oleg Nesterov <oleg@redhat.com> -Signed-off-by: Andrew Morton <akpm@linux-foundation.org> -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - fs/proc/array.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/fs/proc/array.c b/fs/proc/array.c -index c932ec4..794b52a 100644 ---- a/fs/proc/array.c -+++ b/fs/proc/array.c -@@ -423,8 +423,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, - * safe because the task has stopped executing permanently. - */ - if (permitted && (task->flags & PF_DUMPCORE)) { -- eip = KSTK_EIP(task); -- esp = KSTK_ESP(task); -+ if (try_get_task_stack(task)) { -+ eip = KSTK_EIP(task); -+ esp = KSTK_ESP(task); -+ put_task_stack(task); -+ } - } - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch deleted file mode 100644 index 65bd28de..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 4766e893b3d115fe69de4bfd9a9942669806620a Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Wed, 18 Jan 2017 11:15:38 -0800 -Subject: [PATCH 048/103] x86/cpu: Factor out application of forced CPU caps - -commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream. - -There are multiple call sites that apply forced CPU caps. Factor -them into a helper. 
- -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: Fenghua Yu <fenghua.yu@intel.com> -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Matthew Whitehead <tedheadster@gmail.com> -Cc: Oleg Nesterov <oleg@redhat.com> -Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: Yu-cheng Yu <yu-cheng.yu@intel.com> -Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 918e447..4c65225 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c) - } - } - -+static void apply_forced_caps(struct cpuinfo_x86 *c) -+{ -+ int i; -+ -+ for (i = 0; i < NCAPINTS; i++) { -+ c->x86_capability[i] &= ~cpu_caps_cleared[i]; -+ c->x86_capability[i] |= cpu_caps_set[i]; -+ } -+} -+ - void get_cpu_cap(struct cpuinfo_x86 *c) - { - u32 eax, ebx, ecx, edx; -@@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) - this_cpu->c_identify(c); - - /* Clear/Set all flags overridden by options, after probe */ -- for (i = 0; i < NCAPINTS; i++) { -- c->x86_capability[i] &= ~cpu_caps_cleared[i]; -- c->x86_capability[i] |= cpu_caps_set[i]; -- } -+ apply_forced_caps(c); - - #ifdef CONFIG_X86_64 - c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); -@@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) - * Clear/Set all flags overridden by options, need do it - * before following smp all cpus cap AND. - */ -- for (i = 0; i < NCAPINTS; i++) { -- c->x86_capability[i] &= ~cpu_caps_cleared[i]; -- c->x86_capability[i] |= cpu_caps_set[i]; -- } -+ apply_forced_caps(c); - - /* - * On SMP, boot_cpu_data holds the common feature set between --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch deleted file mode 100644 index 4c1c8184..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch +++ /dev/null @@ -1,64 +0,0 @@ -From a59a45de2d39c0e4f789ab2f05dc4b675ebc7914 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Tue, 1 May 2018 15:31:45 -0700 -Subject: [PATCH 49/93] proc: Provide details on speculation flaw mitigations - -commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 upstream - -As done with seccomp and no_new_privs, also show speculation flaw -mitigation state in /proc/$pid/status. 
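
A minimal userspace sketch (illustrative only, not part of the patch) to observe the new line; the field spelling follows the seq_printf() strings in the hunk below, so adjust it if your kernel spells the line differently:

    /* Illustrative only: print the speculation line task_seccomp() now adds. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            FILE *f = fopen("/proc/self/status", "r");
            char line[256];

            if (!f) {
                    perror("/proc/self/status");
                    return 1;
            }
            while (fgets(line, sizeof(line), f)) {
                    if (strstr(line, "Speculation"))
                            fputs(line, stdout);    /* e.g. "...: not vulnerable" */
            }
            fclose(f);
            return 0;
    }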
- -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - fs/proc/array.c | 24 +++++++++++++++++++++++- - 1 file changed, 23 insertions(+), 1 deletion(-) - -diff --git a/fs/proc/array.c b/fs/proc/array.c -index 794b52a..64f3f20 100644 ---- a/fs/proc/array.c -+++ b/fs/proc/array.c -@@ -80,6 +80,7 @@ - #include <linux/delayacct.h> - #include <linux/seq_file.h> - #include <linux/pid_namespace.h> -+#include <linux/prctl.h> - #include <linux/ptrace.h> - #include <linux/tracehook.h> - #include <linux/string_helpers.h> -@@ -345,8 +346,29 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) - { - #ifdef CONFIG_SECCOMP - seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); -- seq_putc(m, '\n'); - #endif -+ seq_printf(m, "\nSpeculation Store Bypass:\t"); -+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { -+ case -EINVAL: -+ seq_printf(m, "unknown"); -+ break; -+ case PR_SPEC_NOT_AFFECTED: -+ seq_printf(m, "not vulnerable"); -+ break; -+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE: -+ seq_printf(m, "thread mitigated"); -+ break; -+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE: -+ seq_printf(m, "thread vulnerable"); -+ break; -+ case PR_SPEC_DISABLE: -+ seq_printf(m, "globally mitigated"); -+ break; -+ default: -+ seq_printf(m, "vulnerable"); -+ break; -+ } -+ seq_putc(m, '\n'); - } - - static inline void task_context_switch_counts(struct seq_file *m, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Make-CPU-bugs-sticky.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Make-CPU-bugs-sticky.patch deleted file mode 100644 index 338458c6..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-x86-cpufeatures-Make-CPU-bugs-sticky.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 7662774f18773a931bbd96f226d9dc88c0d2f0c7 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 4 Dec 2017 15:07:32 +0100 -Subject: [PATCH 049/103] x86/cpufeatures: Make CPU bugs sticky - -commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream. - -There is currently no way to force CPU bug bits like CPU feature bits. That -makes it impossible to set a bug bit once at boot and have it stick for all -upcoming CPUs. - -Extend the force set/clear arrays to handle bug bits as well. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Borislav Petkov <bpetkov@suse.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Laight <David.Laight@aculab.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: Eduardo Valentin <eduval@amazon.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: H. 
Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Juergen Gross <jgross@suse.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Rik van Riel <riel@redhat.com> -Cc: Will Deacon <will.deacon@arm.com> -Cc: aliguori@amazon.com -Cc: daniel.gruss@iaik.tugraz.at -Cc: hughd@google.com -Cc: keescook@google.com -Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeature.h | 2 ++ - arch/x86/include/asm/processor.h | 4 ++-- - arch/x86/kernel/cpu/common.c | 6 +++--- - 3 files changed, 7 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h -index 1d2b69f..9ea67a0 100644 ---- a/arch/x86/include/asm/cpufeature.h -+++ b/arch/x86/include/asm/cpufeature.h -@@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; - set_bit(bit, (unsigned long *)cpu_caps_set); \ - } while (0) - -+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -+ - #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) - /* - * Static testing of CPU features. Used the same as boot_cpu_has(). -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 8cb52ee..e40b19c 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data; - extern struct cpuinfo_x86 new_cpu_data; - - extern struct tss_struct doublefault_tss; --extern __u32 cpu_caps_cleared[NCAPINTS]; --extern __u32 cpu_caps_set[NCAPINTS]; -+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; -+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; - - #ifdef CONFIG_SMP - DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 4c65225..ba9b601 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) - return NULL; /* Not found */ - } - --__u32 cpu_caps_cleared[NCAPINTS]; --__u32 cpu_caps_set[NCAPINTS]; -+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; -+__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; - - void load_percpu_segment(int cpu) - { -@@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) - { - int i; - -- for (i = 0; i < NCAPINTS; i++) { -+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch deleted file mode 100644 index acdc260b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 6eca73ee80c5d8b6f8c3d294b3f97b7c8da67791 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 3 May 2018 22:09:15 +0200 -Subject: [PATCH 50/93] prctl: Add force disable speculation - -commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream - -For certain use cases it is desired to enforce mitigations so they cannot -be undone afterwards. 
That's important for loader stubs which want to -prevent a child from disabling the mitigation again. Will also be used for -seccomp(). The extra state preserving of the prctl state for SSB is a -preparatory step for EBPF dymanic speculation control. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++------------- - arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++---------- - fs/proc/array.c | 3 +++ - include/linux/sched.h | 9 +++++++++ - include/uapi/linux/prctl.h | 1 + - 5 files changed, 59 insertions(+), 23 deletions(-) - -diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt -index ddbebcd..1b3690d 100644 ---- a/Documentation/spec_ctrl.txt -+++ b/Documentation/spec_ctrl.txt -@@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL - ----------------------- - - PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature --which is selected with arg2 of prctl(2). The return value uses bits 0-2 with -+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with - the following meaning: - --==== ================ =================================================== --Bit Define Description --==== ================ =================================================== --0 PR_SPEC_PRCTL Mitigation can be controlled per task by -- PR_SET_SPECULATION_CTRL --1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is -- disabled --2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is -- enabled --==== ================ =================================================== -+==== ===================== =================================================== -+Bit Define Description -+==== ===================== =================================================== -+0 PR_SPEC_PRCTL Mitigation can be controlled per task by -+ PR_SET_SPECULATION_CTRL -+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is -+ disabled -+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is -+ enabled -+3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A -+ subsequent prctl(..., PR_SPEC_ENABLE) will fail. -+==== ===================== =================================================== - - If all bits are 0 the CPU is not affected by the speculation misfeature. - -@@ -47,9 +49,11 @@ misfeature will fail. - - PR_SET_SPECULATION_CTRL - ----------------------- -+ - PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which - is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand --in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. -+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or -+PR_SPEC_FORCE_DISABLE. - - Common error codes - ------------------ -@@ -70,10 +74,13 @@ Value Meaning - 0 Success - - ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor -- PR_SPEC_DISABLE -+ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE - - ENXIO Control of the selected speculation misfeature is not possible. - See PR_GET_SPECULATION_CTRL. -+ -+EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller -+ tried to enable it again. 
- ======= ================================================================= - - Speculation misfeature controls -@@ -84,3 +91,4 @@ Speculation misfeature controls - * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); - * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); - * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); -+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 65114d2..fdbd8e5 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -532,21 +532,37 @@ static void ssb_select_mitigation() - - static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - { -- bool rds = !!test_tsk_thread_flag(task, TIF_RDS); -+ bool update; - - if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) - return -ENXIO; - -- if (ctrl == PR_SPEC_ENABLE) -- clear_tsk_thread_flag(task, TIF_RDS); -- else -- set_tsk_thread_flag(task, TIF_RDS); -+ switch (ctrl) { -+ case PR_SPEC_ENABLE: -+ /* If speculation is force disabled, enable is not allowed */ -+ if (task_spec_ssb_force_disable(task)) -+ return -EPERM; -+ task_clear_spec_ssb_disable(task); -+ update = test_and_clear_tsk_thread_flag(task, TIF_RDS); -+ break; -+ case PR_SPEC_DISABLE: -+ task_set_spec_ssb_disable(task); -+ update = !test_and_set_tsk_thread_flag(task, TIF_RDS); -+ break; -+ case PR_SPEC_FORCE_DISABLE: -+ task_set_spec_ssb_disable(task); -+ task_set_spec_ssb_force_disable(task); -+ update = !test_and_set_tsk_thread_flag(task, TIF_RDS); -+ break; -+ default: -+ return -ERANGE; -+ } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ -- if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) -+ if (task == current && update) - speculative_store_bypass_update(); - - return 0; -@@ -558,7 +574,9 @@ static int ssb_prctl_get(struct task_struct *task) - case SPEC_STORE_BYPASS_DISABLE: - return PR_SPEC_DISABLE; - case SPEC_STORE_BYPASS_PRCTL: -- if (test_tsk_thread_flag(task, TIF_RDS)) -+ if (task_spec_ssb_force_disable(task)) -+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; -+ if (task_spec_ssb_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_DISABLE; - return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - default: -@@ -571,9 +589,6 @@ static int ssb_prctl_get(struct task_struct *task) - int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) - { -- if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) -- return -ERANGE; -- - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(task, ctrl); -diff --git a/fs/proc/array.c b/fs/proc/array.c -index 64f3f20..3e37195 100644 ---- a/fs/proc/array.c -+++ b/fs/proc/array.c -@@ -355,6 +355,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) - case PR_SPEC_NOT_AFFECTED: - seq_printf(m, "not vulnerable"); - break; -+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: -+ seq_printf(m, "thread force mitigated"); -+ break; - case PR_SPEC_PRCTL | PR_SPEC_DISABLE: - seq_printf(m, "thread mitigated"); - break; -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 75d9a57..8e127a3 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2335,6 +2335,8 @@ static inline void memalloc_noio_restore(unsigned int flags) - #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ - #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ - #define 
PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ -+#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ -+#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ - - - #define TASK_PFA_TEST(name, func) \ -@@ -2361,6 +2363,13 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) - TASK_PFA_TEST(LMK_WAITING, lmk_waiting) - TASK_PFA_SET(LMK_WAITING, lmk_waiting) - -+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) -+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) -+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) -+ -+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) -+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) -+ - /* - * task->jobctl flags - */ -diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h -index 3b316be..64776b7 100644 ---- a/include/uapi/linux/prctl.h -+++ b/include/uapi/linux/prctl.h -@@ -207,5 +207,6 @@ struct prctl_mm_map { - # define PR_SPEC_PRCTL (1UL << 0) - # define PR_SPEC_ENABLE (1UL << 1) - # define PR_SPEC_DISABLE (1UL << 2) -+# define PR_SPEC_FORCE_DISABLE (1UL << 3) - - #endif /* _LINUX_PRCTL_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch deleted file mode 100644 index e1a6c9c2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch +++ /dev/null @@ -1,78 +0,0 @@ -From fe82bc0584bd60ca76418293ee2c3436b56ce3eb Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 4 Dec 2017 15:07:33 +0100 -Subject: [PATCH 050/103] x86/cpufeatures: Add X86_BUG_CPU_INSECURE - -commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream. - -Many x86 CPUs leak information to user space due to missing isolation of -user space and kernel space page tables. There are many well documented -ways to exploit that. - -The upcoming software migitation of isolating the user and kernel space -page tables needs a misfeature flag so code can be made runtime -conditional. - -Add the BUG bits which indicates that the CPU is affected and add a feature -bit which indicates that the software migitation is enabled. - -Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be -made later. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: David Laight <David.Laight@aculab.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: Eduardo Valentin <eduval@amazon.com> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: H. 
Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Juergen Gross <jgross@suse.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Will Deacon <will.deacon@arm.com> -Cc: aliguori@amazon.com -Cc: daniel.gruss@iaik.tugraz.at -Cc: hughd@google.com -Cc: keescook@google.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/kernel/cpu/common.c | 4 ++++ - 2 files changed, 5 insertions(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 454a37a..57bd52c 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -316,5 +316,6 @@ - #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ - #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ - #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -+#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ - - #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index ba9b601..8c81adc 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -882,6 +882,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - } - - setup_force_cpu_cap(X86_FEATURE_ALWAYS); -+ -+ /* Assume for now that ALL x86 CPUs are insecure */ -+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE); -+ - fpu__init_system(c); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch deleted file mode 100644 index 7361acf3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 687c8baff48fb1849f5c2e8fdaeb2ff565f6554b Mon Sep 17 00:00:00 2001 -From: Oleg Nesterov <oleg@redhat.com> -Date: Wed, 27 Sep 2017 09:25:30 -0600 -Subject: [PATCH 51/93] seccomp: fix the usage of get/put_seccomp_filter() in - seccomp_get_filter() - -commit 66a733ea6b611aecf0119514d2dddab5f9d6c01e upstream. - -As Chris explains, get_seccomp_filter() and put_seccomp_filter() can end -up using different filters. Once we drop ->siglock it is possible for -task->seccomp.filter to have been replaced by SECCOMP_FILTER_FLAG_TSYNC. - -Fixes: f8e529ed941b ("seccomp, ptrace: add support for dumping seccomp filters") -Reported-by: Chris Salls <chrissalls5@gmail.com> -Signed-off-by: Oleg Nesterov <oleg@redhat.com> -[tycho: add __get_seccomp_filter vs. 
open coding refcount_inc()] -Signed-off-by: Tycho Andersen <tycho@docker.com> -[kees: tweak commit log] -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/seccomp.c | 23 ++++++++++++++++------- - 1 file changed, 16 insertions(+), 7 deletions(-) - -diff --git a/kernel/seccomp.c b/kernel/seccomp.c -index 0db7c8a..af182a6 100644 ---- a/kernel/seccomp.c -+++ b/kernel/seccomp.c -@@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsigned int flags, - return 0; - } - -+void __get_seccomp_filter(struct seccomp_filter *filter) -+{ -+ /* Reference count is bounded by the number of total processes. */ -+ atomic_inc(&filter->usage); -+} -+ - /* get_seccomp_filter - increments the reference count of the filter on @tsk */ - void get_seccomp_filter(struct task_struct *tsk) - { - struct seccomp_filter *orig = tsk->seccomp.filter; - if (!orig) - return; -- /* Reference count is bounded by the number of total processes. */ -- atomic_inc(&orig->usage); -+ __get_seccomp_filter(orig); - } - - static inline void seccomp_filter_free(struct seccomp_filter *filter) -@@ -475,10 +480,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter) - } - } - --/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ --void put_seccomp_filter(struct task_struct *tsk) -+static void __put_seccomp_filter(struct seccomp_filter *orig) - { -- struct seccomp_filter *orig = tsk->seccomp.filter; - /* Clean up single-reference branches iteratively. */ - while (orig && atomic_dec_and_test(&orig->usage)) { - struct seccomp_filter *freeme = orig; -@@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_struct *tsk) - } - } - -+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ -+void put_seccomp_filter(struct task_struct *tsk) -+{ -+ __put_seccomp_filter(tsk->seccomp.filter); -+} -+ - /** - * seccomp_send_sigsys - signals the task to allow in-process syscall emulation - * @syscall: syscall number to send to userland -@@ -892,13 +901,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, - if (!data) - goto out; - -- get_seccomp_filter(task); -+ __get_seccomp_filter(filter); - spin_unlock_irq(&task->sighand->siglock); - - if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) - ret = -EFAULT; - -- put_seccomp_filter(task); -+ __put_seccomp_filter(filter); - return ret; - - out: --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch deleted file mode 100644 index 0d994853..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 61f2363570145629ccd0478eeb77756f2aee3e05 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 5 Jan 2018 15:27:34 +0100 -Subject: [PATCH 051/103] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN - -commit de791821c295cc61419a06fe5562288417d1bc58 upstream. - -Use the name associated with the particular attack which needs page table -isolation for mitigation. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> -Cc: Jiri Koshina <jikos@kernel.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Andi Lutomirski <luto@amacapital.net> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Paul Turner <pjt@google.com> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Greg KH <gregkh@linux-foundation.org> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 +- - arch/x86/kernel/cpu/common.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 57bd52c..985dfd7 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -316,6 +316,6 @@ - #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ - #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ - #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ --#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ -+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ - - #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 8c81adc..5ab4fd7 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -884,7 +884,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - setup_force_cpu_cap(X86_FEATURE_ALWAYS); - - /* Assume for now that ALL x86 CPUs are insecure */ -- setup_force_cpu_bug(X86_BUG_CPU_INSECURE); -+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - fpu__init_system(c); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch deleted file mode 100644 index 85ed7f13..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 6afc277e9b6b9bf8bb4c8c2e4641a021f9d709e2 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Tue, 1 May 2018 15:07:31 -0700 -Subject: [PATCH 52/93] seccomp: Enable speculation flaw mitigations - -commit 5c3070890d06ff82eecb808d02d2ca39169533ef upstream - -When speculation flaw mitigations are opt-in (via prctl), using seccomp -will automatically opt-in to these protections, since using seccomp -indicates at least some level of sandboxing is desired. 
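
A small sketch showing how a task can query the Speculative Store Bypass state that seccomp now flips to "mitigated" (not part of the patch; the prctl constants may be missing from older userspace headers, so they are re-defined below with the values from include/uapi/linux/prctl.h):

    /* Sketch: query the per-task Speculative Store Bypass control state. */
    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_SPECULATION_CTRL
    # define PR_GET_SPECULATION_CTRL 52
    #endif
    #ifndef PR_SPEC_STORE_BYPASS
    # define PR_SPEC_STORE_BYPASS 0
    #endif

    int main(void)
    {
            long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

            /* Bit meanings are documented in Documentation/spec_ctrl.txt. */
            printf("SSB control state: %ld\n", state);
            return 0;
    }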
- -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/seccomp.c | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/kernel/seccomp.c b/kernel/seccomp.c -index af182a6..1d3078b 100644 ---- a/kernel/seccomp.c -+++ b/kernel/seccomp.c -@@ -16,6 +16,8 @@ - #include <linux/atomic.h> - #include <linux/audit.h> - #include <linux/compat.h> -+#include <linux/nospec.h> -+#include <linux/prctl.h> - #include <linux/sched.h> - #include <linux/seccomp.h> - #include <linux/slab.h> -@@ -214,6 +216,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) - return true; - } - -+/* -+ * If a given speculation mitigation is opt-in (prctl()-controlled), -+ * select it, by disabling speculation (enabling mitigation). -+ */ -+static inline void spec_mitigate(struct task_struct *task, -+ unsigned long which) -+{ -+ int state = arch_prctl_spec_ctrl_get(task, which); -+ -+ if (state > 0 && (state & PR_SPEC_PRCTL)) -+ arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); -+} -+ - static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode) - { -@@ -225,6 +240,8 @@ static inline void seccomp_assign_mode(struct task_struct *task, - * filter) is set. - */ - smp_mb__before_atomic(); -+ /* Assume seccomp processes want speculation flaw mitigation. */ -+ spec_mitigate(task, PR_SPEC_STORE_BYPASS); - set_tsk_thread_flag(task, TIF_SECCOMP); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch deleted file mode 100644 index 739b403e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch +++ /dev/null @@ -1,62 +0,0 @@ -From b7106ba8c6f5160f80cd6880a54e533494739078 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Sat, 6 Jan 2018 11:49:23 +0000 -Subject: [PATCH 052/103] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] - -commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream. - -Add the bug bits for spectre v1/2 and force them unconditionally for all -cpus. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 ++ - arch/x86/kernel/cpu/common.c | 3 +++ - 2 files changed, 5 insertions(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 985dfd7..f364c891 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -317,5 +317,7 @@ - #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ - #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ - - #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 5ab4fd7..8339b43 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -886,6 +886,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); -+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -+ - fpu__init_system(c); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch deleted file mode 100644 index a7a60b69..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 62722a97a6aeb1ebba9b749068ed6e9eaecceb37 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 May 2018 09:40:03 +0200 -Subject: [PATCH 53/93] seccomp: Use PR_SPEC_FORCE_DISABLE - -commit b849a812f7eb92e96d1c8239b06581b2cfd8b275 upstream - -Use PR_SPEC_FORCE_DISABLE in seccomp() because seccomp does not allow to -widen restrictions. 
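
A rough userspace sketch of the semantics this relies on (not part of the patch; constants fall back to the uapi values in case the installed headers predate them): once a task force-disables the misfeature, any later attempt to re-enable it is rejected with EPERM, which is exactly the property seccomp wants.

    /* Sketch: force-disable SSB, then show that re-enabling is refused. */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    # define PR_SET_SPECULATION_CTRL 53
    #endif
    #ifndef PR_SPEC_STORE_BYPASS
    # define PR_SPEC_STORE_BYPASS 0
    #endif
    #ifndef PR_SPEC_ENABLE
    # define PR_SPEC_ENABLE (1UL << 1)
    #endif
    #ifndef PR_SPEC_FORCE_DISABLE
    # define PR_SPEC_FORCE_DISABLE (1UL << 3)
    #endif

    int main(void)
    {
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_FORCE_DISABLE, 0, 0))
                    perror("force-disable");   /* e.g. ENXIO if SSB is not prctl-controlled */

            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_ENABLE, 0, 0))
                    printf("re-enable rejected: %s\n", strerror(errno));
            return 0;
    }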
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/seccomp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/seccomp.c b/kernel/seccomp.c -index 1d3078b..a0bd6ea 100644 ---- a/kernel/seccomp.c -+++ b/kernel/seccomp.c -@@ -226,7 +226,7 @@ static inline void spec_mitigate(struct task_struct *task, - int state = arch_prctl_spec_ctrl_get(task, which); - - if (state > 0 && (state & PR_SPEC_PRCTL)) -- arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); -+ arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); - } - - static inline void seccomp_assign_mode(struct task_struct *task, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch deleted file mode 100644 index f540afd0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch +++ /dev/null @@ -1,141 +0,0 @@ -From a1aca7fc1e1c73add0d8caa5773a4d393e5b60b1 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Mon, 24 Oct 2016 19:38:43 +0200 -Subject: [PATCH 053/103] x86/cpu: Merge bugs.c and bugs_64.c - -commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream. - -Should be easier when following boot paths. It probably is a left over -from the x86 unification eons ago. - -No functionality change. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Brian Gerst <brgerst@gmail.com> -Cc: Denys Vlasenko <dvlasenk@redhat.com> -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/Makefile | 4 +--- - arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++---- - arch/x86/kernel/cpu/bugs_64.c | 33 --------------------------------- - 3 files changed, 23 insertions(+), 40 deletions(-) - delete mode 100644 arch/x86/kernel/cpu/bugs_64.c - -diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile -index 4a8697f..33b6367 100644 ---- a/arch/x86/kernel/cpu/Makefile -+++ b/arch/x86/kernel/cpu/Makefile -@@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o - obj-y += common.o - obj-y += rdrand.o - obj-y += match.o -+obj-y += bugs.o - - obj-$(CONFIG_PROC_FS) += proc.o - obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o - --obj-$(CONFIG_X86_32) += bugs.o --obj-$(CONFIG_X86_64) += bugs_64.o -- - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o - obj-$(CONFIG_CPU_SUP_AMD) += amd.o - obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index bd17db1..a44ef52 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -16,15 +16,19 @@ - #include <asm/msr.h> - #include <asm/paravirt.h> - #include <asm/alternative.h> -+#include <asm/pgtable.h> -+#include <asm/cacheflush.h> - - void __init check_bugs(void) - { - identify_boot_cpu(); --#ifndef CONFIG_SMP -- pr_info("CPU: "); -- 
print_cpu_info(&boot_cpu_data); --#endif - -+ if (!IS_ENABLED(CONFIG_SMP)) { -+ pr_info("CPU: "); -+ print_cpu_info(&boot_cpu_data); -+ } -+ -+#ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. - * -@@ -40,4 +44,18 @@ void __init check_bugs(void) - alternative_instructions(); - - fpu__init_check_bugs(); -+#else /* CONFIG_X86_64 */ -+ alternative_instructions(); -+ -+ /* -+ * Make sure the first 2MB area is not mapped by huge pages -+ * There are typically fixed size MTRRs in there and overlapping -+ * MTRRs into large pages causes slow downs. -+ * -+ * Right now we don't do that with gbpages because there seems -+ * very little benefit for that case. -+ */ -+ if (!direct_gbpages) -+ set_memory_4k((unsigned long)__va(0), 1); -+#endif - } -diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c -deleted file mode 100644 -index a972ac4..0000000 ---- a/arch/x86/kernel/cpu/bugs_64.c -+++ /dev/null -@@ -1,33 +0,0 @@ --/* -- * Copyright (C) 1994 Linus Torvalds -- * Copyright (C) 2000 SuSE -- */ -- --#include <linux/kernel.h> --#include <linux/init.h> --#include <asm/alternative.h> --#include <asm/bugs.h> --#include <asm/processor.h> --#include <asm/mtrr.h> --#include <asm/cacheflush.h> -- --void __init check_bugs(void) --{ -- identify_boot_cpu(); --#if !defined(CONFIG_SMP) -- pr_info("CPU: "); -- print_cpu_info(&boot_cpu_data); --#endif -- alternative_instructions(); -- -- /* -- * Make sure the first 2MB area is not mapped by huge pages -- * There are typically fixed size MTRRs in there and overlapping -- * MTRRs into large pages causes slow downs. -- * -- * Right now we don't do that with gbpages because there seems -- * very little benefit for that case. -- */ -- if (!direct_gbpages) -- set_memory_4k((unsigned long)__va(0), 1); --} --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch deleted file mode 100644 index 17012902..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch +++ /dev/null @@ -1,222 +0,0 @@ -From ed34265c5f460b645a0669079fbc6ad094c83c96 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Thu, 3 May 2018 14:56:12 -0700 -Subject: [PATCH 54/93] seccomp: Add filter flag to opt-out of SSB mitigation - -commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream - -If a seccomp user is not interested in Speculative Store Bypass mitigation -by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when -adding filters. 
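
For completeness, a hedged userspace sketch of how the new flag would be used (not part of the patch; it assumes __NR_seccomp and the uapi seccomp/filter headers are available, and re-defines the flag with the value from the uapi hunk if the installed headers are older):

    /* Sketch: install a minimal allow-all filter without forcing the SSB mitigation. */
    #include <stdio.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>

    #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
    # define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
    #endif

    int main(void)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
            };
            struct sock_fprog prog = {
                    .len = 1,
                    .filter = insns,
            };

            if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                    perror("no_new_privs");

            if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
                        SECCOMP_FILTER_FLAG_SPEC_ALLOW, &prog))
                    perror("seccomp");
            else
                    printf("filter installed; SSB mitigation left as-is\n");
            return 0;
    }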
- -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/seccomp.h | 3 +- - include/uapi/linux/seccomp.h | 4 +- - kernel/seccomp.c | 19 ++++--- - tools/testing/selftests/seccomp/seccomp_bpf.c | 78 ++++++++++++++++++++++++++- - 4 files changed, 93 insertions(+), 11 deletions(-) - -diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h -index ecc296c..50c460a 100644 ---- a/include/linux/seccomp.h -+++ b/include/linux/seccomp.h -@@ -3,7 +3,8 @@ - - #include <uapi/linux/seccomp.h> - --#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) -+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ -+ SECCOMP_FILTER_FLAG_SPEC_ALLOW) - - #ifdef CONFIG_SECCOMP - -diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h -index 0f238a4..e4acb61 100644 ---- a/include/uapi/linux/seccomp.h -+++ b/include/uapi/linux/seccomp.h -@@ -15,7 +15,9 @@ - #define SECCOMP_SET_MODE_FILTER 1 - - /* Valid flags for SECCOMP_SET_MODE_FILTER */ --#define SECCOMP_FILTER_FLAG_TSYNC 1 -+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) -+/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */ -+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) - - /* - * All BPF programs must return a 32-bit value. -diff --git a/kernel/seccomp.c b/kernel/seccomp.c -index a0bd6ea..62a60e7 100644 ---- a/kernel/seccomp.c -+++ b/kernel/seccomp.c -@@ -230,7 +230,8 @@ static inline void spec_mitigate(struct task_struct *task, - } - - static inline void seccomp_assign_mode(struct task_struct *task, -- unsigned long seccomp_mode) -+ unsigned long seccomp_mode, -+ unsigned long flags) - { - assert_spin_locked(&task->sighand->siglock); - -@@ -240,8 +241,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, - * filter) is set. - */ - smp_mb__before_atomic(); -- /* Assume seccomp processes want speculation flaw mitigation. */ -- spec_mitigate(task, PR_SPEC_STORE_BYPASS); -+ /* Assume default seccomp processes want spec flaw mitigation. */ -+ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) -+ spec_mitigate(task, PR_SPEC_STORE_BYPASS); - set_tsk_thread_flag(task, TIF_SECCOMP); - } - -@@ -309,7 +311,7 @@ static inline pid_t seccomp_can_sync_threads(void) - * without dropping the locks. - * - */ --static inline void seccomp_sync_threads(void) -+static inline void seccomp_sync_threads(unsigned long flags) - { - struct task_struct *thread, *caller; - -@@ -350,7 +352,8 @@ static inline void seccomp_sync_threads(void) - * allow one thread to transition the other. - */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) -- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); -+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, -+ flags); - } - } - -@@ -469,7 +472,7 @@ static long seccomp_attach_filter(unsigned int flags, - - /* Now that the new filter is in place, synchronize to all threads. */ - if (flags & SECCOMP_FILTER_FLAG_TSYNC) -- seccomp_sync_threads(); -+ seccomp_sync_threads(flags); - - return 0; - } -@@ -729,7 +732,7 @@ static long seccomp_set_mode_strict(void) - #ifdef TIF_NOTSC - disable_TSC(); - #endif -- seccomp_assign_mode(current, seccomp_mode); -+ seccomp_assign_mode(current, seccomp_mode, 0); - ret = 0; - - out: -@@ -787,7 +790,7 @@ static long seccomp_set_mode_filter(unsigned int flags, - /* Do not free the successfully attached filter. 
*/ - prepared = NULL; - -- seccomp_assign_mode(current, seccomp_mode); -+ seccomp_assign_mode(current, seccomp_mode, flags); - out: - spin_unlock_irq(¤t->sighand->siglock); - if (flags & SECCOMP_FILTER_FLAG_TSYNC) -diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c -index 03f1fa4..3362f11 100644 ---- a/tools/testing/selftests/seccomp/seccomp_bpf.c -+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c -@@ -1684,7 +1684,11 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) - #endif - - #ifndef SECCOMP_FILTER_FLAG_TSYNC --#define SECCOMP_FILTER_FLAG_TSYNC 1 -+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) -+#endif -+ -+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW -+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) - #endif - - #ifndef seccomp -@@ -1783,6 +1787,78 @@ TEST(seccomp_syscall_mode_lock) - } - } - -+/* -+ * Test detection of known and unknown filter flags. Userspace needs to be able -+ * to check if a filter flag is supported by the current kernel and a good way -+ * of doing that is by attempting to enter filter mode, with the flag bit in -+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates -+ * that the flag is valid and EINVAL indicates that the flag is invalid. -+ */ -+TEST(detect_seccomp_filter_flags) -+{ -+ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, -+ SECCOMP_FILTER_FLAG_SPEC_ALLOW }; -+ unsigned int flag, all_flags; -+ int i; -+ long ret; -+ -+ /* Test detection of known-good filter flags */ -+ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { -+ int bits = 0; -+ -+ flag = flags[i]; -+ /* Make sure the flag is a single bit! */ -+ while (flag) { -+ if (flag & 0x1) -+ bits ++; -+ flag >>= 1; -+ } -+ ASSERT_EQ(1, bits); -+ flag = flags[i]; -+ -+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); -+ ASSERT_NE(ENOSYS, errno) { -+ TH_LOG("Kernel does not support seccomp syscall!"); -+ } -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EFAULT, errno) { -+ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", -+ flag); -+ } -+ -+ all_flags |= flag; -+ } -+ -+ /* Test detection of all known-good filter flags */ -+ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EFAULT, errno) { -+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", -+ all_flags); -+ } -+ -+ /* Test detection of an unknown filter flag */ -+ flag = -1; -+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno) { -+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", -+ flag); -+ } -+ -+ /* -+ * Test detection of an unknown filter flag that may simply need to be -+ * added to this test -+ */ -+ flag = flags[ARRAY_SIZE(flags) - 1] << 1; -+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); -+ EXPECT_EQ(-1, ret); -+ EXPECT_EQ(EINVAL, errno) { -+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! 
Does a new flag need to be added to this test?", -+ flag); -+ } -+} -+ - TEST(TSYNC_first) - { - struct sock_filter filter[] = { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-sysfs-cpu-Add-vulnerability-folder.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-sysfs-cpu-Add-vulnerability-folder.patch deleted file mode 100644 index a7a0183b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-sysfs-cpu-Add-vulnerability-folder.patch +++ /dev/null @@ -1,157 +0,0 @@ -From a03760456446357758d4f6702df7cb9446e022e0 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 7 Jan 2018 22:48:00 +0100 -Subject: [PATCH 054/103] sysfs/cpu: Add vulnerability folder - -commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream. - -As the meltdown/spectre problem affects several CPU architectures, it makes -sense to have common way to express whether a system is affected by a -particular vulnerability or not. If affected the way to express the -mitigation should be common as well. - -Create /sys/devices/system/cpu/vulnerabilities folder and files for -meltdown, spectre_v1 and spectre_v2. - -Allow architectures to override the show function. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Will Deacon <will.deacon@arm.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linuxfoundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/ABI/testing/sysfs-devices-system-cpu | 16 ++++++++ - drivers/base/Kconfig | 3 ++ - drivers/base/cpu.c | 48 ++++++++++++++++++++++ - include/linux/cpu.h | 7 ++++ - 4 files changed, 74 insertions(+) - -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index 4987417..8b30a48 100644 ---- a/Documentation/ABI/testing/sysfs-devices-system-cpu -+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org> - Description: AArch64 CPU registers - 'identification' directory exposes the CPU ID registers for - identifying model and revision of the CPU. -+ -+What: /sys/devices/system/cpu/vulnerabilities -+ /sys/devices/system/cpu/vulnerabilities/meltdown -+ /sys/devices/system/cpu/vulnerabilities/spectre_v1 -+ /sys/devices/system/cpu/vulnerabilities/spectre_v2 -+Date: Januar 2018 -+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> -+Description: Information about CPU vulnerabilities -+ -+ The files are named after the code names of CPU -+ vulnerabilities. The output of those files reflects the -+ state of the CPUs in the system. 
Possible output values: -+ -+ "Not affected" CPU is not affected by the vulnerability -+ "Vulnerable" CPU is affected and no mitigation in effect -+ "Mitigation: $M" CPU is affetcted and mitigation $M is in effect -diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig -index d02e7c0..0651010 100644 ---- a/drivers/base/Kconfig -+++ b/drivers/base/Kconfig -@@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES - config GENERIC_CPU_AUTOPROBE - bool - -+config GENERIC_CPU_VULNERABILITIES -+ bool -+ - config SOC_BUS - bool - -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index 4c28e1a..56b6c85 100644 ---- a/drivers/base/cpu.c -+++ b/drivers/base/cpu.c -@@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void) - #endif - } - -+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES -+ -+ssize_t __weak cpu_show_meltdown(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "Not affected\n"); -+} -+ -+ssize_t __weak cpu_show_spectre_v1(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "Not affected\n"); -+} -+ -+ssize_t __weak cpu_show_spectre_v2(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "Not affected\n"); -+} -+ -+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); -+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); -+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); -+ -+static struct attribute *cpu_root_vulnerabilities_attrs[] = { -+ &dev_attr_meltdown.attr, -+ &dev_attr_spectre_v1.attr, -+ &dev_attr_spectre_v2.attr, -+ NULL -+}; -+ -+static const struct attribute_group cpu_root_vulnerabilities_group = { -+ .name = "vulnerabilities", -+ .attrs = cpu_root_vulnerabilities_attrs, -+}; -+ -+static void __init cpu_register_vulnerabilities(void) -+{ -+ if (sysfs_create_group(&cpu_subsys.dev_root->kobj, -+ &cpu_root_vulnerabilities_group)) -+ pr_err("Unable to register CPU vulnerabilities\n"); -+} -+ -+#else -+static inline void cpu_register_vulnerabilities(void) { } -+#endif -+ - void __init cpu_dev_init(void) - { - if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) - panic("Failed to register CPU subsystem"); - - cpu_dev_register_generic(); -+ cpu_register_vulnerabilities(); - } -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index e571128..2f475ad 100644 ---- a/include/linux/cpu.h -+++ b/include/linux/cpu.h -@@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); - extern int cpu_add_dev_attr_group(struct attribute_group *attrs); - extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); - -+extern ssize_t cpu_show_meltdown(struct device *dev, -+ struct device_attribute *attr, char *buf); -+extern ssize_t cpu_show_spectre_v1(struct device *dev, -+ struct device_attribute *attr, char *buf); -+extern ssize_t cpu_show_spectre_v2(struct device *dev, -+ struct device_attribute *attr, char *buf); -+ - extern __printf(4, 5) - struct device *cpu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch deleted file mode 100644 index ca98b862..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch +++ /dev/null @@ -1,121 +0,0 @@ 
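
The EFAULT/EINVAL probing recipe that detect_seccomp_filter_flags() relies on above works just as well outside the kselftest harness. A minimal standalone probe could look like the sketch below; it is illustrative only and not part of any patch in this series (it assumes a libc without a seccomp() wrapper, hence the raw syscall(2), and redefines the flag in case older uapi headers lack it):

#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif
#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

int main(void)
{
	/* NULL args: EFAULT = flag known to this kernel, EINVAL = unknown. */
	long ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
			   SECCOMP_FILTER_FLAG_SPEC_ALLOW, NULL);

	if (ret == -1 && errno == EFAULT)
		puts("SECCOMP_FILTER_FLAG_SPEC_ALLOW: supported");
	else if (ret == -1 && errno == EINVAL)
		puts("SECCOMP_FILTER_FLAG_SPEC_ALLOW: not supported");
	else
		puts("seccomp(2) unavailable on this kernel");
	return 0;
}
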
-From 2a4ae48837c977605ea36a01ed63fa8638e4c881 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 May 2018 15:12:06 +0200 -Subject: [PATCH 55/93] seccomp: Move speculation migitation control to arch - code - -commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream - -The migitation control is simpler to implement in architecture code as it -avoids the extra function call to check the mode. Aside of that having an -explicit seccomp enabled mode in the architecture mitigations would require -even more workarounds. - -Move it into architecture code and provide a weak function in the seccomp -code. Remove the 'which' argument as this allows the architecture to decide -which mitigations are relevant for seccomp. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++----------- - include/linux/nospec.h | 2 ++ - kernel/seccomp.c | 15 ++------------- - 3 files changed, 22 insertions(+), 24 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index fdbd8e5..131617d 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -568,6 +568,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - return 0; - } - -+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, -+ unsigned long ctrl) -+{ -+ switch (which) { -+ case PR_SPEC_STORE_BYPASS: -+ return ssb_prctl_set(task, ctrl); -+ default: -+ return -ENODEV; -+ } -+} -+ -+#ifdef CONFIG_SECCOMP -+void arch_seccomp_spec_mitigate(struct task_struct *task) -+{ -+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); -+} -+#endif -+ - static int ssb_prctl_get(struct task_struct *task) - { - switch (ssb_mode) { -@@ -586,17 +604,6 @@ static int ssb_prctl_get(struct task_struct *task) - } - } - --int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, -- unsigned long ctrl) --{ -- switch (which) { -- case PR_SPEC_STORE_BYPASS: -- return ssb_prctl_set(task, ctrl); -- default: -- return -ENODEV; -- } --} -- - int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) - { - switch (which) { -diff --git a/include/linux/nospec.h b/include/linux/nospec.h -index a908c95..0c5ef54 100644 ---- a/include/linux/nospec.h -+++ b/include/linux/nospec.h -@@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); - int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl); -+/* Speculation control for seccomp enforced mitigation */ -+void arch_seccomp_spec_mitigate(struct task_struct *task); - - #endif /* _LINUX_NOSPEC_H */ -diff --git a/kernel/seccomp.c b/kernel/seccomp.c -index 62a60e7..3975856 100644 ---- a/kernel/seccomp.c -+++ b/kernel/seccomp.c -@@ -216,18 +216,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) - return true; - } - --/* -- * If a given speculation mitigation is opt-in (prctl()-controlled), -- * select it, by disabling speculation (enabling mitigation). 
-- */ --static inline void spec_mitigate(struct task_struct *task, -- unsigned long which) --{ -- int state = arch_prctl_spec_ctrl_get(task, which); -- -- if (state > 0 && (state & PR_SPEC_PRCTL)) -- arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); --} -+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } - - static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode, -@@ -243,7 +232,7 @@ static inline void seccomp_assign_mode(struct task_struct *task, - smp_mb__before_atomic(); - /* Assume default seccomp processes want spec flaw mitigation. */ - if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) -- spec_mitigate(task, PR_SPEC_STORE_BYPASS); -+ arch_seccomp_spec_mitigate(task); - set_tsk_thread_flag(task, TIF_SECCOMP); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch deleted file mode 100644 index 84049cea..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 07101c90d59d128819a47b0274cbdd02750dfd1a Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 7 Jan 2018 22:48:01 +0100 -Subject: [PATCH 055/103] x86/cpu: Implement CPU vulnerabilites sysfs functions - -commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream. - -Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and -spectre_v2. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Will Deacon <will.deacon@arm.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Linus Torvalds <torvalds@linuxfoundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/Kconfig | 1 + - arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index bada636..e0f6590 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -64,6 +64,7 @@ config X86 - select GENERIC_CLOCKEVENTS_MIN_ADJUST - select GENERIC_CMOS_UPDATE - select GENERIC_CPU_AUTOPROBE -+ select GENERIC_CPU_VULNERABILITIES - select GENERIC_EARLY_IOREMAP - select GENERIC_FIND_FIRST_BIT - select GENERIC_IOMAP -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index a44ef52..cb6b4f9 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -9,6 +9,7 @@ - */ - #include <linux/init.h> - #include <linux/utsname.h> -+#include <linux/cpu.h> - #include <asm/bugs.h> - #include <asm/processor.h> - #include <asm/processor-flags.h> -@@ -59,3 +60,31 @@ void __init check_bugs(void) - set_memory_4k((unsigned long)__va(0), 1); - #endif - } -+ -+#ifdef CONFIG_SYSFS -+ssize_t cpu_show_meltdown(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) -+ return sprintf(buf, "Not affected\n"); -+ if (boot_cpu_has(X86_FEATURE_KAISER)) -+ return sprintf(buf, "Mitigation: PTI\n"); -+ return sprintf(buf, 
"Vulnerable\n"); -+} -+ -+ssize_t cpu_show_spectre_v1(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) -+ return sprintf(buf, "Not affected\n"); -+ return sprintf(buf, "Vulnerable\n"); -+} -+ -+ssize_t cpu_show_spectre_v2(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -+ return sprintf(buf, "Not affected\n"); -+ return sprintf(buf, "Vulnerable\n"); -+} -+#endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch deleted file mode 100644 index 188ff893..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 8f7353cbbd5a5a54c876c2be30217ba568e5989c Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Mon, 8 Jan 2018 16:09:21 -0600 -Subject: [PATCH 056/103] x86/cpu/AMD: Make LFENCE a serializing instruction - -commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream. - -To aid in speculation control, make LFENCE a serializing instruction -since it has less overhead than MFENCE. This is done by setting bit 1 -of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not -have this MSR. For these families, the LFENCE instruction is already -serializing. - -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 2 ++ - arch/x86/kernel/cpu/amd.c | 10 ++++++++++ - 2 files changed, 12 insertions(+) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 78f3760..b1c0969 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -330,6 +330,8 @@ - #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL - #define FAM10H_MMIO_CONF_BASE_SHIFT 20 - #define MSR_FAM10H_NODE_ID 0xc001100c -+#define MSR_F10H_DECFG 0xc0011029 -+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 - - /* K8 MSRs */ - #define MSR_K8_TOP_MEM1 0xc001001a -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 2b4cf04..8b5b19d 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86 *c) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has(c, X86_FEATURE_XMM2)) { -+ /* -+ * A serializing LFENCE has less overhead than MFENCE, so -+ * use it for execution serialization. On families which -+ * don't have that MSR, LFENCE is already serializing. -+ * msr_set_bit() uses the safe accessors, too, even if the MSR -+ * is not present. 
-+ */ -+ msr_set_bit(MSR_F10H_DECFG, -+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); -+ - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch deleted file mode 100644 index 21edf610..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c9379df089e45eab50820798e3e98aee3b1e5adf Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Thu, 3 May 2018 14:37:54 -0700 -Subject: [PATCH 56/93] x86/speculation: Make "seccomp" the default mode for - Speculative Store Bypass - -commit f21b53b20c754021935ea43364dbf53778eeba32 upstream - -Unless explicitly opted out of, anything running under seccomp will have -SSB mitigations enabled. Choosing the "prctl" mode will disable this. - -[ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ] - -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 26 +++++++++++++++++--------- - arch/x86/include/asm/nospec-branch.h | 1 + - arch/x86/kernel/cpu/bugs.c | 32 +++++++++++++++++++++++--------- - 3 files changed, 41 insertions(+), 18 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 80811df..2c5df33 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -3986,19 +3986,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - This parameter controls whether the Speculative Store - Bypass optimization is used. - -- on - Unconditionally disable Speculative Store Bypass -- off - Unconditionally enable Speculative Store Bypass -- auto - Kernel detects whether the CPU model contains an -- implementation of Speculative Store Bypass and -- picks the most appropriate mitigation. -- prctl - Control Speculative Store Bypass per thread -- via prctl. Speculative Store Bypass is enabled -- for a process by default. The state of the control -- is inherited on fork. -+ on - Unconditionally disable Speculative Store Bypass -+ off - Unconditionally enable Speculative Store Bypass -+ auto - Kernel detects whether the CPU model contains an -+ implementation of Speculative Store Bypass and -+ picks the most appropriate mitigation. If the -+ CPU is not vulnerable, "off" is selected. If the -+ CPU is vulnerable the default mitigation is -+ architecture and Kconfig dependent. See below. -+ prctl - Control Speculative Store Bypass per thread -+ via prctl. Speculative Store Bypass is enabled -+ for a process by default. The state of the control -+ is inherited on fork. -+ seccomp - Same as "prctl" above, but all seccomp threads -+ will disable SSB unless they explicitly opt out. - - Not specifying this option is equivalent to - spec_store_bypass_disable=auto. 
- -+ Default mitigations: -+ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" -+ - spia_io_base= [HW,MTD] - spia_fio_base= - spia_pedr= -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 71ad014..328ea3c 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -233,6 +233,7 @@ enum ssb_mitigation { - SPEC_STORE_BYPASS_NONE, - SPEC_STORE_BYPASS_DISABLE, - SPEC_STORE_BYPASS_PRCTL, -+ SPEC_STORE_BYPASS_SECCOMP, - }; - - extern char __indirect_thunk_start[]; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 131617d..9a3bb65 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -415,22 +415,25 @@ enum ssb_mitigation_cmd { - SPEC_STORE_BYPASS_CMD_AUTO, - SPEC_STORE_BYPASS_CMD_ON, - SPEC_STORE_BYPASS_CMD_PRCTL, -+ SPEC_STORE_BYPASS_CMD_SECCOMP, - }; - - static const char *ssb_strings[] = { - [SPEC_STORE_BYPASS_NONE] = "Vulnerable", - [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", -- [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" -+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", -+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", - }; - - static const struct { - const char *option; - enum ssb_mitigation_cmd cmd; - } ssb_mitigation_options[] = { -- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ -- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ -- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ -+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ -+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ -+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ -+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ - }; - - static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) -@@ -480,8 +483,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - - switch (cmd) { - case SPEC_STORE_BYPASS_CMD_AUTO: -- /* Choose prctl as the default mode */ -- mode = SPEC_STORE_BYPASS_PRCTL; -+ case SPEC_STORE_BYPASS_CMD_SECCOMP: -+ /* -+ * Choose prctl+seccomp as the default mode if seccomp is -+ * enabled. 
-+ */ -+ if (IS_ENABLED(CONFIG_SECCOMP)) -+ mode = SPEC_STORE_BYPASS_SECCOMP; -+ else -+ mode = SPEC_STORE_BYPASS_PRCTL; - break; - case SPEC_STORE_BYPASS_CMD_ON: - mode = SPEC_STORE_BYPASS_DISABLE; -@@ -529,12 +539,14 @@ static void ssb_select_mitigation() - } - - #undef pr_fmt -+#define pr_fmt(fmt) "Speculation prctl: " fmt - - static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - { - bool update; - -- if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) -+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && -+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP) - return -ENXIO; - - switch (ctrl) { -@@ -582,7 +594,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - #ifdef CONFIG_SECCOMP - void arch_seccomp_spec_mitigate(struct task_struct *task) - { -- ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); -+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) -+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - } - #endif - -@@ -591,6 +604,7 @@ static int ssb_prctl_get(struct task_struct *task) - switch (ssb_mode) { - case SPEC_STORE_BYPASS_DISABLE: - return PR_SPEC_DISABLE; -+ case SPEC_STORE_BYPASS_SECCOMP: - case SPEC_STORE_BYPASS_PRCTL: - if (task_spec_ssb_force_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch deleted file mode 100644 index 189588aa..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch +++ /dev/null @@ -1,405 +0,0 @@ -From 4bb9a717246aa3019a3d97904e29c4da0bfc37f9 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 9 May 2018 21:41:38 +0200 -Subject: [PATCH 57/93] x86/bugs: Rename _RDS to _SSBD - -commit 9f65fb29374ee37856dbad847b4e121aab72b510 upstream - -Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2] -as SSBD (Speculative Store Bypass Disable). - -Hence changing it. - -It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name -is going to be. Following the rename it would be SSBD_NO but that rolls out -to Speculative Store Bypass Disable No. - -Also fixed the missing space in X86_FEATURE_AMD_SSBD. 
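
The per-thread control that the "prctl" and "seccomp" modes above refer to is driven from userspace through PR_GET_SPECULATION_CTRL / PR_SET_SPECULATION_CTRL. The following is a hedged sketch, not part of the patch series; the fallback defines mirror the uapi values in case <sys/prctl.h> predates the speculation-control backport:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_SPECULATION_CTRL
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#define PR_SPEC_STORE_BYPASS 0
#define PR_SPEC_PRCTL (1UL << 0)
#define PR_SPEC_DISABLE (1UL << 2)
#endif

int main(void)
{
	/* Query the Speculative Store Bypass state of this thread. */
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (state < 0) {
		perror("PR_GET_SPECULATION_CTRL"); /* ENODEV/ENXIO: no control */
		return 1;
	}
	printf("SSB state: 0x%x\n", state);

	/* If prctl control is available, force the mitigation on for us
	 * (PR_SPEC_DISABLE disables speculation, i.e. enables mitigation). */
	if ((state & PR_SPEC_PRCTL) &&
	    prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_DISABLE, 0, 0) < 0)
		perror("PR_SET_SPECULATION_CTRL");
	return 0;
}
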
- -[ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ] - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 4 ++-- - arch/x86/include/asm/msr-index.h | 10 +++++----- - arch/x86/include/asm/spec-ctrl.h | 12 ++++++------ - arch/x86/include/asm/thread_info.h | 6 +++--- - arch/x86/kernel/cpu/amd.c | 14 +++++++------- - arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++------------------ - arch/x86/kernel/cpu/common.c | 2 +- - arch/x86/kernel/cpu/intel.c | 2 +- - arch/x86/kernel/process.c | 8 ++++---- - arch/x86/kvm/cpuid.c | 2 +- - arch/x86/kvm/cpuid.h | 2 +- - arch/x86/kvm/vmx.c | 2 +- - 12 files changed, 50 insertions(+), 50 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 8797069..0ed8ea5 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -205,7 +205,7 @@ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ - #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ --#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ -+#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -@@ -308,7 +308,7 @@ - #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ - #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ --#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ -+#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ - - /* - * BUG word(s) -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 7ad3ed9..0145a0b 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -40,8 +40,8 @@ - #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ - #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ - #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ --#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ --#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ -+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ - - #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ - #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ -@@ -63,10 +63,10 @@ - #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a - #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ - #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ --#define ARCH_CAP_RDS_NO (1 << 4) /* -+#define ARCH_CAP_SSBD_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass -- * attack, so no Reduced Data Speculation control -- * required. -+ * attack, so no Speculative Store Bypass -+ * control required. 
- */ - - #define MSR_IA32_BBL_CR_CTL 0x00000119 -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 45ef00a..dc21209 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64); - - /* AMD specific Speculative Store Bypass MSR data */ - extern u64 x86_amd_ls_cfg_base; --extern u64 x86_amd_ls_cfg_rds_mask; -+extern u64 x86_amd_ls_cfg_ssbd_mask; - - /* The Intel SPEC CTRL MSR base value cache */ - extern u64 x86_spec_ctrl_base; - --static inline u64 rds_tif_to_spec_ctrl(u64 tifn) -+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) - { -- BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); -- return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); -+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); -+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); - } - --static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) -+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) - { -- return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; -+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; - } - - extern void speculative_store_bypass_update(void); -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 661afac..2d8788a 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -83,7 +83,7 @@ struct thread_info { - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ - #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ --#define TIF_RDS 5 /* Reduced data speculation */ -+#define TIF_SSBD 5 /* Reduced data speculation */ - #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ - #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ - #define TIF_SECCOMP 8 /* secure computing */ -@@ -107,7 +107,7 @@ struct thread_info { - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) --#define _TIF_RDS (1 << TIF_RDS) -+#define _TIF_SSBD (1 << TIF_SSBD) - #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) - #define _TIF_SECCOMP (1 << TIF_SECCOMP) -@@ -141,7 +141,7 @@ struct thread_info { - - /* flags to check in __switch_to() */ - #define _TIF_WORK_CTXSW \ -- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) -+ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) - - #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) - #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index a176c81..acb2fcc 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -555,12 +555,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) - } - /* - * Try to cache the base value so further operations can -- * avoid RMW. If that faults, do not enable RDS. -+ * avoid RMW. If that faults, do not enable SSBD. 
- */ - if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { -- setup_force_cpu_cap(X86_FEATURE_RDS); -- setup_force_cpu_cap(X86_FEATURE_AMD_RDS); -- x86_amd_ls_cfg_rds_mask = 1ULL << bit; -+ setup_force_cpu_cap(X86_FEATURE_SSBD); -+ setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); -+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; - } - } - } -@@ -849,9 +849,9 @@ static void init_amd(struct cpuinfo_x86 *c) - if (!cpu_has(c, X86_FEATURE_XENPV)) - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - -- if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { -- set_cpu_cap(c, X86_FEATURE_RDS); -- set_cpu_cap(c, X86_FEATURE_AMD_RDS); -+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { -+ set_cpu_cap(c, X86_FEATURE_SSBD); -+ set_cpu_cap(c, X86_FEATURE_AMD_SSBD); - } - } - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 9a3bb65..ae6f9ba 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -44,10 +44,10 @@ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; - - /* - * AMD specific MSR info for Speculative Store Bypass control. -- * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). -+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). - */ - u64 __ro_after_init x86_amd_ls_cfg_base; --u64 __ro_after_init x86_amd_ls_cfg_rds_mask; -+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; - - void __init check_bugs(void) - { -@@ -145,7 +145,7 @@ u64 x86_spec_ctrl_get_default(void) - u64 msrval = x86_spec_ctrl_base; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -- msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - return msrval; - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -@@ -158,7 +158,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) - return; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -- host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); -@@ -173,18 +173,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) - return; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -- host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); -+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, host); - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); - --static void x86_amd_rds_enable(void) -+static void x86_amd_ssb_disable(void) - { -- u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; -+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - -- if (boot_cpu_has(X86_FEATURE_AMD_RDS)) -+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) - wrmsrl(MSR_AMD64_LS_CFG, msrval); - } - -@@ -472,7 +472,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; - enum ssb_mitigation_cmd cmd; - -- if (!boot_cpu_has(X86_FEATURE_RDS)) -+ if (!boot_cpu_has(X86_FEATURE_SSBD)) - return mode; - - cmd = ssb_parse_cmdline(); -@@ -506,7 +506,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - /* - * We have three CPU feature flags that are in play here: - * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 
-- * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass -+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass - * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation - */ - if (mode == SPEC_STORE_BYPASS_DISABLE) { -@@ -517,12 +517,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) - */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: -- x86_spec_ctrl_base |= SPEC_CTRL_RDS; -- x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; -- x86_spec_ctrl_set(SPEC_CTRL_RDS); -+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD; -+ x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; -+ x86_spec_ctrl_set(SPEC_CTRL_SSBD); - break; - case X86_VENDOR_AMD: -- x86_amd_rds_enable(); -+ x86_amd_ssb_disable(); - break; - } - } -@@ -555,16 +555,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - if (task_spec_ssb_force_disable(task)) - return -EPERM; - task_clear_spec_ssb_disable(task); -- update = test_and_clear_tsk_thread_flag(task, TIF_RDS); -+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); - break; - case PR_SPEC_DISABLE: - task_set_spec_ssb_disable(task); -- update = !test_and_set_tsk_thread_flag(task, TIF_RDS); -+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); - break; - case PR_SPEC_FORCE_DISABLE: - task_set_spec_ssb_disable(task); - task_set_spec_ssb_force_disable(task); -- update = !test_and_set_tsk_thread_flag(task, TIF_RDS); -+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); - break; - default: - return -ERANGE; -@@ -634,7 +634,7 @@ void x86_spec_ctrl_setup_ap(void) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); - - if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) -- x86_amd_rds_enable(); -+ x86_amd_ssb_disable(); - } - - #ifdef CONFIG_SYSFS -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index beb1da8..d0dd736 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -911,7 +911,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - - if (!x86_match_cpu(cpu_no_spec_store_bypass) && -- !(ia32_cap & ARCH_CAP_RDS_NO)) -+ !(ia32_cap & ARCH_CAP_SSBD_NO)) - setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - - if (x86_match_cpu(cpu_no_speculation)) -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index f15aea6..047adaa 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -154,7 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - setup_clear_cpu_cap(X86_FEATURE_STIBP); - setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); - setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); -- setup_clear_cpu_cap(X86_FEATURE_RDS); -+ setup_clear_cpu_cap(X86_FEATURE_SSBD); - } - - /* -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 9c48e18..c344230 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -207,11 +207,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn - { - u64 msr; - -- if (static_cpu_has(X86_FEATURE_AMD_RDS)) { -- msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); -+ if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { -+ msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); - wrmsrl(MSR_AMD64_LS_CFG, msr); - } else { -- msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); -+ msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - wrmsrl(MSR_IA32_SPEC_CTRL, msr); - } - } -@@ -250,7 +250,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct 
task_struct *next_p, - if ((tifp ^ tifn) & _TIF_NOTSC) - cr4_toggle_bits(X86_CR4_TSD); - -- if ((tifp ^ tifn) & _TIF_RDS) -+ if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); - } - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 237e926..db95637 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 7.0.edx*/ - const u32 kvm_cpuid_7_0_edx_x86_features = -- F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); -+ F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); - - /* all calls to cpuid_count() should be made on the same cpu */ - get_cpu(); -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 39dd457..72551c5 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) - if (best && (best->ebx & bit(X86_FEATURE_IBRS))) - return true; - best = kvm_find_cpuid_entry(vcpu, 7, 0); -- return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); -+ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); - } - - static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 17199dc..c7df5c4 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -3133,7 +3133,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - return 1; - - /* The STIBP bit doesn't fault even if it's not advertised */ -- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) -+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) - return 1; - - vmx->spec_ctrl = data; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch deleted file mode 100644 index faad5ca4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch +++ /dev/null @@ -1,86 +0,0 @@ -From f2abe3ce255832f6ee98887777b25b58cfcc9015 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Mon, 8 Jan 2018 16:09:32 -0600 -Subject: [PATCH 057/103] x86/cpu/AMD: Use LFENCE_RDTSC in preference to - MFENCE_RDTSC - -commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream. - -With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference -to MFENCE_RDTSC. However, since the kernel could be running under a -hypervisor that does not support writing that MSR, read the MSR back and -verify that the bit has been set successfully. If the MSR can be read -and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the -MFENCE_RDTSC feature. 
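
Stripped of the surrounding driver code, the write-then-verify idiom this commit message describes (and which the hunk further below implements) is only a few lines. This is a condensed kernel-style sketch, not a literal copy of the patch; it reuses only helpers and constants that appear in the patch itself:

static void lfence_serialize_or_fallback(struct cpuinfo_x86 *c)
{
	u64 val;

	/* Request serializing LFENCE; msr_set_bit() uses the safe
	 * accessors, so a missing MSR does not fault. */
	msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);

	/* Read back: a hypervisor may have silently dropped the write. */
	if (!rdmsrl_safe(MSR_F10H_DECFG, &val) &&
	    (val & MSR_F10H_DECFG_LFENCE_SERIALIZE))
		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
	else
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
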
- -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net -Signed-off-by: Razvan Ghitulete <rga@amazon.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 1 + - arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++-- - 2 files changed, 17 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index b1c0969..4eeaa36 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -332,6 +332,7 @@ - #define MSR_FAM10H_NODE_ID 0xc001100c - #define MSR_F10H_DECFG 0xc0011029 - #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) - - /* K8 MSRs */ - #define MSR_K8_TOP_MEM1 0xc001001a -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 8b5b19d..1b89f0c 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86 *c) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has(c, X86_FEATURE_XMM2)) { -+ unsigned long long val; -+ int ret; -+ - /* - * A serializing LFENCE has less overhead than MFENCE, so - * use it for execution serialization. On families which -@@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86 *c) - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - -- /* MFENCE stops RDTSC speculation */ -- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); -+ /* -+ * Verify that the MSR write was successful (could be running -+ * under a hypervisor) and only then assume that LFENCE is -+ * serializing. -+ */ -+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val); -+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { -+ /* A serializing LFENCE stops RDTSC speculation */ -+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -+ } else { -+ /* MFENCE stops RDTSC speculation */ -+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); -+ } - } - - /* --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch deleted file mode 100644 index 86259f27..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch +++ /dev/null @@ -1,37 +0,0 @@ -From e7d0d7ecd8f7d78d7fb2b15324bb79fc5de42776 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Tue, 9 Jan 2018 15:02:51 +0000 -Subject: [PATCH 058/103] sysfs/cpu: Fix typos in vulnerability documentation - -commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream. 
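
Since the vulnerability files whose documentation this patch corrects are plain text, userspace can consume them directly. As a purely illustrative reader, not part of any patch here (paths and file names as added by the vulnerability-folder patch above):

#include <stdio.h>

int main(void)
{
	static const char *bugs[] = { "meltdown", "spectre_v1", "spectre_v2" };
	char path[128], line[256];

	for (size_t i = 0; i < sizeof(bugs) / sizeof(bugs[0]); i++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/vulnerabilities/%s", bugs[i]);
		FILE *f = fopen(path, "r");

		/* Each file holds one line, e.g. "Mitigation: PTI\n". */
		if (f && fgets(line, sizeof(line), f))
			printf("%-10s: %s", bugs[i], line);
		if (f)
			fclose(f);
	}
	return 0;
}
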
- -Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder") -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index 8b30a48..dfd56ec 100644 ---- a/Documentation/ABI/testing/sysfs-devices-system-cpu -+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -355,7 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities - /sys/devices/system/cpu/vulnerabilities/meltdown - /sys/devices/system/cpu/vulnerabilities/spectre_v1 - /sys/devices/system/cpu/vulnerabilities/spectre_v2 --Date: Januar 2018 -+Date: January 2018 - Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> - Description: Information about CPU vulnerabilities - -@@ -365,4 +365,4 @@ Description: Information about CPU vulnerabilities - - "Not affected" CPU is not affected by the vulnerability - "Vulnerable" CPU is affected and no mitigation in effect -- "Mitigation: $M" CPU is affetcted and mitigation $M is in effect -+ "Mitigation: $M" CPU is affected and mitigation $M is in effect --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch deleted file mode 100644 index f24bec49..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 1372f3493fdf1eaaeb82c4f3770a38aad5541f3b Mon Sep 17 00:00:00 2001 -From: Jiri Kosina <jkosina@suse.cz> -Date: Thu, 10 May 2018 22:47:18 +0200 -Subject: [PATCH 58/93] x86/bugs: Fix __ssb_select_mitigation() return type - -commit d66d8ff3d21667b41eddbe86b35ab411e40d8c5f upstream - -__ssb_select_mitigation() returns one of the members of enum ssb_mitigation, -not ssb_mitigation_cmd; fix the prototype to reflect that. 
- -Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") -Signed-off-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index ae6f9ba..c7b4d11 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -467,7 +467,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) - return cmd; - } - --static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) -+static enum ssb_mitigation __init __ssb_select_mitigation(void) - { - enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; - enum ssb_mitigation_cmd cmd; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Fix-optimize_nops-checking.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Fix-optimize_nops-checking.patch deleted file mode 100644 index b954c23c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-alternatives-Fix-optimize_nops-checking.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1ac5fc23c160f82956aa23fd595206a15d80d742 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Wed, 10 Jan 2018 12:28:16 +0100 -Subject: [PATCH 059/103] x86/alternatives: Fix optimize_nops() checking - -commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream. - -The alternatives code checks only the first byte whether it is a NOP, but -with NOPs in front of the payload and having actual instructions after it -breaks the "optimized' test. - -Make sure to scan all bytes before deciding to optimize the NOPs in there. 
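
In isolation, the stricter check amounts to scanning the whole padding area instead of only its first byte. An equivalent standalone helper, illustrative only and mirroring the loop the hunk below adds:

#include <stdbool.h>
#include <stdint.h>

/* True only if every one of the padlen bytes is a one-byte NOP (0x90). */
static bool pad_is_all_nops(const uint8_t *instr, int padlen)
{
	for (int i = 0; i < padlen; i++)
		if (instr[i] != 0x90)
			return false;
	return true;
}
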
- -Reported-by: David Woodhouse <dwmw2@infradead.org> -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Tom Lendacky <thomas.lendacky@amd.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Andi Kleen <andi@firstfloor.org> -Cc: Andrew Lutomirski <luto@kernel.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/alternative.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c -index 5cb272a..10d5a3d 100644 ---- a/arch/x86/kernel/alternative.c -+++ b/arch/x86/kernel/alternative.c -@@ -340,9 +340,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) - static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) - { - unsigned long flags; -+ int i; - -- if (instr[0] != 0x90) -- return; -+ for (i = 0; i < a->padlen; i++) { -+ if (instr[i] != 0x90) -+ return; -+ } - - local_irq_save(flags); - add_nops(instr + (a->instrlen - a->padlen), a->padlen); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch deleted file mode 100644 index 5dc616b5..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 3c2ec124e35d5a74d3ed660095591290dc1d549b Mon Sep 17 00:00:00 2001 -From: Jiri Kosina <jkosina@suse.cz> -Date: Thu, 10 May 2018 22:47:32 +0200 -Subject: [PATCH 59/93] x86/bugs: Make cpu_show_common() static - -commit 7bb4d366cba992904bffa4820d24e70a3de93e76 upstream - -cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so -make it static. 
- -Signed-off-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index c7b4d11..8187642 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void) - - #ifdef CONFIG_SYSFS - --ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, - char *buf, unsigned int bug) - { - if (!boot_cpu_has_bug(bug)) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch deleted file mode 100644 index 2fa8674a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 529d4e89ede9d0c39277255ec85692ce5f69cf89 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 4 Jan 2018 14:37:05 +0000 -Subject: [PATCH 060/103] x86/alternatives: Add missing '\n' at end of - ALTERNATIVE inline asm - -commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream. - -Where an ALTERNATIVE is used in the middle of an inline asm block, this -would otherwise lead to the following instruction being appended directly -to the trailing ".popsection", and a failed compile. - -Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection") -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: ak@linux.intel.com -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Paul Turner <pjt@google.com> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/alternative.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h -index 1b02038..10a4b2c 100644 ---- a/arch/x86/include/asm/alternative.h -+++ b/arch/x86/include/asm/alternative.h -@@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end) - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ -- ".popsection" -+ ".popsection\n" - - #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ - OLDINSTR_2(oldinstr, 1, 2) \ -@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end) - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ -- ".popsection" -+ ".popsection\n" - - /* - * Alternative instructions for different CPU 
types or capabilities. --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch deleted file mode 100644 index ef9f4216..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 947d5d98fb1328a22a8b502f8ce6f8e5657a5ec7 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Fri, 11 May 2018 16:50:35 -0400 -Subject: [PATCH 60/93] x86/bugs: Fix the parameters alignment and missing void - -commit ffed645e3be0e32f8e9ab068d257aee8d0fe8eec upstream - -Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static") -Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 8187642..4f8c88e 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -530,7 +530,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) - return mode; - } - --static void ssb_select_mitigation() -+static void ssb_select_mitigation(void) - { - ssb_mode = __ssb_select_mitigation(); - -@@ -640,7 +640,7 @@ void x86_spec_ctrl_setup_ap(void) - #ifdef CONFIG_SYSFS - - static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -- char *buf, unsigned int bug) -+ char *buf, unsigned int bug) - { - if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch deleted file mode 100644 index 1f830819..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 76eefada90172bd111371bd2669a50eec64a3b0f Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Sun, 13 May 2018 17:33:57 -0400 -Subject: [PATCH 61/93] x86/cpu: Make alternative_msr_write work for 32-bit - code - -commit 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b upstream - -Cast val and (val >> 32) to (u32), so that they fit in a -general-purpose register in both 32-bit and 64-bit code. 
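
WRMSR consumes the 64-bit value split across EDX:EAX, and on 32-bit x86 a u64 cannot occupy a single general-purpose register, so both halves must be narrowed explicitly before being tied to the "a" and "d" constraints. A hedged, standalone sketch of the same idiom (function name is illustrative, not the kernel's):

#include <stdint.h>

/* Illustrative only: pass the MSR value as two explicit 32-bit
 * halves so the register constraints work in 32-bit builds too. */
static inline void wrmsr_sketch(uint32_t msr, uint64_t val)
{
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c" (msr),
		       "a" ((uint32_t)val),         /* bits 31:0  in EAX */
		       "d" ((uint32_t)(val >> 32))  /* bits 63:32 in EDX */
		     : "memory");
}
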
- -[ tglx: Made it u32 instead of uintptr_t ] - -Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") -Signed-off-by: Jim Mattson <jmattson@google.com> -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 328ea3c..bc258e6 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -265,8 +265,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) - { - asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) - : : "c" (msr), -- "a" (val), -- "d" (val >> 32), -+ "a" ((u32)val), -+ "d" ((u32)(val >> 32)), - [feature] "i" (feature) - : "memory"); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch deleted file mode 100644 index 4149b72c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 1c6a584ef5f1c7ad9b8adef6ca862824541f361a Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 17 Sep 2017 09:03:50 -0700 -Subject: [PATCH 061/103] x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) - earlier - -commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream. - -Otherwise we might have the PCID feature bit set during cpu_init(). - -This is just for robustness. I haven't seen any actual bugs here. - -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Cc: Borislav Petkov <bpetkov@suse.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") -Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 8339b43..7b9ae04 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -890,6 +890,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - - fpu__init_system(c); -+ -+#ifdef CONFIG_X86_32 -+ /* -+ * Regardless of whether PCID is enumerated, the SDM says -+ * that it can't be enabled in 32-bit mode. 
-+ */ -+ setup_clear_cpu_cap(X86_FEATURE_PCID); -+#endif - } - - void __init early_cpu_init(void) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch deleted file mode 100644 index 75caec43..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 21d2555ad333e693fc6859bff2a60b9b24de8d99 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 11 May 2018 15:21:01 +0200 -Subject: [PATCH 62/93] KVM: SVM: Move spec control call after restore of GS - -commit 15e6c22fd8e5a42c5ed6d487b7c9fe44c2517765 upstream - -svm_vcpu_run() invokes x86_spec_ctrl_restore_host() after VMEXIT, but -before the host GS is restored. x86_spec_ctrl_restore_host() uses 'current' -to determine the host SSBD state of the thread. 'current' is GS based, but -host GS is not yet restored and the access causes a triple fault. - -Move the call after the host GS restore. - -Fixes: 885f82bfbc6f x86/process: Allow runtime control of Speculative Store Bypass -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Acked-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/svm.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 47779f5..9991462 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -4999,6 +4999,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - #endif - ); - -+ /* Eliminate branch target predictions from guest mode */ -+ vmexit_fill_RSB(); -+ -+#ifdef CONFIG_X86_64 -+ wrmsrl(MSR_GS_BASE, svm->host.gs_base); -+#else -+ loadsegment(fs, svm->host.fs); -+#ifndef CONFIG_X86_32_LAZY_GS -+ loadsegment(gs, svm->host.gs); -+#endif -+#endif -+ - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and -@@ -5019,18 +5031,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - - x86_spec_ctrl_restore_host(svm->spec_ctrl); - -- /* Eliminate branch target predictions from guest mode */ -- vmexit_fill_RSB(); -- --#ifdef CONFIG_X86_64 -- wrmsrl(MSR_GS_BASE, svm->host.gs_base); --#else -- loadsegment(fs, svm->host.fs); --#ifndef CONFIG_X86_32_LAZY_GS -- loadsegment(gs, svm->host.gs); --#endif --#endif -- - reload_tss(vcpu); - - local_irq_disable(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-modules-Discard-objtool-annotation-sections-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-modules-Discard-objtool-annotation-sections-.patch deleted file mode 100644 index de624129..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-objtool-modules-Discard-objtool-annotation-sections-.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 5331b8c32e4b07d25c5edd524581d9ed1c22d030 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Wed, 1 Mar 2017 12:04:44 -0600 -Subject: [PATCH 062/103] objtool, modules: Discard objtool annotation sections - for modules - -commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream. 
- -The '__unreachable' and '__func_stack_frame_non_standard' sections are -only used at compile time. They're discarded for vmlinux but they -should also be discarded for modules. - -Since this is a recurring pattern, prefix the section names with -".discard.". It's a nice convention and vmlinux.lds.h already discards -such sections. - -Also remove the 'a' (allocatable) flag from the __unreachable section -since it doesn't make sense for a discarded section. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Jessica Yu <jeyu@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") -Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble -Signed-off-by: Ingo Molnar <mingo@kernel.org> -[dwmw2: Remove the unreachable part in backporting since it's not here yet] -Signed-off-by: David Woodhouse <dwmw@amazon.co.ku> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/frame.h | 2 +- - scripts/mod/modpost.c | 1 + - scripts/module-common.lds | 5 ++++- - tools/objtool/builtin-check.c | 2 +- - 4 files changed, 7 insertions(+), 3 deletions(-) - -diff --git a/include/linux/frame.h b/include/linux/frame.h -index e6baaba..d772c61 100644 ---- a/include/linux/frame.h -+++ b/include/linux/frame.h -@@ -11,7 +11,7 @@ - * For more information, see tools/objtool/Documentation/stack-validation.txt. - */ - #define STACK_FRAME_NON_STANDARD(func) \ -- static void __used __section(__func_stack_frame_non_standard) \ -+ static void __used __section(.discard.func_stack_frame_non_standard) \ - *__func_stack_frame_non_standard_##func = func - - #else /* !CONFIG_STACK_VALIDATION */ -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index cbb1553..325f1af 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -838,6 +838,7 @@ static const char *const section_white_list[] = - ".cmem*", /* EZchip */ - ".fmt_slot*", /* EZchip */ - ".gnu.lto*", -+ ".discard.*", - NULL - }; - -diff --git a/scripts/module-common.lds b/scripts/module-common.lds -index 73a2c7d..cf7e52e 100644 ---- a/scripts/module-common.lds -+++ b/scripts/module-common.lds -@@ -4,7 +4,10 @@ - * combine them automatically. 
- */ - SECTIONS { -- /DISCARD/ : { *(.discard) } -+ /DISCARD/ : { -+ *(.discard) -+ *(.discard.*) -+ } - - __ksymtab 0 : { *(SORT(___ksymtab+*)) } - __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index e8a1f69..7b1f7b5 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -1220,7 +1220,7 @@ int cmd_check(int argc, const char **argv) - - INIT_LIST_HEAD(&file.insn_list); - hash_init(file.insn_hash); -- file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); -+ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); - file.rodata = find_section_by_name(file.elf, ".rodata"); - file.ignore_unreachables = false; - file.c_file = find_section_by_name(file.elf, ".comment"); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Detect-jumps-to-retpoline-thunks.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Detect-jumps-to-retpoline-thunks.patch deleted file mode 100644 index 674590cb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-objtool-Detect-jumps-to-retpoline-thunks.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 3ac4cf3eaec7c1588965f724dc68b1a196754902 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Thu, 11 Jan 2018 21:46:23 +0000 -Subject: [PATCH 063/103] objtool: Detect jumps to retpoline thunks - -commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream. - -A direct jump to a retpoline thunk is really an indirect jump in -disguise. Change the objtool instruction type accordingly. - -Objtool needs to know where indirect branches are so it can detect -switch statement jump tables. - -This fixes a bunch of warnings with CONFIG_RETPOLINE like: - - arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame - kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame - ... 
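
To illustrate the situation described above: with a retpoline-enabled compiler (-mindirect-branch=thunk-extern), an ordinary indirect branch in C is emitted as a direct call or jump to a per-register thunk, which is why objtool has to reclassify those direct jumps as dynamic ones. A hedged sketch (the exact thunk register depends on the compiler's allocation and optimization level):

/* With -mindirect-branch=thunk-extern and optimization enabled, the
 * indirect tail call below is typically emitted as
 *     jmp __x86_indirect_thunk_rax
 * instead of
 *     jmp *%rax
 * so a tool reading the object file sees a direct jump whose real
 * behaviour is an indirect branch. */
int dispatch(int (*handler)(int), int arg)
{
	return handler(arg);
}
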
- -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk -[dwmw2: Applies to tools/objtool/builtin-check.c not check.c] -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - tools/objtool/builtin-check.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index 7b1f7b5..36784b8 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -382,6 +382,13 @@ static int add_jump_destinations(struct objtool_file *file) - } else if (rela->sym->sec->idx) { - dest_sec = rela->sym->sec; - dest_off = rela->sym->sym.st_value + rela->addend + 4; -+ } else if (strstr(rela->sym->name, "_indirect_thunk_")) { -+ /* -+ * Retpoline jumps are really dynamic jumps in -+ * disguise, so convert them accordingly. -+ */ -+ insn->type = INSN_JUMP_DYNAMIC; -+ continue; - } else { - /* sibling call */ - insn->jump_dest = 0; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch deleted file mode 100644 index a004c9a0..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 471e61fb50a8b552bf18db27c7ff9808182008dd Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Wed, 2 May 2018 18:15:14 +0200 -Subject: [PATCH 63/93] x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit e7c587da125291db39ddf1f49b18e5970adbac17 upstream - -Intel and AMD have different CPUID bits hence for those use synthetic bits -which get set on the respective vendor's in init_speculation_control(). So -that debacles like what the commit message of - - c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") - -talks about don't happen anymore. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Tested-by: Jörg Otte <jrg.otte@gmail.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: "Kirill A. 
Shutemov" <kirill.shutemov@linux.intel.com> -Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 12 ++++++++---- - arch/x86/kernel/cpu/common.c | 14 ++++++++++---- - arch/x86/kvm/cpuid.c | 10 +++++----- - arch/x86/kvm/cpuid.h | 4 ++-- - 4 files changed, 25 insertions(+), 15 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 0ed8ea5..059437a 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -205,7 +205,10 @@ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ - #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ --#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ -+#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ -+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -@@ -263,9 +266,9 @@ - /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ - #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ - #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ --#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ --#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ --#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ -+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ - - /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ - #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -@@ -301,6 +304,7 @@ - #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ - #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ - -+ - /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ - #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index d0dd736..67bfa3c 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -725,17 +725,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c) - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. -- * -- * We use the AMD bits in 0x8000_0008 EBX as the generic hardware -- * features, which are visible in /proc/cpuinfo and used by the -- * kernel. So set those accordingly from the Intel bits. 
- */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } -+ - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); -+ -+ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) -+ set_cpu_cap(c, X86_FEATURE_IBRS); -+ -+ if (cpu_has(c, X86_FEATURE_AMD_IBPB)) -+ set_cpu_cap(c, X86_FEATURE_IBPB); -+ -+ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) -+ set_cpu_cap(c, X86_FEATURE_STIBP); - } - - void get_cpu_cap(struct cpuinfo_x86 *c) -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index db95637..4ccdfbe 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 0x80000008.ebx */ - const u32 kvm_cpuid_8000_0008_ebx_x86_features = -- F(IBPB) | F(IBRS); -+ F(AMD_IBPB) | F(AMD_IBRS); - - /* cpuid 0xC0000001.edx */ - const u32 kvm_cpuid_C000_0001_edx_x86_features = -@@ -619,10 +619,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - entry->eax = g_phys_as | (virt_as << 8); - entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ -- if (boot_cpu_has(X86_FEATURE_IBPB)) -- entry->ebx |= F(IBPB); -- if (boot_cpu_has(X86_FEATURE_IBRS)) -- entry->ebx |= F(IBRS); -+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) -+ entry->ebx |= F(AMD_IBPB); -+ if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) -+ entry->ebx |= F(AMD_IBRS); - entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; - cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - break; -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 72551c5..410070c 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -157,7 +157,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); -- if (best && (best->ebx & bit(X86_FEATURE_IBPB))) -+ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB))) - return true; - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); -@@ -168,7 +168,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); -- if (best && (best->ebx & bit(X86_FEATURE_IBRS))) -+ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) - return true; - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-objtool-Allow-alternatives-to-be-ignored.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-objtool-Allow-alternatives-to-be-ignored.patch deleted file mode 100644 index f0068591..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-objtool-Allow-alternatives-to-be-ignored.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 08a8d8feae37bbd8830fc63cc7e8c618497279db Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Thu, 11 Jan 2018 21:46:24 +0000 -Subject: [PATCH 064/103] objtool: Allow alternatives to be ignored - -commit 258c76059cece01bebae098e81bacb1af2edad17 upstream. - -Getting objtool to understand retpolines is going to be a bit of a -challenge. For now, take advantage of the fact that retpolines are -patched in with alternatives. Just read the original (sane) -non-alternative instruction, and ignore the patched-in retpoline. 
- -This allows objtool to understand the control flow *around* the -retpoline, even if it can't yet follow what's inside. This means the -ORC unwinder will fail to unwind from inside a retpoline, but will work -fine otherwise. - -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk -[dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]] -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - tools/objtool/builtin-check.c | 64 ++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 57 insertions(+), 7 deletions(-) - -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index 36784b8..ee71d4c 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -51,7 +51,7 @@ struct instruction { - unsigned int len, state; - unsigned char type; - unsigned long immediate; -- bool alt_group, visited; -+ bool alt_group, visited, ignore_alts; - struct symbol *call_dest; - struct instruction *jump_dest; - struct list_head alts; -@@ -353,6 +353,40 @@ static void add_ignores(struct objtool_file *file) - } - - /* -+ * FIXME: For now, just ignore any alternatives which add retpolines. This is -+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. -+ * But it at least allows objtool to understand the control flow *around* the -+ * retpoline. -+ */ -+static int add_nospec_ignores(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct rela *rela; -+ struct instruction *insn; -+ -+ sec = find_section_by_name(file->elf, ".rela.discard.nospec"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in %s", sec->name); -+ return -1; -+ } -+ -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (!insn) { -+ WARN("bad .discard.nospec entry"); -+ return -1; -+ } -+ -+ insn->ignore_alts = true; -+ } -+ -+ return 0; -+} -+ -+/* - * Find the destination instructions for all jumps. - */ - static int add_jump_destinations(struct objtool_file *file) -@@ -435,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file) - dest_off = insn->offset + insn->len + insn->immediate; - insn->call_dest = find_symbol_by_offset(insn->sec, - dest_off); -+ /* -+ * FIXME: Thanks to retpolines, it's now considered -+ * normal for a function to call within itself. So -+ * disable this warning for now. 
-+ */ -+#if 0 - if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at offset 0x%lx", - insn->sec, insn->offset, dest_off); - return -1; - } -+#endif - } else if (rela->sym->type == STT_SECTION) { - insn->call_dest = find_symbol_by_offset(rela->sym->sec, - rela->addend+4); -@@ -601,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file) - return ret; - - list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { -- alt = malloc(sizeof(*alt)); -- if (!alt) { -- WARN("malloc failed"); -- ret = -1; -- goto out; -- } - - orig_insn = find_insn(file, special_alt->orig_sec, - special_alt->orig_off); -@@ -617,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file) - goto out; - } - -+ /* Ignore retpoline alternatives. */ -+ if (orig_insn->ignore_alts) -+ continue; -+ - new_insn = NULL; - if (!special_alt->group || special_alt->new_len) { - new_insn = find_insn(file, special_alt->new_sec, -@@ -642,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file) - goto out; - } - -+ alt = malloc(sizeof(*alt)); -+ if (!alt) { -+ WARN("malloc failed"); -+ ret = -1; -+ goto out; -+ } -+ - alt->insn = new_insn; - list_add_tail(&alt->list, &orig_insn->alts); - -@@ -852,6 +898,10 @@ static int decode_sections(struct objtool_file *file) - - add_ignores(file); - -+ ret = add_nospec_ignores(file); -+ if (ret) -+ return ret; -+ - ret = add_jump_destinations(file); - if (ret) - return ret; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch deleted file mode 100644 index b84bc768..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 7731d9040d16874cb3fe11f52c4a238ab3fd658d Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 10 May 2018 19:13:18 +0200 -Subject: [PATCH 64/93] x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration - from IBRS - -commit 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 upstream - -The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on -Intel and implied by IBRS or STIBP support on AMD. That's just confusing -and in case an AMD CPU has IBRS not supported because the underlying -problem has been fixed but has another bit valid in the SPEC_CTRL MSR, -the thing falls apart. - -Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the -availability on both Intel and AMD. - -While at it replace the boot_cpu_has() checks with static_cpu_has() where -possible. This prevents late microcode loading from exposing SPEC_CTRL, but -late loading is already very limited as it does not reevaluate the -mitigation options and other bits and pieces. Having static_cpu_has() is -the simplest and least fragile solution. 
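
The idea in the message above can be summarised outside the kernel's cpufeature machinery as deriving one vendor-neutral flag from whichever vendor-specific CPUID bit happens to be present, so the rest of the code only ever tests the derived flag. A simplified sketch with made-up flag names (not the real X86_FEATURE_* infrastructure):

#include <stdio.h>

enum {
	FEAT_SPEC_CTRL     = 1u << 0,  /* Intel enumeration */
	FEAT_AMD_IBRS      = 1u << 1,  /* AMD enumeration   */
	FEAT_AMD_STIBP     = 1u << 2,  /* AMD enumeration   */
	FEAT_MSR_SPEC_CTRL = 1u << 3,  /* synthetic: the MSR itself exists */
};

/* Set the synthetic flag whenever any vendor enumeration implies the MSR. */
static unsigned int init_speculation_bits(unsigned int caps)
{
	if (caps & (FEAT_SPEC_CTRL | FEAT_AMD_IBRS | FEAT_AMD_STIBP))
		caps |= FEAT_MSR_SPEC_CTRL;
	return caps;
}

int main(void)
{
	unsigned int caps = init_speculation_bits(FEAT_AMD_IBRS);

	/* Callers only check the vendor-neutral bit. */
	printf("MSR_SPEC_CTRL present: %s\n",
	       (caps & FEAT_MSR_SPEC_CTRL) ? "yes" : "no");
	return 0;
}
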
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 ++ - arch/x86/kernel/cpu/bugs.c | 18 +++++++++++------- - arch/x86/kernel/cpu/common.c | 9 +++++++-- - arch/x86/kernel/cpu/intel.c | 1 + - 4 files changed, 21 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 059437a..ca0f33f 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -197,6 +197,8 @@ - #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ - #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ - -+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -+ - #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 4f8c88e..59649310 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -63,7 +63,7 @@ void __init check_bugs(void) - * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD - * init code as it is not enumerated and depends on the family. - */ -- if (boot_cpu_has(X86_FEATURE_IBRS)) -+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - - /* Select the proper spectre mitigation before patching alternatives */ -@@ -144,7 +144,7 @@ u64 x86_spec_ctrl_get_default(void) - { - u64 msrval = x86_spec_ctrl_base; - -- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - return msrval; - } -@@ -154,10 +154,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) - { - u64 host = x86_spec_ctrl_base; - -- if (!boot_cpu_has(X86_FEATURE_IBRS)) -+ /* Is MSR_SPEC_CTRL implemented ? */ -+ if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - -- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ /* Intel controls SSB in MSR_SPEC_CTRL */ -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) -@@ -169,10 +171,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) - { - u64 host = x86_spec_ctrl_base; - -- if (!boot_cpu_has(X86_FEATURE_IBRS)) -+ /* Is MSR_SPEC_CTRL implemented ? 
*/ -+ if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - -- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ /* Intel controls SSB in MSR_SPEC_CTRL */ -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) -@@ -630,7 +634,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) - - void x86_spec_ctrl_setup_ap(void) - { -- if (boot_cpu_has(X86_FEATURE_IBRS)) -+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); - - if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 67bfa3c..04362282 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -729,19 +729,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); -+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); - } - - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - -- if (cpu_has(c, X86_FEATURE_AMD_IBRS)) -+ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { - set_cpu_cap(c, X86_FEATURE_IBRS); -+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); -+ } - - if (cpu_has(c, X86_FEATURE_AMD_IBPB)) - set_cpu_cap(c, X86_FEATURE_IBPB); - -- if (cpu_has(c, X86_FEATURE_AMD_STIBP)) -+ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { - set_cpu_cap(c, X86_FEATURE_STIBP); -+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); -+ } - } - - void get_cpu_cap(struct cpuinfo_x86 *c) -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 047adaa..7f495e8 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -153,6 +153,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - setup_clear_cpu_cap(X86_FEATURE_IBPB); - setup_clear_cpu_cap(X86_FEATURE_STIBP); - setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); -+ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); - setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - setup_clear_cpu_cap(X86_FEATURE_SSBD); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch deleted file mode 100644 index d6be8601..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch +++ /dev/null @@ -1,150 +0,0 @@ -From e162de37c3c74f0615f030cb30571a6adad3a6c7 Mon Sep 17 00:00:00 2001 -From: Andrey Ryabinin <aryabinin@virtuozzo.com> -Date: Fri, 29 Sep 2017 17:15:36 +0300 -Subject: [PATCH 065/103] x86/asm: Use register variable to get stack pointer - value - -commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream. - -Currently we use current_stack_pointer() function to get the value -of the stack pointer register. Since commit: - - f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") - -... we have a stack register variable declared. 
It can be used instead of -current_stack_pointer() function which allows to optimize away some -excessive "mov %rsp, %<dst>" instructions: - - -mov %rsp,%rdx - -sub %rdx,%rax - -cmp $0x3fff,%rax - -ja ffffffff810722fd <ist_begin_non_atomic+0x2d> - - +sub %rsp,%rax - +cmp $0x3fff,%rax - +ja ffffffff810722fa <ist_begin_non_atomic+0x2a> - -Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer -and use it instead of the removed function. - -Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> -Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -[dwmw2: We want ASM_CALL_CONSTRAINT for retpoline] -Signed-off-by: David Woodhouse <dwmw@amazon.co.ku> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/asm.h | 11 +++++++++++ - arch/x86/include/asm/thread_info.h | 11 ----------- - arch/x86/kernel/irq_32.c | 6 +++--- - arch/x86/kernel/traps.c | 2 +- - arch/x86/mm/tlb.c | 2 +- - 5 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h -index 7acb51c..0052352 100644 ---- a/arch/x86/include/asm/asm.h -+++ b/arch/x86/include/asm/asm.h -@@ -125,4 +125,15 @@ - /* For C file, we already have NOKPROBE_SYMBOL macro */ - #endif - -+#ifndef __ASSEMBLY__ -+/* -+ * This output constraint should be used for any inline asm which has a "call" -+ * instruction. Otherwise the asm may be inserted before the frame pointer -+ * gets set up by the containing function. If you forget to do this, objtool -+ * may print a "call without frame pointer save/setup" warning. -+ */ -+register unsigned long current_stack_pointer asm(_ASM_SP); -+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) -+#endif -+ - #endif /* _ASM_X86_ASM_H */ -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index ad6f5eb0..bdf9c4c 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -152,17 +152,6 @@ struct thread_info { - */ - #ifndef __ASSEMBLY__ - --static inline unsigned long current_stack_pointer(void) --{ -- unsigned long sp; --#ifdef CONFIG_X86_64 -- asm("mov %%rsp,%0" : "=g" (sp)); --#else -- asm("mov %%esp,%0" : "=g" (sp)); --#endif -- return sp; --} -- - /* - * Walks up the stack frames to make sure that the specified object is - * entirely contained by a single stack frame. 
-diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c -index 1f38d9a..d4eb450 100644 ---- a/arch/x86/kernel/irq_32.c -+++ b/arch/x86/kernel/irq_32.c -@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack) - - static inline void *current_stack(void) - { -- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); -+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); - } - - static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) -@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) - - /* Save the next esp at the bottom of the stack */ - prev_esp = (u32 *)irqstk; -- *prev_esp = current_stack_pointer(); -+ *prev_esp = current_stack_pointer; - - if (unlikely(overflow)) - call_on_stack(print_stack_overflow, isp); -@@ -139,7 +139,7 @@ void do_softirq_own_stack(void) - - /* Push the previous esp onto the stack */ - prev_esp = (u32 *)irqstk; -- *prev_esp = current_stack_pointer(); -+ *prev_esp = current_stack_pointer; - - call_on_stack(__do_softirq, isp); - } -diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c -index bd4e3d4..322f433 100644 ---- a/arch/x86/kernel/traps.c -+++ b/arch/x86/kernel/traps.c -@@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) - * from double_fault. - */ - BUG_ON((unsigned long)(current_top_of_stack() - -- current_stack_pointer()) >= THREAD_SIZE); -+ current_stack_pointer) >= THREAD_SIZE); - - preempt_enable_no_resched(); - } -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index e81f8bb..0cf44ac 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -114,7 +114,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - * mapped in the new pgd, we'll double-fault. Forcibly - * map it. - */ -- unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); -+ unsigned int stack_pgd_index = pgd_index(current_stack_pointer); - - pgd_t *pgd = next->pgd + stack_pgd_index; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch deleted file mode 100644 index 84d35057..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch +++ /dev/null @@ -1,163 +0,0 @@ -From f8a3968ae9a100977e28f434f303fd74a0a8591b Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 10 May 2018 20:21:36 +0200 -Subject: [PATCH 65/93] x86/cpufeatures: Disentangle SSBD enumeration - -commit 52817587e706686fcdb27f14c1b000c92f266c96 upstream - -The SSBD enumeration is similarly to the other bits magically shared -between Intel and AMD though the mechanisms are different. - -Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific -features or family dependent setup. - -Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is -controlled via MSR_SPEC_CTRL and fix up the usage sites. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 5 +++-- - arch/x86/kernel/cpu/amd.c | 7 +------ - arch/x86/kernel/cpu/bugs.c | 10 +++++----- - arch/x86/kernel/cpu/common.c | 3 +++ - arch/x86/kernel/cpu/intel.c | 1 + - arch/x86/kernel/process.c | 2 +- - 6 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ca0f33f..d071767 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -198,6 +198,7 @@ - #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ - - #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ - - #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ - -@@ -207,7 +208,7 @@ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ - #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ - #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ --#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ -+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ - #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ - #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ - #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -@@ -314,7 +315,7 @@ - #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ - #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ - #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ --#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ -+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ - - /* - * BUG word(s) -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index acb2fcc..179d572 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -558,8 +558,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) - * avoid RMW. If that faults, do not enable SSBD. - */ - if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { -+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); - setup_force_cpu_cap(X86_FEATURE_SSBD); -- setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); - x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; - } - } -@@ -848,11 +848,6 @@ static void init_amd(struct cpuinfo_x86 *c) - /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. 
*/ - if (!cpu_has(c, X86_FEATURE_XENPV)) - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -- -- if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { -- set_cpu_cap(c, X86_FEATURE_SSBD); -- set_cpu_cap(c, X86_FEATURE_AMD_SSBD); -- } - } - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 59649310..15a6c58 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -158,8 +158,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - -- /* Intel controls SSB in MSR_SPEC_CTRL */ -- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) -+ /* SSBD controlled in MSR_SPEC_CTRL */ -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) -@@ -175,8 +175,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - -- /* Intel controls SSB in MSR_SPEC_CTRL */ -- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) -+ /* SSBD controlled in MSR_SPEC_CTRL */ -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) -@@ -188,7 +188,7 @@ static void x86_amd_ssb_disable(void) - { - u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - -- if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) -+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - wrmsrl(MSR_AMD64_LS_CFG, msrval); - } - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 04362282..945e841 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -735,6 +735,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - -+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) -+ set_cpu_cap(c, X86_FEATURE_SSBD); -+ - if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index 7f495e8..93781e3 100644 ---- a/arch/x86/kernel/cpu/intel.c -+++ b/arch/x86/kernel/cpu/intel.c -@@ -156,6 +156,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) - setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); - setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - setup_clear_cpu_cap(X86_FEATURE_SSBD); -+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); - } - - /* -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index c344230..b3cd08e 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -207,7 +207,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn - { - u64 msr; - -- if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { -+ if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { - msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); - wrmsrl(MSR_AMD64_LS_CFG, msr); - } else { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch deleted file mode 100644 index b9d9a567..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch +++ /dev/null @@ -1,55 +0,0 @@ -From b6aa89b4ab638e59beab4c2d264c02dfc887187f Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Thu, 7 Sep 2017 19:08:21 +0200 -Subject: [PATCH 
66/93] x86/cpu/AMD: Fix erratum 1076 (CPB bit) - -commit f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 upstream - -CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do -support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that. - -Signed-off-by: Borislav Petkov <bp@suse.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Sherry Hurwitz <sherry.hurwitz@amd.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/amd.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 179d572..21367b5 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -749,6 +749,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) - } - } - -+static void init_amd_zn(struct cpuinfo_x86 *c) -+{ -+ /* -+ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects -+ * all up to and including B1. -+ */ -+ if (c->x86_model <= 1 && c->x86_stepping <= 1) -+ set_cpu_cap(c, X86_FEATURE_CPB); -+} -+ - static void init_amd(struct cpuinfo_x86 *c) - { - u32 dummy; -@@ -779,6 +789,7 @@ static void init_amd(struct cpuinfo_x86 *c) - case 0x10: init_amd_gh(c); break; - case 0x12: init_amd_ln(c); break; - case 0x15: init_amd_bd(c); break; -+ case 0x17: init_amd_zn(c); break; - } - - /* Enable workaround for FXSAVE leak */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-retpoline-Add-initial-retpoline-support.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-retpoline-Add-initial-retpoline-support.patch deleted file mode 100644 index 2840b3d3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-retpoline-Add-initial-retpoline-support.patch +++ /dev/null @@ -1,378 +0,0 @@ -From ca8449e9edf4562460bdf669153b39ffe15195a1 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:25 +0000 -Subject: [PATCH 066/103] x86/retpoline: Add initial retpoline support - -commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream. - -Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide -the corresponding thunks. Provide assembler macros for invoking the thunks -in the same way that GCC does, from native and inline assembler. - -This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In -some circumstances, IBRS microcode features may be used instead, and the -retpoline can be disabled. - -On AMD CPUs if lfence is serialising, the retpoline can be dramatically -simplified to a simple "lfence; jmp *\reg". A future patch, after it has -been verified that lfence really is serialising in all circumstances, can -enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition -to X86_FEATURE_RETPOLINE. - -Do not align the retpoline in the altinstr section, because there is no -guarantee that it stays aligned when it's copied over the oldinstr during -alternative patching. 
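
As a reference for the thunks added below, the generic retpoline replaces an indirect branch with a call/ret pair whose return address is overwritten with the real target, while speculative execution of the ret is captured in a tight loop. A minimal stand-alone sketch of such a thunk for the RAX register (illustrative only; it mirrors the shape of the kernel's __x86_indirect_thunk_rax and compiles as a plain C translation unit):

/* my_indirect_thunk_rax: transfer control to the address held in %rax
 * without executing an indirect branch instruction.
 *
 *  - "call 1f" pushes the address of label 2 as a safe return address
 *  - the pause/jmp loop at 2 captures any speculation of the "ret"
 *  - "mov %rax, (%rsp)" overwrites the return address with the real target
 *  - "ret" then branches to *%rax architecturally
 *
 * A caller would use "mov target, %rax; call my_indirect_thunk_rax" in
 * place of "call *%rax" (or a jmp to the thunk in place of "jmp *%rax"). */
__asm__(
	".globl my_indirect_thunk_rax\n"
	"my_indirect_thunk_rax:\n"
	"	call 1f\n"
	"2:	pause\n"
	"	jmp 2b\n"
	"1:	mov %rax, (%rsp)\n"
	"	ret\n"
);
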
- -[ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] -[ tglx: Put actual function CALL/JMP in front of the macros, convert to - symbolic labels ] -[ dwmw2: Convert back to numeric labels, merge objtool fixes ] - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/Kconfig | 13 ++++ - arch/x86/Makefile | 10 +++ - arch/x86/include/asm/asm-prototypes.h | 25 +++++++ - arch/x86/include/asm/cpufeatures.h | 3 + - arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++++++++++ - arch/x86/kernel/cpu/common.c | 4 ++ - arch/x86/lib/Makefile | 1 + - arch/x86/lib/retpoline.S | 48 +++++++++++++ - 8 files changed, 232 insertions(+) - create mode 100644 arch/x86/include/asm/nospec-branch.h - create mode 100644 arch/x86/lib/retpoline.S - -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index e0f6590..ee2ba5d 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -408,6 +408,19 @@ config GOLDFISH - def_bool y - depends on X86_GOLDFISH - -+config RETPOLINE -+ bool "Avoid speculative indirect branches in kernel" -+ default y -+ ---help--- -+ Compile kernel with the retpoline compiler options to guard against -+ kernel-to-user data leaks by avoiding speculative indirect -+ branches. Requires a compiler with -mindirect-branch=thunk-extern -+ support for full protection. The kernel may run slower. -+ -+ Without compiler support, at least indirect branches in assembler -+ code are eliminated. Since this includes the syscall entry path, -+ it is not entirely pointless. -+ - if X86_32 - config X86_EXTENDED_PLATFORM - bool "Support for extended (non-PC) x86 platforms" -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index 2d44933..1e1a733 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables - KBUILD_CFLAGS += $(mflags-y) - KBUILD_AFLAGS += $(mflags-y) - -+# Avoid indirect branches in kernel to deal with Spectre -+ifdef CONFIG_RETPOLINE -+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -+ ifneq ($(RETPOLINE_CFLAGS),) -+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE -+ else -+ $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) 
-+ endif -+endif -+ - archscripts: scripts_basic - $(Q)$(MAKE) $(build)=arch/x86/tools relocs - -diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h -index 44b8762..b15aa40 100644 ---- a/arch/x86/include/asm/asm-prototypes.h -+++ b/arch/x86/include/asm/asm-prototypes.h -@@ -10,7 +10,32 @@ - #include <asm/pgtable.h> - #include <asm/special_insns.h> - #include <asm/preempt.h> -+#include <asm/asm.h> - - #ifndef CONFIG_X86_CMPXCHG64 - extern void cmpxchg8b_emu(void); - #endif -+ -+#ifdef CONFIG_RETPOLINE -+#ifdef CONFIG_X86_32 -+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -+#else -+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -+INDIRECT_THUNK(8) -+INDIRECT_THUNK(9) -+INDIRECT_THUNK(10) -+INDIRECT_THUNK(11) -+INDIRECT_THUNK(12) -+INDIRECT_THUNK(13) -+INDIRECT_THUNK(14) -+INDIRECT_THUNK(15) -+#endif -+INDIRECT_THUNK(ax) -+INDIRECT_THUNK(bx) -+INDIRECT_THUNK(cx) -+INDIRECT_THUNK(dx) -+INDIRECT_THUNK(si) -+INDIRECT_THUNK(di) -+INDIRECT_THUNK(bp) -+INDIRECT_THUNK(sp) -+#endif /* CONFIG_RETPOLINE */ -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index f364c891..4467568 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -194,6 +194,9 @@ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ - #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ - -+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ -+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -+ - #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -new file mode 100644 -index 0000000..e20e92e ---- /dev/null -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -0,0 +1,128 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef __NOSPEC_BRANCH_H__ -+#define __NOSPEC_BRANCH_H__ -+ -+#include <asm/alternative.h> -+#include <asm/alternative-asm.h> -+#include <asm/cpufeatures.h> -+ -+#ifdef __ASSEMBLY__ -+ -+/* -+ * This should be used immediately before a retpoline alternative. It tells -+ * objtool where the retpolines are so that it can make sense of the control -+ * flow by just reading the original instruction(s) and ignoring the -+ * alternatives. -+ */ -+.macro ANNOTATE_NOSPEC_ALTERNATIVE -+ .Lannotate_\@: -+ .pushsection .discard.nospec -+ .long .Lannotate_\@ - . -+ .popsection -+.endm -+ -+/* -+ * These are the bare retpoline primitives for indirect jmp and call. -+ * Do not use these directly; they only exist to make the ALTERNATIVE -+ * invocation below less ugly. -+ */ -+.macro RETPOLINE_JMP reg:req -+ call .Ldo_rop_\@ -+.Lspec_trap_\@: -+ pause -+ jmp .Lspec_trap_\@ -+.Ldo_rop_\@: -+ mov \reg, (%_ASM_SP) -+ ret -+.endm -+ -+/* -+ * This is a wrapper around RETPOLINE_JMP so the called function in reg -+ * returns to the instruction after the macro. 
-+ */ -+.macro RETPOLINE_CALL reg:req -+ jmp .Ldo_call_\@ -+.Ldo_retpoline_jmp_\@: -+ RETPOLINE_JMP \reg -+.Ldo_call_\@: -+ call .Ldo_retpoline_jmp_\@ -+.endm -+ -+/* -+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple -+ * indirect jmp/call which may be susceptible to the Spectre variant 2 -+ * attack. -+ */ -+.macro JMP_NOSPEC reg:req -+#ifdef CONFIG_RETPOLINE -+ ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE_2 __stringify(jmp *\reg), \ -+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ -+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD -+#else -+ jmp *\reg -+#endif -+.endm -+ -+.macro CALL_NOSPEC reg:req -+#ifdef CONFIG_RETPOLINE -+ ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE_2 __stringify(call *\reg), \ -+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ -+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD -+#else -+ call *\reg -+#endif -+.endm -+ -+#else /* __ASSEMBLY__ */ -+ -+#define ANNOTATE_NOSPEC_ALTERNATIVE \ -+ "999:\n\t" \ -+ ".pushsection .discard.nospec\n\t" \ -+ ".long 999b - .\n\t" \ -+ ".popsection\n\t" -+ -+#if defined(CONFIG_X86_64) && defined(RETPOLINE) -+ -+/* -+ * Since the inline asm uses the %V modifier which is only in newer GCC, -+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. -+ */ -+# define CALL_NOSPEC \ -+ ANNOTATE_NOSPEC_ALTERNATIVE \ -+ ALTERNATIVE( \ -+ "call *%[thunk_target]\n", \ -+ "call __x86_indirect_thunk_%V[thunk_target]\n", \ -+ X86_FEATURE_RETPOLINE) -+# define THUNK_TARGET(addr) [thunk_target] "r" (addr) -+ -+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) -+/* -+ * For i386 we use the original ret-equivalent retpoline, because -+ * otherwise we'll run out of registers. We don't care about CET -+ * here, anyway. -+ */ -+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ -+ " jmp 904f;\n" \ -+ " .align 16\n" \ -+ "901: call 903f;\n" \ -+ "902: pause;\n" \ -+ " jmp 902b;\n" \ -+ " .align 16\n" \ -+ "903: addl $4, %%esp;\n" \ -+ " pushl %[thunk_target];\n" \ -+ " ret;\n" \ -+ " .align 16\n" \ -+ "904: call 901b;\n", \ -+ X86_FEATURE_RETPOLINE) -+ -+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -+#else /* No retpoline */ -+# define CALL_NOSPEC "call *%[thunk_target]\n" -+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -+#endif -+ -+#endif /* __ASSEMBLY__ */ -+#endif /* __NOSPEC_BRANCH_H__ */ -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 7b9ae04..6e885cc 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -889,6 +889,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - -+#ifdef CONFIG_RETPOLINE -+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -+#endif -+ - fpu__init_system(c); - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile -index 34a7413..6bf1898 100644 ---- a/arch/x86/lib/Makefile -+++ b/arch/x86/lib/Makefile -@@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o - lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o - lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o - lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o -+lib-$(CONFIG_RETPOLINE) += retpoline.o - - obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o - -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -new file mode 100644 -index 0000000..cb45c6c ---- /dev/null -+++ b/arch/x86/lib/retpoline.S -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#include <linux/stringify.h> 
-+#include <linux/linkage.h> -+#include <asm/dwarf2.h> -+#include <asm/cpufeatures.h> -+#include <asm/alternative-asm.h> -+#include <asm/export.h> -+#include <asm/nospec-branch.h> -+ -+.macro THUNK reg -+ .section .text.__x86.indirect_thunk.\reg -+ -+ENTRY(__x86_indirect_thunk_\reg) -+ CFI_STARTPROC -+ JMP_NOSPEC %\reg -+ CFI_ENDPROC -+ENDPROC(__x86_indirect_thunk_\reg) -+.endm -+ -+/* -+ * Despite being an assembler file we can't just use .irp here -+ * because __KSYM_DEPS__ only uses the C preprocessor and would -+ * only see one instance of "__x86_indirect_thunk_\reg" rather -+ * than one per register with the correct names. So we do it -+ * the simple and nasty way... -+ */ -+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) -+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) -+ -+GENERATE_THUNK(_ASM_AX) -+GENERATE_THUNK(_ASM_BX) -+GENERATE_THUNK(_ASM_CX) -+GENERATE_THUNK(_ASM_DX) -+GENERATE_THUNK(_ASM_SI) -+GENERATE_THUNK(_ASM_DI) -+GENERATE_THUNK(_ASM_BP) -+GENERATE_THUNK(_ASM_SP) -+#ifdef CONFIG_64BIT -+GENERATE_THUNK(r8) -+GENERATE_THUNK(r9) -+GENERATE_THUNK(r10) -+GENERATE_THUNK(r11) -+GENERATE_THUNK(r12) -+GENERATE_THUNK(r13) -+GENERATE_THUNK(r14) -+GENERATE_THUNK(r15) -+#endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch deleted file mode 100644 index 4dc85820..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch +++ /dev/null @@ -1,48 +0,0 @@ -From c9b69035094a1cadce0c634ad76ded5a4a033ff6 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 10 May 2018 16:26:00 +0200 -Subject: [PATCH 67/93] x86/cpufeatures: Add FEATURE_ZEN - -commit d1035d971829dcf80e8686ccde26f94b0a069472 upstream - -Add a ZEN feature bit so family-dependent static_cpu_has() optimizations -can be built for ZEN. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 ++ - arch/x86/kernel/cpu/amd.c | 1 + - 2 files changed, 3 insertions(+) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index d071767..ec87b8c 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -212,6 +212,8 @@ - #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ - #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ - #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ -+ - - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 21367b5..4c2be99 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -751,6 +751,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) - - static void init_amd_zn(struct cpuinfo_x86 *c) - { -+ set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. 
--- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch deleted file mode 100644 index e8f8a3b3..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 49ecd329ac6e1bef215e008c40e362f0aa24e7c9 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:26 +0000 -Subject: [PATCH 067/103] x86/spectre: Add boot time option to select Spectre - v2 mitigation - -commit da285121560e769cc31797bba6422eea71d473e0 upstream. - -Add a spectre_v2= option to select the mitigation used for the indirect -branch speculation vulnerability. - -Currently, the only option available is retpoline, in its various forms. -This will be expanded to cover the new IBRS/IBPB microcode features. - -The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation -control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a -serializing instruction, which is indicated by the LFENCE_RDTSC feature. - -[ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS - integration becomes simple ] - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 28 +++++++ - arch/x86/include/asm/nospec-branch.h | 10 +++ - arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++++++++++- - arch/x86/kernel/cpu/common.c | 4 - - 4 files changed, 195 insertions(+), 5 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 3d53778..4b438e4 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2681,6 +2681,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - nosmt [KNL,S390] Disable symmetric multithreading (SMT). - Equivalent to smt=1. - -+ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 -+ (indirect branch prediction) vulnerability. System may -+ allow data leaks with this option, which is equivalent -+ to spectre_v2=off. -+ - noxsave [BUGS=X86] Disables x86 extended register state save - and restore using xsave. The kernel will fallback to - enabling legacy floating-point and sse state. -@@ -3934,6 +3939,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - sonypi.*= [HW] Sony Programmable I/O Control Device driver - See Documentation/laptops/sonypi.txt - -+ spectre_v2= [X86] Control mitigation of Spectre variant 2 -+ (indirect branch speculation) vulnerability. 
-+ -+ on - unconditionally enable -+ off - unconditionally disable -+ auto - kernel detects whether your CPU model is -+ vulnerable -+ -+ Selecting 'on' will, and 'auto' may, choose a -+ mitigation method at run time according to the -+ CPU, the available microcode, the setting of the -+ CONFIG_RETPOLINE configuration option, and the -+ compiler with which the kernel was built. -+ -+ Specific mitigations can also be selected manually: -+ -+ retpoline - replace indirect branches -+ retpoline,generic - google's original retpoline -+ retpoline,amd - AMD-specific minimal thunk -+ -+ Not specifying this option is equivalent to -+ spectre_v2=auto. -+ - spia_io_base= [HW,MTD] - spia_fio_base= - spia_pedr= -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index e20e92e..ea034fa 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -124,5 +124,15 @@ - # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) - #endif - -+/* The Spectre V2 mitigation variants */ -+enum spectre_v2_mitigation { -+ SPECTRE_V2_NONE, -+ SPECTRE_V2_RETPOLINE_MINIMAL, -+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD, -+ SPECTRE_V2_RETPOLINE_GENERIC, -+ SPECTRE_V2_RETPOLINE_AMD, -+ SPECTRE_V2_IBRS, -+}; -+ - #endif /* __ASSEMBLY__ */ - #endif /* __NOSPEC_BRANCH_H__ */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index cb6b4f9..49d25dd 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -10,6 +10,9 @@ - #include <linux/init.h> - #include <linux/utsname.h> - #include <linux/cpu.h> -+ -+#include <asm/nospec-branch.h> -+#include <asm/cmdline.h> - #include <asm/bugs.h> - #include <asm/processor.h> - #include <asm/processor-flags.h> -@@ -20,6 +23,8 @@ - #include <asm/pgtable.h> - #include <asm/cacheflush.h> - -+static void __init spectre_v2_select_mitigation(void); -+ - void __init check_bugs(void) - { - identify_boot_cpu(); -@@ -29,6 +34,9 @@ void __init check_bugs(void) - print_cpu_info(&boot_cpu_data); - } - -+ /* Select the proper spectre mitigation before patching alternatives */ -+ spectre_v2_select_mitigation(); -+ - #ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. 
-@@ -61,6 +69,153 @@ void __init check_bugs(void) - #endif - } - -+/* The kernel command line selection */ -+enum spectre_v2_mitigation_cmd { -+ SPECTRE_V2_CMD_NONE, -+ SPECTRE_V2_CMD_AUTO, -+ SPECTRE_V2_CMD_FORCE, -+ SPECTRE_V2_CMD_RETPOLINE, -+ SPECTRE_V2_CMD_RETPOLINE_GENERIC, -+ SPECTRE_V2_CMD_RETPOLINE_AMD, -+}; -+ -+static const char *spectre_v2_strings[] = { -+ [SPECTRE_V2_NONE] = "Vulnerable", -+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", -+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", -+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", -+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", -+}; -+ -+#undef pr_fmt -+#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt -+ -+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -+ -+static void __init spec2_print_if_insecure(const char *reason) -+{ -+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -+ pr_info("%s\n", reason); -+} -+ -+static void __init spec2_print_if_secure(const char *reason) -+{ -+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) -+ pr_info("%s\n", reason); -+} -+ -+static inline bool retp_compiler(void) -+{ -+ return __is_defined(RETPOLINE); -+} -+ -+static inline bool match_option(const char *arg, int arglen, const char *opt) -+{ -+ int len = strlen(opt); -+ -+ return len == arglen && !strncmp(arg, opt, len); -+} -+ -+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) -+{ -+ char arg[20]; -+ int ret; -+ -+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, -+ sizeof(arg)); -+ if (ret > 0) { -+ if (match_option(arg, ret, "off")) { -+ goto disable; -+ } else if (match_option(arg, ret, "on")) { -+ spec2_print_if_secure("force enabled on command line."); -+ return SPECTRE_V2_CMD_FORCE; -+ } else if (match_option(arg, ret, "retpoline")) { -+ spec2_print_if_insecure("retpoline selected on command line."); -+ return SPECTRE_V2_CMD_RETPOLINE; -+ } else if (match_option(arg, ret, "retpoline,amd")) { -+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { -+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); -+ return SPECTRE_V2_CMD_AUTO; -+ } -+ spec2_print_if_insecure("AMD retpoline selected on command line."); -+ return SPECTRE_V2_CMD_RETPOLINE_AMD; -+ } else if (match_option(arg, ret, "retpoline,generic")) { -+ spec2_print_if_insecure("generic retpoline selected on command line."); -+ return SPECTRE_V2_CMD_RETPOLINE_GENERIC; -+ } else if (match_option(arg, ret, "auto")) { -+ return SPECTRE_V2_CMD_AUTO; -+ } -+ } -+ -+ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) -+ return SPECTRE_V2_CMD_AUTO; -+disable: -+ spec2_print_if_insecure("disabled on command line."); -+ return SPECTRE_V2_CMD_NONE; -+} -+ -+static void __init spectre_v2_select_mitigation(void) -+{ -+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); -+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; -+ -+ /* -+ * If the CPU is not affected and the command line mode is NONE or AUTO -+ * then nothing to do. 
-+ */ -+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && -+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) -+ return; -+ -+ switch (cmd) { -+ case SPECTRE_V2_CMD_NONE: -+ return; -+ -+ case SPECTRE_V2_CMD_FORCE: -+ /* FALLTRHU */ -+ case SPECTRE_V2_CMD_AUTO: -+ goto retpoline_auto; -+ -+ case SPECTRE_V2_CMD_RETPOLINE_AMD: -+ if (IS_ENABLED(CONFIG_RETPOLINE)) -+ goto retpoline_amd; -+ break; -+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC: -+ if (IS_ENABLED(CONFIG_RETPOLINE)) -+ goto retpoline_generic; -+ break; -+ case SPECTRE_V2_CMD_RETPOLINE: -+ if (IS_ENABLED(CONFIG_RETPOLINE)) -+ goto retpoline_auto; -+ break; -+ } -+ pr_err("kernel not compiled with retpoline; no mitigation available!"); -+ return; -+ -+retpoline_auto: -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { -+ retpoline_amd: -+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { -+ pr_err("LFENCE not serializing. Switching to generic retpoline\n"); -+ goto retpoline_generic; -+ } -+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : -+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD; -+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); -+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -+ } else { -+ retpoline_generic: -+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : -+ SPECTRE_V2_RETPOLINE_MINIMAL; -+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -+ } -+ -+ spectre_v2_enabled = mode; -+ pr_info("%s\n", spectre_v2_strings[mode]); -+} -+ -+#undef pr_fmt -+ - #ifdef CONFIG_SYSFS - ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) -@@ -85,6 +240,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); -- return sprintf(buf, "Vulnerable\n"); -+ -+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); - } - #endif -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 6e885cc..7b9ae04 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -889,10 +889,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - --#ifdef CONFIG_RETPOLINE -- setup_force_cpu_cap(X86_FEATURE_RETPOLINE); --#endif -- - fpu__init_system(c); - - #ifdef CONFIG_X86_32 --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch deleted file mode 100644 index 50495c77..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 8c7d57d2198fcb6098dcc32c33baaf32dc85dcd1 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:27 +0000 -Subject: [PATCH 068/103] x86/retpoline/crypto: Convert crypto assembler - indirect jumps - -commit 9697fa39efd3fc3692f2949d4045f393ec58450b upstream. - -Convert all indirect jumps in crypto assembler code to use non-speculative -sequences when CONFIG_RETPOLINE is enabled. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/crypto/aesni-intel_asm.S | 5 +++-- - arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- - arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- - arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- - 4 files changed, 9 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S -index 383a6f8..fa8801b 100644 ---- a/arch/x86/crypto/aesni-intel_asm.S -+++ b/arch/x86/crypto/aesni-intel_asm.S -@@ -32,6 +32,7 @@ - #include <linux/linkage.h> - #include <asm/inst.h> - #include <asm/frame.h> -+#include <asm/nospec-branch.h> - - /* - * The following macros are used to move an (un)aligned 16 byte value to/from -@@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8) - pxor INC, STATE4 - movdqu IV, 0x30(OUTP) - -- call *%r11 -+ CALL_NOSPEC %r11 - - movdqu 0x00(OUTP), INC - pxor INC, STATE1 -@@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8) - _aesni_gf128mul_x_ble() - movups IV, (IVP) - -- call *%r11 -+ CALL_NOSPEC %r11 - - movdqu 0x40(OUTP), INC - pxor INC, STATE1 -diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S -index aa9e8bd..77ff4de 100644 ---- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S -+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S -@@ -17,6 +17,7 @@ - - #include <linux/linkage.h> - #include <asm/frame.h> -+#include <asm/nospec-branch.h> - - #define CAMELLIA_TABLE_BYTE_LEN 272 - -@@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way: - vpxor 14 * 16(%rax), %xmm15, %xmm14; - vpxor 15 * 16(%rax), %xmm15, %xmm15; - -- call *%r9; -+ CALL_NOSPEC %r9; - - addq $(16 * 16), %rsp; - -diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S -index 16186c1..7384342 100644 ---- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S -+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S -@@ -12,6 +12,7 @@ - - #include <linux/linkage.h> - #include <asm/frame.h> -+#include <asm/nospec-branch.h> - - #define CAMELLIA_TABLE_BYTE_LEN 272 - -@@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way: - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - -- call *%r9; -+ CALL_NOSPEC %r9; - - addq $(16 * 32), %rsp; - -diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -index dc05f01..174fd41 100644 ---- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S -@@ -45,6 +45,7 @@ - - #include <asm/inst.h> - #include <linux/linkage.h> -+#include <asm/nospec-branch.h> - - ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 
- -@@ -172,7 +173,7 @@ continue_block: - movzxw (bufp, %rax, 2), len - lea crc_array(%rip), bufp - lea (bufp, len, 1), bufp -- jmp *bufp -+ JMP_NOSPEC bufp - - ################################################################ - ## 2a) PROCESS FULL BLOCKS: --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch deleted file mode 100644 index cb74bad4..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch +++ /dev/null @@ -1,240 +0,0 @@ -From cbf0028f2c499e981af020c1cdb6bff7d0b4e192 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 9 May 2018 21:53:09 +0200 -Subject: [PATCH 68/93] x86/speculation: Handle HT correctly on AMD - -commit 1f50ddb4f4189243c05926b842dc1a0332195f31 upstream - -The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when -hyperthreading is enabled the SSBD bit toggle needs to take both cores into -account. Otherwise the following situation can happen: - -CPU0 CPU1 - -disable SSB - disable SSB - enable SSB <- Enables it for the Core, i.e. for CPU0 as well - -So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled -again. - -On Intel the SSBD control is per core as well, but the synchronization -logic is implemented behind the per thread SPEC_CTRL MSR. It works like -this: - - CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL - -i.e. if one of the threads enables a mitigation then this affects both and -the mitigation is only disabled in the core when both threads disabled it. - -Add the necessary synchronization logic for AMD family 17H. Unfortunately -that requires a spinlock to serialize the access to the MSR, but the locks -are only shared between siblings. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/spec-ctrl.h | 6 ++ - arch/x86/kernel/process.c | 125 +++++++++++++++++++++++++++++++++++++-- - arch/x86/kernel/smpboot.c | 5 ++ - 3 files changed, 130 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index dc21209..0cb49c4 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) - return (tifn & _TIF_SSBD) ? 
x86_amd_ls_cfg_ssbd_mask : 0ULL; - } - -+#ifdef CONFIG_SMP -+extern void speculative_store_bypass_ht_init(void); -+#else -+static inline void speculative_store_bypass_ht_init(void) { } -+#endif -+ - extern void speculative_store_bypass_update(void); - - #endif -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index b3cd08e..1e9d155 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -203,22 +203,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss, - } - } - --static __always_inline void __speculative_store_bypass_update(unsigned long tifn) -+#ifdef CONFIG_SMP -+ -+struct ssb_state { -+ struct ssb_state *shared_state; -+ raw_spinlock_t lock; -+ unsigned int disable_state; -+ unsigned long local_state; -+}; -+ -+#define LSTATE_SSB 0 -+ -+static DEFINE_PER_CPU(struct ssb_state, ssb_state); -+ -+void speculative_store_bypass_ht_init(void) - { -- u64 msr; -+ struct ssb_state *st = this_cpu_ptr(&ssb_state); -+ unsigned int this_cpu = smp_processor_id(); -+ unsigned int cpu; -+ -+ st->local_state = 0; -+ -+ /* -+ * Shared state setup happens once on the first bringup -+ * of the CPU. It's not destroyed on CPU hotunplug. -+ */ -+ if (st->shared_state) -+ return; -+ -+ raw_spin_lock_init(&st->lock); - -- if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { -- msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); -+ /* -+ * Go over HT siblings and check whether one of them has set up the -+ * shared state pointer already. -+ */ -+ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { -+ if (cpu == this_cpu) -+ continue; -+ -+ if (!per_cpu(ssb_state, cpu).shared_state) -+ continue; -+ -+ /* Link it to the state of the sibling: */ -+ st->shared_state = per_cpu(ssb_state, cpu).shared_state; -+ return; -+ } -+ -+ /* -+ * First HT sibling to come up on the core. Link shared state of -+ * the first HT sibling to itself. The siblings on the same core -+ * which come up later will see the shared state pointer and link -+ * themself to the state of this CPU. -+ */ -+ st->shared_state = st; -+} -+ -+/* -+ * Logic is: First HT sibling enables SSBD for both siblings in the core -+ * and last sibling to disable it, disables it for the whole core. This how -+ * MSR_SPEC_CTRL works in "hardware": -+ * -+ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL -+ */ -+static __always_inline void amd_set_core_ssb_state(unsigned long tifn) -+{ -+ struct ssb_state *st = this_cpu_ptr(&ssb_state); -+ u64 msr = x86_amd_ls_cfg_base; -+ -+ if (!static_cpu_has(X86_FEATURE_ZEN)) { -+ msr |= ssbd_tif_to_amd_ls_cfg(tifn); - wrmsrl(MSR_AMD64_LS_CFG, msr); -+ return; -+ } -+ -+ if (tifn & _TIF_SSBD) { -+ /* -+ * Since this can race with prctl(), block reentry on the -+ * same CPU. 
-+ */ -+ if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) -+ return; -+ -+ msr |= x86_amd_ls_cfg_ssbd_mask; -+ -+ raw_spin_lock(&st->shared_state->lock); -+ /* First sibling enables SSBD: */ -+ if (!st->shared_state->disable_state) -+ wrmsrl(MSR_AMD64_LS_CFG, msr); -+ st->shared_state->disable_state++; -+ raw_spin_unlock(&st->shared_state->lock); - } else { -- msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); -- wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) -+ return; -+ -+ raw_spin_lock(&st->shared_state->lock); -+ st->shared_state->disable_state--; -+ if (!st->shared_state->disable_state) -+ wrmsrl(MSR_AMD64_LS_CFG, msr); -+ raw_spin_unlock(&st->shared_state->lock); - } - } -+#else -+static __always_inline void amd_set_core_ssb_state(unsigned long tifn) -+{ -+ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); -+ -+ wrmsrl(MSR_AMD64_LS_CFG, msr); -+} -+#endif -+ -+static __always_inline void intel_set_ssb_state(unsigned long tifn) -+{ -+ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); -+ -+ wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+} -+ -+static __always_inline void __speculative_store_bypass_update(unsigned long tifn) -+{ -+ if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) -+ amd_set_core_ssb_state(tifn); -+ else -+ intel_set_ssb_state(tifn); -+} - - void speculative_store_bypass_update(void) - { -+ preempt_disable(); - __speculative_store_bypass_update(current_thread_info()->flags); -+ preempt_enable(); - } - - void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, -diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c -index 36171bc..c898a69 100644 ---- a/arch/x86/kernel/smpboot.c -+++ b/arch/x86/kernel/smpboot.c -@@ -75,6 +75,7 @@ - #include <asm/i8259.h> - #include <asm/realmode.h> - #include <asm/misc.h> -+#include <asm/spec-ctrl.h> - - /* Number of siblings per CPU package */ - int smp_num_siblings = 1; -@@ -237,6 +238,8 @@ static void notrace start_secondary(void *unused) - */ - check_tsc_sync_target(); - -+ speculative_store_bypass_ht_init(); -+ - /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with -@@ -1333,6 +1336,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) - set_mtrr_aps_delayed_init(); - - smp_quirk_init_udelay(); -+ -+ speculative_store_bypass_ht_init(); - } - - void arch_enable_nonboot_cpus_begin(void) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch deleted file mode 100644 index e298d3bc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 77aaa77d68bbabee027737671cdc1318e8dfe763 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 9 May 2018 23:01:01 +0200 -Subject: [PATCH 69/93] x86/bugs, KVM: Extend speculation control for - VIRT_SPEC_CTRL - -commit ccbcd2674472a978b48c91c1fbfb66c0ff959f24 upstream - -AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store -Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care -about the bit position of the SSBD bit and thus facilitate migration. -Also, the sibling coordination on Family 17H CPUs can only be done on -the host. 
- -Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an -extra argument for the VIRT_SPEC_CTRL MSR. - -Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU -data structure which is going to be used in later patches for the actual -implementation. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/spec-ctrl.h | 9 ++++++--- - arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- - arch/x86/kvm/svm.c | 11 +++++++++-- - arch/x86/kvm/vmx.c | 5 +++-- - 4 files changed, 36 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 0cb49c4..6e28740 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -10,10 +10,13 @@ - * the guest has, while on VMEXIT we restore the host view. This - * would be easier if SPEC_CTRL were architecturally maskable or - * shadowable for guests but this is not (currently) the case. -- * Takes the guest view of SPEC_CTRL MSR as a parameter. -+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also -+ * the guest's version of VIRT_SPEC_CTRL, if emulated. - */ --extern void x86_spec_ctrl_set_guest(u64); --extern void x86_spec_ctrl_restore_host(u64); -+extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, -+ u64 guest_virt_spec_ctrl); -+extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, -+ u64 guest_virt_spec_ctrl); - - /* AMD specific Speculative Store Bypass MSR data */ - extern u64 x86_amd_ls_cfg_base; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 15a6c58..d00e246 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -150,7 +150,15 @@ u64 x86_spec_ctrl_get_default(void) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - --void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) -+/** -+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest -+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -+ * (may get translated to MSR_AMD64_LS_CFG bits) -+ * -+ * Avoids writing to the MSR if the content/bits are the same -+ */ -+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) - { - u64 host = x86_spec_ctrl_base; - -@@ -167,7 +175,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); - --void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) -+/** -+ * x86_spec_ctrl_restore_host - Restore host speculation control registers -+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -+ * (may get translated to MSR_AMD64_LS_CFG bits) -+ * -+ * Avoids writing to the MSR if the content/bits are the same -+ */ -+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) - { - u64 host = x86_spec_ctrl_base; - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 9991462..481b106 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -185,6 +185,12 @@ struct vcpu_svm { - } host; - - u64 spec_ctrl; -+ /* -+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be -+ * translated into the appropriate L2_CFG bits on the host to -+ * perform 
speculative control. -+ */ -+ u64 virt_spec_ctrl; - - u32 *msrpm; - -@@ -1558,6 +1564,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) - u32 eax = 1; - - svm->spec_ctrl = 0; -+ svm->virt_spec_ctrl = 0; - - if (!init_event) { - svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | -@@ -4905,7 +4912,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ -- x86_spec_ctrl_set_guest(svm->spec_ctrl); -+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); - - asm volatile ( - "push %%" _ASM_BP "; \n\t" -@@ -5029,7 +5036,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - -- x86_spec_ctrl_restore_host(svm->spec_ctrl); -+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - - reload_tss(vcpu); - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index c7df5c4..55af4b6 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -8898,9 +8898,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ -- x86_spec_ctrl_set_guest(vmx->spec_ctrl); -+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - - vmx->__launched = vmx->loaded_vmcs->launched; -+ - asm( - /* Store host registers */ - "push %%" _ASM_DX "; push %%" _ASM_BP ";" -@@ -9036,7 +9037,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - -- x86_spec_ctrl_restore_host(vmx->spec_ctrl); -+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-entry-Convert-entry-assembler-indirect.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-entry-Convert-entry-assembler-indirect.patch deleted file mode 100644 index db94d3d8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-retpoline-entry-Convert-entry-assembler-indirect.patch +++ /dev/null @@ -1,122 +0,0 @@ -From a2073819181d22ec2197b919f5f5d3a7305dd5c0 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:28 +0000 -Subject: [PATCH 069/103] x86/retpoline/entry: Convert entry assembler indirect - jumps - -commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream. - -Convert indirect jumps in core 32/64bit entry assembler code to use -non-speculative sequences when CONFIG_RETPOLINE is enabled. - -Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return -address after the 'call' instruction must be *precisely* at the -.Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, -and the use of alternatives will mess that up unless we play horrid -games to prepend with NOPs and make the variants the same length. It's -not worth it; in the case where we ALTERNATIVE out the retpoline, the -first instruction at __x86.indirect_thunk.rax is going to be a bare -jmp *%rax anyway. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Ingo Molnar <mingo@kernel.org> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_32.S | 5 +++-- - arch/x86/entry/entry_64.S | 10 ++++++++-- - 2 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index edba860..7b95f35 100644 ---- a/arch/x86/entry/entry_32.S -+++ b/arch/x86/entry/entry_32.S -@@ -45,6 +45,7 @@ - #include <asm/asm.h> - #include <asm/smap.h> - #include <asm/export.h> -+#include <asm/nospec-branch.h> - - .section .entry.text, "ax" - -@@ -260,7 +261,7 @@ ENTRY(ret_from_fork) - - /* kernel thread */ - 1: movl %edi, %eax -- call *%ebx -+ CALL_NOSPEC %ebx - /* - * A kernel thread is allowed to return here after successfully - * calling do_execve(). Exit to userspace to complete the execve() -@@ -1062,7 +1063,7 @@ error_code: - movl %ecx, %es - TRACE_IRQS_OFF - movl %esp, %eax # pt_regs pointer -- call *%edi -+ CALL_NOSPEC %edi - jmp ret_from_exception - END(page_fault) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 5bb9b02..f7ebaa1 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -37,6 +37,7 @@ - #include <asm/pgtable_types.h> - #include <asm/export.h> - #include <asm/kaiser.h> -+#include <asm/nospec-branch.h> - #include <linux/err.h> - - /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ -@@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath: - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ -+#ifdef CONFIG_RETPOLINE -+ movq sys_call_table(, %rax, 8), %rax -+ call __x86_indirect_thunk_rax -+#else - call *sys_call_table(, %rax, 8) -+#endif - .Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -@@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64) - jmp entry_SYSCALL64_slow_path - - 1: -- jmp *%rax /* Called from C */ -+ JMP_NOSPEC %rax /* Called from C */ - END(stub_ptregs_64) - - .macro ptregs_stub func -@@ -457,7 +463,7 @@ ENTRY(ret_from_fork) - 1: - /* kernel thread */ - movq %r12, %rdi -- call *%rbx -+ CALL_NOSPEC %rbx - /* - * A kernel thread is allowed to return here after successfully - * calling do_execve(). 
Exit to userspace to complete the execve() --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch deleted file mode 100644 index a05a7c2d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch +++ /dev/null @@ -1,94 +0,0 @@ -From d9839707e5f87bc4b33d634d0d53b38acd3e22de Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:29 +0000 -Subject: [PATCH 070/103] x86/retpoline/ftrace: Convert ftrace assembler - indirect jumps - -commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream. - -Convert all indirect jumps in ftrace assembler code to use non-speculative -sequences when CONFIG_RETPOLINE is enabled. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_32.S | 5 +++-- - arch/x86/kernel/mcount_64.S | 7 ++++--- - 2 files changed, 7 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index 7b95f35..bdc9aea 100644 ---- a/arch/x86/entry/entry_32.S -+++ b/arch/x86/entry/entry_32.S -@@ -985,7 +985,8 @@ trace: - movl 0x4(%ebp), %edx - subl $MCOUNT_INSN_SIZE, %eax - -- call *ftrace_trace_function -+ movl ftrace_trace_function, %ecx -+ CALL_NOSPEC %ecx - - popl %edx - popl %ecx -@@ -1021,7 +1022,7 @@ return_to_handler: - movl %eax, %ecx - popl %edx - popl %eax -- jmp *%ecx -+ JMP_NOSPEC %ecx - #endif - - #ifdef CONFIG_TRACING -diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S -index 7b0d3da..287ec3b 100644 ---- a/arch/x86/kernel/mcount_64.S -+++ b/arch/x86/kernel/mcount_64.S -@@ -8,7 +8,7 @@ - #include <asm/ptrace.h> - #include <asm/ftrace.h> - #include <asm/export.h> -- -+#include <asm/nospec-branch.h> - - .code64 - .section .entry.text, "ax" -@@ -290,8 +290,9 @@ trace: - * ip and parent ip are used and the list function is called when - * function tracing is enabled. 
- */ -- call *ftrace_trace_function - -+ movq ftrace_trace_function, %r8 -+ CALL_NOSPEC %r8 - restore_mcount_regs - - jmp fgraph_trace -@@ -334,5 +335,5 @@ GLOBAL(return_to_handler) - movq 8(%rsp), %rdx - movq (%rsp), %rax - addq $24, %rsp -- jmp *%rdi -+ JMP_NOSPEC %rdi - #endif --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch deleted file mode 100644 index f7f668b1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch +++ /dev/null @@ -1,104 +0,0 @@ -From fa6ec76841319858ad2046107420a63feda4a0bb Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Thu, 17 May 2018 17:09:18 +0200 -Subject: [PATCH 70/93] x86/speculation: Add virtualized speculative store - bypass disable support - -commit 11fb0683493b2da112cd64c9dada221b52463bf7 upstream - -Some AMD processors only support a non-architectural means of enabling -speculative store bypass disable (SSBD). To allow a simplified view of -this to a guest, an architectural definition has been created through a new -CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f. With this, a -hypervisor can virtualize the existence of this definition and provide an -architectural method for using SSBD to a guest. - -Add the new CPUID feature, the new MSR and update the existing SSBD -support to use this MSR when present. - -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/msr-index.h | 2 ++ - arch/x86/kernel/cpu/bugs.c | 4 +++- - arch/x86/kernel/process.c | 13 ++++++++++++- - 4 files changed, 18 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ec87b8c..c278f27 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -274,6 +274,7 @@ - #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ - #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ - #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ -+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ - - /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ - #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index 0145a0b..ad5d0d8 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -323,6 +323,8 @@ - #define MSR_AMD64_IBSOPDATA4 0xc001103d - #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ - -+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f -+ - /* Fam 17h MSRs */ - #define MSR_F17H_IRPERF 0xc00000e9 - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index d00e246..97987b5 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -204,7 +204,9 @@ static void x86_amd_ssb_disable(void) - { - u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - -- if 
(boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) -+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) -+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); -+ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - wrmsrl(MSR_AMD64_LS_CFG, msrval); - } - -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 1e9d155..6d9e1ee 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -312,6 +312,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn) - } - #endif - -+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) -+{ -+ /* -+ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, -+ * so ssbd_tif_to_spec_ctrl() just works. -+ */ -+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); -+} -+ - static __always_inline void intel_set_ssb_state(unsigned long tifn) - { - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); -@@ -321,7 +330,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn) - - static __always_inline void __speculative_store_bypass_update(unsigned long tifn) - { -- if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) -+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) -+ amd_set_ssb_virt_state(tifn); -+ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - intel_set_ssb_state(tifn); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch deleted file mode 100644 index e5f0d609..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch +++ /dev/null @@ -1,79 +0,0 @@ -From f7a6ab11ff7949605f3be5cc96d8c44e16b8ed9e Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:30 +0000 -Subject: [PATCH 071/103] x86/retpoline/hyperv: Convert assembler indirect - jumps - -commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream. - -Convert all indirect jumps in hyperv inline asm code to use non-speculative -sequences when CONFIG_RETPOLINE is enabled. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk -[ backport to 4.9, hopefully correct, not tested... 
- gregkh ] -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - drivers/hv/hv.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c -index e0a8216..13c32eb4 100644 ---- a/drivers/hv/hv.c -+++ b/drivers/hv/hv.c -@@ -31,6 +31,7 @@ - #include <linux/clockchips.h> - #include <asm/hyperv.h> - #include <asm/mshyperv.h> -+#include <asm/nospec-branch.h> - #include "hyperv_vmbus.h" - - /* The one and only */ -@@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) - return (u64)ULLONG_MAX; - - __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); -- __asm__ __volatile__("call *%3" : "=a" (hv_status) : -+ __asm__ __volatile__(CALL_NOSPEC : -+ "=a" (hv_status) : - "c" (control), "d" (input_address), -- "m" (hypercall_page)); -+ THUNK_TARGET(hypercall_page)); - - return hv_status; - -@@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) - if (!hypercall_page) - return (u64)ULLONG_MAX; - -- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), -+ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi), - "=a"(hv_status_lo) : "d" (control_hi), - "a" (control_lo), "b" (input_address_hi), - "c" (input_address_lo), "D"(output_address_hi), -- "S"(output_address_lo), "m" (hypercall_page)); -+ "S"(output_address_lo), -+ THUNK_TARGET(hypercall_page)); - - return hv_status_lo | ((u64)hv_status_hi << 32); - #endif /* !x86_64 */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch deleted file mode 100644 index daf64371..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 10bd199ba2af68b40deb854851b3db51bd97531a Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 10 May 2018 20:31:44 +0200 -Subject: [PATCH 71/93] x86/speculation: Rework - speculative_store_bypass_update() - -commit 0270be3e34efb05a88bc4c422572ece038ef3608 upstream - -The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse -speculative_store_bypass_update() to avoid code duplication. Add an -argument for supplying a thread info (TIF) value and create a wrapper -speculative_store_bypass_update_current() which is used at the existing -call site. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/spec-ctrl.h | 7 ++++++- - arch/x86/kernel/cpu/bugs.c | 2 +- - arch/x86/kernel/process.c | 4 ++-- - 3 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 6e28740..82b6c5a 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void); - static inline void speculative_store_bypass_ht_init(void) { } - #endif - --extern void speculative_store_bypass_update(void); -+extern void speculative_store_bypass_update(unsigned long tif); -+ -+static inline void speculative_store_bypass_update_current(void) -+{ -+ speculative_store_bypass_update(current_thread_info()->flags); -+} - - #endif -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 97987b5..eddbdc8 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -597,7 +597,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) - * mitigation until it is next scheduled. - */ - if (task == current && update) -- speculative_store_bypass_update(); -+ speculative_store_bypass_update_current(); - - return 0; - } -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 6d9e1ee..00a9047 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -338,10 +338,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn - intel_set_ssb_state(tifn); - } - --void speculative_store_bypass_update(void) -+void speculative_store_bypass_update(unsigned long tif) - { - preempt_disable(); -- __speculative_store_bypass_update(current_thread_info()->flags); -+ __speculative_store_bypass_update(tif); - preempt_enable(); - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch deleted file mode 100644 index e3e0a67d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch +++ /dev/null @@ -1,145 +0,0 @@ -From f30cba1d35ebb9a07ebd54253086280080b366a6 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Sat, 12 May 2018 00:14:51 +0200 -Subject: [PATCH 72/93] x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host} - -commit cc69b34989210f067b2c51d5539b5f96ebcc3a01 upstream - -Function bodies are very similar and are going to grow more almost -identical code. Add a bool arg to determine whether SPEC_CTRL is being set -for the guest or restored to the host. - -No functional changes. 
- -Signed-off-by: Borislav Petkov <bp@suse.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++++++--- - arch/x86/kernel/cpu/bugs.c | 60 ++++++++++------------------------------ - 2 files changed, 44 insertions(+), 49 deletions(-) - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 82b6c5a..9cecbe5 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -13,10 +13,35 @@ - * Takes the guest view of SPEC_CTRL MSR as a parameter and also - * the guest's version of VIRT_SPEC_CTRL, if emulated. - */ --extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, -- u64 guest_virt_spec_ctrl); --extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, -- u64 guest_virt_spec_ctrl); -+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); -+ -+/** -+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest -+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -+ * (may get translated to MSR_AMD64_LS_CFG bits) -+ * -+ * Avoids writing to the MSR if the content/bits are the same -+ */ -+static inline -+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -+{ -+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); -+} -+ -+/** -+ * x86_spec_ctrl_restore_host - Restore host speculation control registers -+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -+ * (may get translated to MSR_AMD64_LS_CFG bits) -+ * -+ * Avoids writing to the MSR if the content/bits are the same -+ */ -+static inline -+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -+{ -+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); -+} - - /* AMD specific Speculative Store Bypass MSR data */ - extern u64 x86_amd_ls_cfg_base; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index eddbdc8..9203150 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -150,55 +150,25 @@ u64 x86_spec_ctrl_get_default(void) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - --/** -- * x86_spec_ctrl_set_guest - Set speculation control registers for the guest -- * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -- * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -- * (may get translated to MSR_AMD64_LS_CFG bits) -- * -- * Avoids writing to the MSR if the content/bits are the same -- */ --void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -+void -+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) - { -- u64 host = x86_spec_ctrl_base; -+ struct thread_info *ti = current_thread_info(); -+ u64 msr, host = x86_spec_ctrl_base; - - /* Is MSR_SPEC_CTRL implemented ? 
*/ -- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) -- return; -- -- /* SSBD controlled in MSR_SPEC_CTRL */ -- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) -- host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); -- -- if (host != guest_spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); --} --EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); -- --/** -- * x86_spec_ctrl_restore_host - Restore host speculation control registers -- * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL -- * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL -- * (may get translated to MSR_AMD64_LS_CFG bits) -- * -- * Avoids writing to the MSR if the content/bits are the same -- */ --void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) --{ -- u64 host = x86_spec_ctrl_base; -- -- /* Is MSR_SPEC_CTRL implemented ? */ -- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) -- return; -- -- /* SSBD controlled in MSR_SPEC_CTRL */ -- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) -- host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); -- -- if (host != guest_spec_ctrl) -- wrmsrl(MSR_IA32_SPEC_CTRL, host); -+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { -+ /* SSBD controlled in MSR_SPEC_CTRL */ -+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) -+ host |= ssbd_tif_to_spec_ctrl(ti->flags); -+ -+ if (host != guest_spec_ctrl) { -+ msr = setguest ? guest_spec_ctrl : host; -+ wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+ } -+ } - } --EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); -+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); - - static void x86_amd_ssb_disable(void) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch deleted file mode 100644 index a800dc71..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 88c3cee0956e7967097e40de0be79898d9faa7fe Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:31 +0000 -Subject: [PATCH 072/103] x86/retpoline/xen: Convert Xen hypercall indirect - jumps - -commit ea08816d5b185ab3d09e95e393f265af54560350 upstream. - -Convert indirect call in Xen hypercall to use non-speculative sequence, -when CONFIG_RETPOLINE is enabled. 
- -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Reviewed-by: Juergen Gross <jgross@suse.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/xen/hypercall.h | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h -index a12a047..8b1f91f 100644 ---- a/arch/x86/include/asm/xen/hypercall.h -+++ b/arch/x86/include/asm/xen/hypercall.h -@@ -43,6 +43,7 @@ - - #include <asm/page.h> - #include <asm/pgtable.h> -+#include <asm/nospec-branch.h> - - #include <xen/interface/xen.h> - #include <xen/interface/sched.h> -@@ -214,9 +215,9 @@ privcmd_call(unsigned call, - __HYPERCALL_DECLS; - __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - -- asm volatile("call *%[call]" -+ asm volatile(CALL_NOSPEC - : __HYPERCALL_5PARAM -- : [call] "a" (&hypercall_page[call]) -+ : [thunk_target] "a" (&hypercall_page[call]) - : __HYPERCALL_CLOBBER5); - - return (long)__res; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch deleted file mode 100644 index 49224dbb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 22a75daea25a170892d8c6cbf0b740ef35219cc8 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 12 May 2018 20:49:16 +0200 -Subject: [PATCH 73/93] x86/bugs: Expose x86_spec_ctrl_base directly - -commit fa8ac4988249c38476f6ad678a4848a736373403 upstream - -x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR. -x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to -prevent modification to that variable. Though the variable is read only -after init and globaly visible already. - -Remove the function and export the variable instead. 
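
The trade is visible directly in the hunks below: a trivial exported
accessor goes away and the __ro_after_init variable is exported
instead. In outline (simplified; the removed accessor also folded in
the current task's SSBD TIF bit, see the bugs.c hunk):

    /* Before: reads went through an exported helper. */
    u64 x86_spec_ctrl_get_default(void)
    {
            return x86_spec_ctrl_base;
    }
    EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);

    /* After: read-only-after-init data is exported directly. */
    u64 __ro_after_init x86_spec_ctrl_base;
    EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
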
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 16 +++++----------- - arch/x86/include/asm/spec-ctrl.h | 3 --- - arch/x86/kernel/cpu/bugs.c | 11 +---------- - 3 files changed, 6 insertions(+), 24 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index bc258e6..8d9deec 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -217,16 +217,7 @@ enum spectre_v2_mitigation { - SPECTRE_V2_IBRS, - }; - --/* -- * The Intel specification for the SPEC_CTRL MSR requires that we -- * preserve any already set reserved bits at boot time (e.g. for -- * future additions that this kernel is not currently aware of). -- * We then set any additional mitigation bits that we want -- * ourselves and always use this as the base for SPEC_CTRL. -- * We also use this when handling guest entry/exit as below. -- */ - extern void x86_spec_ctrl_set(u64); --extern u64 x86_spec_ctrl_get_default(void); - - /* The Speculative Store Bypass disable variants */ - enum ssb_mitigation { -@@ -278,6 +269,9 @@ static inline void indirect_branch_prediction_barrier(void) - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); - } - -+/* The Intel SPEC CTRL MSR base value cache */ -+extern u64 x86_spec_ctrl_base; -+ - /* - * With retpoline, we must use IBRS to restrict branch prediction - * before calling into firmware. -@@ -286,7 +280,7 @@ static inline void indirect_branch_prediction_barrier(void) - */ - #define firmware_restrict_branch_speculation_start() \ - do { \ -- u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ -+ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ - preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ -@@ -295,7 +289,7 @@ do { \ - - #define firmware_restrict_branch_speculation_end() \ - do { \ -- u64 val = x86_spec_ctrl_get_default(); \ -+ u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ - X86_FEATURE_USE_IBRS_FW); \ -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 9cecbe5..763d497 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) - extern u64 x86_amd_ls_cfg_base; - extern u64 x86_amd_ls_cfg_ssbd_mask; - --/* The Intel SPEC CTRL MSR base value cache */ --extern u64 x86_spec_ctrl_base; -- - static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) - { - BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 9203150..47b7f4f 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -35,6 +35,7 @@ static void __init ssb_select_mitigation(void); - * writes to SPEC_CTRL contain whatever reserved bits have been set. 
- */ - u64 __ro_after_init x86_spec_ctrl_base; -+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); - - /* - * The vendor and possibly platform specific bits which can be modified in -@@ -140,16 +141,6 @@ void x86_spec_ctrl_set(u64 val) - } - EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); - --u64 x86_spec_ctrl_get_default(void) --{ -- u64 msrval = x86_spec_ctrl_base; -- -- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) -- msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); -- return msrval; --} --EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -- - void - x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-checksum32-Convert-assembler-indirect-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-checksum32-Convert-assembler-indirect-.patch deleted file mode 100644 index 6c76a686..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-retpoline-checksum32-Convert-assembler-indirect-.patch +++ /dev/null @@ -1,70 +0,0 @@ -From bb4bbea7e37e7dd35989dcfb07b760324ec0ef15 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Thu, 11 Jan 2018 21:46:32 +0000 -Subject: [PATCH 073/103] x86/retpoline/checksum32: Convert assembler indirect - jumps - -commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream. - -Convert all indirect jumps in 32bit checksum assembler code to use -non-speculative sequences when CONFIG_RETPOLINE is enabled. - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/lib/checksum_32.S | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S -index 4d34bb5..46e71a7 100644 ---- a/arch/x86/lib/checksum_32.S -+++ b/arch/x86/lib/checksum_32.S -@@ -29,7 +29,8 @@ - #include <asm/errno.h> - #include <asm/asm.h> - #include <asm/export.h> -- -+#include <asm/nospec-branch.h> -+ - /* - * computes a partial checksum, e.g. 
for TCP/UDP fragments - */ -@@ -156,7 +157,7 @@ ENTRY(csum_partial) - negl %ebx - lea 45f(%ebx,%ebx,2), %ebx - testl %esi, %esi -- jmp *%ebx -+ JMP_NOSPEC %ebx - - # Handle 2-byte-aligned regions - 20: addw (%esi), %ax -@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) - andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx - testl %esi, %esi -- jmp *%ebx -+ JMP_NOSPEC %ebx - 1: addl $64,%esi - addl $64,%edi - SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch deleted file mode 100644 index 40bf45d2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch +++ /dev/null @@ -1,76 +0,0 @@ -From ac97f3ffd444941e88a86ea4cd8033b686ab9170 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 12 May 2018 20:53:14 +0200 -Subject: [PATCH 74/93] x86/bugs: Remove x86_spec_ctrl_set() - -commit 4b59bdb569453a60b752b274ca61f009e37f4dae upstream - -x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there -provide no real value as both call sites can just write x86_spec_ctrl_base -to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra -masking or checking. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 2 -- - arch/x86/kernel/cpu/bugs.c | 13 ++----------- - 2 files changed, 2 insertions(+), 13 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 8d9deec..8b38df9 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -217,8 +217,6 @@ enum spectre_v2_mitigation { - SPECTRE_V2_IBRS, - }; - --extern void x86_spec_ctrl_set(u64); -- - /* The Speculative Store Bypass disable variants */ - enum ssb_mitigation { - SPEC_STORE_BYPASS_NONE, -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 47b7f4f..82a99d0 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -132,15 +132,6 @@ static const char *spectre_v2_strings[] = { - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - --void x86_spec_ctrl_set(u64 val) --{ -- if (val & x86_spec_ctrl_mask) -- WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); -- else -- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); --} --EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); -- - void - x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) - { -@@ -502,7 +493,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) - case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; -- x86_spec_ctrl_set(SPEC_CTRL_SSBD); -+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); -@@ -614,7 +605,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) - void x86_spec_ctrl_setup_ap(void) - { - if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) -- x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); -+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - - if (ssb_mode 
== SPEC_STORE_BYPASS_DISABLE) - x86_amd_ssb_disable(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch deleted file mode 100644 index c51d04bc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 2b525f0d85795152c41980384a7190c8c16d80d9 Mon Sep 17 00:00:00 2001 -From: Andi Kleen <ak@linux.intel.com> -Date: Thu, 11 Jan 2018 21:46:33 +0000 -Subject: [PATCH 074/103] x86/retpoline/irq32: Convert assembler indirect jumps - -commit 7614e913db1f40fff819b36216484dc3808995d4 upstream. - -Convert all indirect jumps in 32bit irq inline asm code to use non -speculative sequences. - -Signed-off-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Acked-by: Ingo Molnar <mingo@kernel.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/irq_32.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c -index d4eb450..2763573 100644 ---- a/arch/x86/kernel/irq_32.c -+++ b/arch/x86/kernel/irq_32.c -@@ -19,6 +19,7 @@ - #include <linux/mm.h> - - #include <asm/apic.h> -+#include <asm/nospec-branch.h> - - #ifdef CONFIG_DEBUG_STACKOVERFLOW - -@@ -54,11 +55,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); - static void call_on_stack(void *func, void *stack) - { - asm volatile("xchgl %%ebx,%%esp \n" -- "call *%%edi \n" -+ CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=b" (stack) - : "0" (stack), -- "D"(func) -+ [thunk_target] "D"(func) - : "memory", "cc", "edx", "ecx", "eax"); - } - -@@ -94,11 +95,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) - call_on_stack(print_stack_overflow, isp); - - asm volatile("xchgl %%ebx,%%esp \n" -- "call *%%edi \n" -+ CALL_NOSPEC - "movl %%ebx,%%esp \n" - : "=a" (arg1), "=b" (isp) - : "0" (desc), "1" (isp), -- "D" (desc->handle_irq) -+ [thunk_target] "D" (desc->handle_irq) - : "memory", "cc", "ecx"); - return 1; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch deleted file mode 100644 index 27bd0430..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 96c9747df6b51ecfe781ba6c09ded9f406d20093 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 12 May 2018 20:10:00 +0200 
-Subject: [PATCH 75/93] x86/bugs: Rework spec_ctrl base and mask logic - -commit be6fcb5478e95bb1c91f489121238deb3abca46a upstream - -x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value -which are not to be modified. However the implementation is not really used -and the bitmask was inverted to make a check easier, which was removed in -"x86/bugs: Remove x86_spec_ctrl_set()" - -Aside of that it is missing the STIBP bit if it is supported by the -platform, so if the mask would be used in x86_virt_spec_ctrl() then it -would prevent a guest from setting STIBP. - -Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to -sanitize the value which is supplied by the guest. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++------- - 1 file changed, 19 insertions(+), 7 deletions(-) - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 82a99d0..2ae3586 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. - */ --static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; -+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; - - /* - * AMD specific MSR info for Speculative Store Bypass control. -@@ -67,6 +67,10 @@ void __init check_bugs(void) - if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - -+ /* Allow STIBP in MSR_SPEC_CTRL if supported */ -+ if (boot_cpu_has(X86_FEATURE_STIBP)) -+ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; -+ - /* Select the proper spectre mitigation before patching alternatives */ - spectre_v2_select_mitigation(); - -@@ -135,18 +139,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - void - x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) - { -+ u64 msrval, guestval, hostval = x86_spec_ctrl_base; - struct thread_info *ti = current_thread_info(); -- u64 msr, host = x86_spec_ctrl_base; - - /* Is MSR_SPEC_CTRL implemented ? */ - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { -+ /* -+ * Restrict guest_spec_ctrl to supported values. Clear the -+ * modifiable bits in the host base value and or the -+ * modifiable bits from the guest value. -+ */ -+ guestval = hostval & ~x86_spec_ctrl_mask; -+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; -+ - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) -- host |= ssbd_tif_to_spec_ctrl(ti->flags); -+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - -- if (host != guest_spec_ctrl) { -- msr = setguest ? guest_spec_ctrl : host; -- wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+ if (hostval != guestval) { -+ msrval = setguest ? 
guestval : hostval; -+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } - } -@@ -492,7 +504,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_SSBD; -- x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; -+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; - case X86_VENDOR_AMD: --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch deleted file mode 100644 index 40c846fc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch +++ /dev/null @@ -1,195 +0,0 @@ -From 718349fea3d22ecec829ef448f45f6eab4e5e2fa Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Fri, 12 Jan 2018 11:11:27 +0000 -Subject: [PATCH 075/103] x86/retpoline: Fill return stack buffer on vmexit - -commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream. - -In accordance with the Intel and AMD documentation, we need to overwrite -all entries in the RSB on exiting a guest, to prevent malicious branch -target predictions from affecting the host kernel. This is needed both -for retpoline and for IBRS. - -[ak: numbers again for the RSB stuffing labels] - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++++++++++- - arch/x86/kvm/svm.c | 4 ++ - arch/x86/kvm/vmx.c | 4 ++ - 3 files changed, 85 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index ea034fa..402a11c 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -7,6 +7,48 @@ - #include <asm/alternative-asm.h> - #include <asm/cpufeatures.h> - -+/* -+ * Fill the CPU return stack buffer. -+ * -+ * Each entry in the RSB, if used for a speculative 'ret', contains an -+ * infinite 'pause; jmp' loop to capture speculative execution. -+ * -+ * This is required in various cases for retpoline and IBRS-based -+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to -+ * eliminate potentially bogus entries from the RSB, and sometimes -+ * purely to ensure that it doesn't get empty, which on some CPUs would -+ * allow predictions from other (unwanted!) sources to be used. -+ * -+ * We define a CPP macro such that it can be used from both .S files and -+ * inline assembly. 
It's possible to do a .macro and then include that -+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. -+ */ -+ -+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -+#define RSB_FILL_LOOPS 16 /* To avoid underflow */ -+ -+/* -+ * Google experimented with loop-unrolling and this turned out to be -+ * the optimal version — two calls, each with their own speculation -+ * trap should their return address end up getting used, in a loop. -+ */ -+#define __FILL_RETURN_BUFFER(reg, nr, sp) \ -+ mov $(nr/2), reg; \ -+771: \ -+ call 772f; \ -+773: /* speculation trap */ \ -+ pause; \ -+ jmp 773b; \ -+772: \ -+ call 774f; \ -+775: /* speculation trap */ \ -+ pause; \ -+ jmp 775b; \ -+774: \ -+ dec reg; \ -+ jnz 771b; \ -+ add $(BITS_PER_LONG/8) * nr, sp; -+ - #ifdef __ASSEMBLY__ - - /* -@@ -76,6 +118,20 @@ - #endif - .endm - -+ /* -+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP -+ * monstrosity above, manually. -+ */ -+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -+#ifdef CONFIG_RETPOLINE -+ ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE "jmp .Lskip_rsb_\@", \ -+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ -+ \ftr -+.Lskip_rsb_\@: -+#endif -+.endm -+ - #else /* __ASSEMBLY__ */ - - #define ANNOTATE_NOSPEC_ALTERNATIVE \ -@@ -119,7 +175,7 @@ - X86_FEATURE_RETPOLINE) - - # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) --#else /* No retpoline */ -+#else /* No retpoline for C / inline asm */ - # define CALL_NOSPEC "call *%[thunk_target]\n" - # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) - #endif -@@ -134,5 +190,25 @@ enum spectre_v2_mitigation { - SPECTRE_V2_IBRS, - }; - -+/* -+ * On VMEXIT we must ensure that no RSB predictions learned in the guest -+ * can be followed in the host, by overwriting the RSB completely. Both -+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future -+ * CPUs with IBRS_ATT *might* it be avoided. -+ */ -+static inline void vmexit_fill_RSB(void) -+{ -+#ifdef CONFIG_RETPOLINE -+ unsigned long loops = RSB_CLEAR_LOOPS / 2; -+ -+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE -+ ALTERNATIVE("jmp 910f", -+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), -+ X86_FEATURE_RETPOLINE) -+ "910:" -+ : "=&r" (loops), ASM_CALL_CONSTRAINT -+ : "r" (loops) : "memory" ); -+#endif -+} - #endif /* __ASSEMBLY__ */ - #endif /* __NOSPEC_BRANCH_H__ */ -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 8ca1eca..975ea99 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -44,6 +44,7 @@ - #include <asm/debugreg.h> - #include <asm/kvm_para.h> - #include <asm/irq_remapping.h> -+#include <asm/nospec-branch.h> - - #include <asm/virtext.h> - #include "trace.h" -@@ -4886,6 +4887,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - #endif - ); - -+ /* Eliminate branch target predictions from guest mode */ -+ vmexit_fill_RSB(); -+ - #ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); - #else -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 69b8f8a..4ead27f 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -48,6 +48,7 @@ - #include <asm/kexec.h> - #include <asm/apic.h> - #include <asm/irq_remapping.h> -+#include <asm/nospec-branch.h> - - #include "trace.h" - #include "pmu.h" -@@ -8989,6 +8990,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - #endif - ); - -+ /* Eliminate branch target predictions from guest mode */ -+ vmexit_fill_RSB(); -+ - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. 
Restore it if needed */ - if (debugctlmsr) - update_debugctlmsr(debugctlmsr); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-retpoline-Remove-compile-time-warning.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-retpoline-Remove-compile-time-warning.patch deleted file mode 100644 index 31eeb054..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-retpoline-Remove-compile-time-warning.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0f71f50c7d1e47adced3237b77f0edcd25b92c0c Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 14 Jan 2018 22:13:29 +0100 -Subject: [PATCH 076/103] x86/retpoline: Remove compile time warning - -commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream. - -Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler -does not have retpoline support. Linus rationale for this is: - - It's wrong because it will just make people turn off RETPOLINE, and the - asm updates - and return stack clearing - that are independent of the - compiler are likely the most important parts because they are likely the - ones easiest to target. - - And it's annoying because most people won't be able to do anything about - it. The number of people building their own compiler? Very small. So if - their distro hasn't got a compiler yet (and pretty much nobody does), the - warning is just annoying crap. - - It is already properly reported as part of the sysfs interface. The - compile-time warning only encourages bad things. - -Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") -Requested-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: Peter Zijlstra (Intel) <peterz@infradead.org> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/Makefile | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index 1e1a733..cd22cb8 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -187,8 +187,6 @@ ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE -- else -- $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) 
- endif - endif - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch deleted file mode 100644 index d7ddca7e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch +++ /dev/null @@ -1,84 +0,0 @@ -From d63bb88a1ae9c702ddf7477b0e96be1fc20f8d28 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 10 May 2018 20:42:48 +0200 -Subject: [PATCH 76/93] x86/speculation, KVM: Implement support for - VIRT_SPEC_CTRL/LS_CFG - -commit 47c61b3955cf712cadfc25635bf9bc174af030ea upstream - -Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to -x86_virt_spec_ctrl(). If either X86_FEATURE_LS_CFG_SSBD or -X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl -argument to check whether the state must be modified on the host. The -update reuses speculative_store_bypass_update() so the ZEN-specific sibling -coordination can be reused. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/spec-ctrl.h | 6 ++++++ - arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++++ - 2 files changed, 36 insertions(+) - -diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h -index 763d497..ae7c2c5 100644 ---- a/arch/x86/include/asm/spec-ctrl.h -+++ b/arch/x86/include/asm/spec-ctrl.h -@@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) - return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); - } - -+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) -+{ -+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); -+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); -+} -+ - static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) - { - return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 2ae3586..86af9b1 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -161,6 +161,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } -+ -+ /* -+ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update -+ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. -+ */ -+ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && -+ !static_cpu_has(X86_FEATURE_VIRT_SSBD)) -+ return; -+ -+ /* -+ * If the host has SSBD mitigation enabled, force it in the host's -+ * virtual MSR value. If its not permanently enabled, evaluate -+ * current's TIF_SSBD thread flag. -+ */ -+ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) -+ hostval = SPEC_CTRL_SSBD; -+ else -+ hostval = ssbd_tif_to_spec_ctrl(ti->flags); -+ -+ /* Sanitize the guest value */ -+ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; -+ -+ if (hostval != guestval) { -+ unsigned long tif; -+ -+ tif = setguest ? 
ssbd_spec_ctrl_to_tif(guestval) : -+ ssbd_spec_ctrl_to_tif(hostval); -+ -+ speculative_store_bypass_update(tif); -+ } - } - EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch deleted file mode 100644 index de5ae0c2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 708128a64a6b750b63a5f1ca1e943c48023145b9 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Thu, 10 May 2018 22:06:39 +0200 -Subject: [PATCH 77/93] KVM: SVM: Implement VIRT_SPEC_CTRL support for SSBD - -commit bc226f07dcd3c9ef0b7f6236fe356ea4a9cb4769 upstream - -Expose the new virtualized architectural mechanism, VIRT_SSBD, for using -speculative store bypass disable (SSBD) under SVM. This will allow guests -to use SSBD on hardware that uses non-architectural mechanisms for enabling -SSBD. - -[ tglx: Folded the migration fixup from Paolo Bonzini ] - -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/kvm_host.h | 2 +- - arch/x86/kernel/cpu/common.c | 3 ++- - arch/x86/kvm/cpuid.c | 11 +++++++++-- - arch/x86/kvm/cpuid.h | 9 +++++++++ - arch/x86/kvm/svm.c | 21 +++++++++++++++++++-- - arch/x86/kvm/vmx.c | 18 +++++++++++++++--- - arch/x86/kvm/x86.c | 13 ++++--------- - 7 files changed, 59 insertions(+), 18 deletions(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 6f6ee68..fd3a854 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -864,7 +864,7 @@ struct kvm_x86_ops { - int (*hardware_setup)(void); /* __init */ - void (*hardware_unsetup)(void); /* __exit */ - bool (*cpu_has_accelerated_tpr)(void); -- bool (*cpu_has_high_real_mode_segbase)(void); -+ bool (*has_emulated_msr)(int index); - void (*cpuid_update)(struct kvm_vcpu *vcpu); - - int (*vm_init)(struct kvm *kvm); -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 945e841..40fc748 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -735,7 +735,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - -- if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) -+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || -+ cpu_has(c, X86_FEATURE_VIRT_SSBD)) - set_cpu_cap(c, X86_FEATURE_SSBD); - - if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 4ccdfbe..4d3269b 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 0x80000008.ebx */ - const u32 kvm_cpuid_8000_0008_ebx_x86_features = -- F(AMD_IBPB) | F(AMD_IBRS); -+ F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); - - /* cpuid 0xC0000001.edx */ - const u32 kvm_cpuid_C000_0001_edx_x86_features = -@@ -618,13 +618,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - g_phys_as = phys_as; - entry->eax = g_phys_as | (virt_as << 8); - entry->edx = 0; -- /* IBRS and IBPB aren't 
necessarily present in hardware cpuid */ -+ /* -+ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in -+ * hardware cpuid -+ */ - if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) - entry->ebx |= F(AMD_IBPB); - if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) - entry->ebx |= F(AMD_IBRS); -+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) -+ entry->ebx |= F(VIRT_SSBD); - entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; - cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); -+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) -+ entry->ebx |= F(VIRT_SSBD); - break; - } - case 0x80000019: -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 410070c..d22695c 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -182,6 +182,15 @@ static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) - return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); - } - -+static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu) -+{ -+ struct kvm_cpuid_entry2 *best; -+ -+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); -+ return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD)); -+} -+ -+ - - /* - * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index 481b106..c60d8fc 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -3552,6 +3552,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) - - msr_info->data = svm->spec_ctrl; - break; -+ case MSR_AMD64_VIRT_SPEC_CTRL: -+ if (!msr_info->host_initiated && -+ !guest_cpuid_has_virt_ssbd(vcpu)) -+ return 1; -+ -+ msr_info->data = svm->virt_spec_ctrl; -+ break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; -@@ -3679,6 +3686,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) - break; - set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); - break; -+ case MSR_AMD64_VIRT_SPEC_CTRL: -+ if (!msr->host_initiated && -+ !guest_cpuid_has_virt_ssbd(vcpu)) -+ return 1; -+ -+ if (data & ~SPEC_CTRL_SSBD) -+ return 1; -+ -+ svm->virt_spec_ctrl = data; -+ break; - case MSR_STAR: - svm->vmcb->save.star = data; - break; -@@ -5138,7 +5155,7 @@ static bool svm_cpu_has_accelerated_tpr(void) - return false; - } - --static bool svm_has_high_real_mode_segbase(void) -+static bool svm_has_emulated_msr(int index) - { - return true; - } -@@ -5455,7 +5472,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { - .hardware_enable = svm_hardware_enable, - .hardware_disable = svm_hardware_disable, - .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, -- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, -+ .has_emulated_msr = svm_has_emulated_msr, - - .vcpu_create = svm_create_vcpu, - .vcpu_free = svm_free_vcpu, -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 55af4b6..7b4739c 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -8673,9 +8673,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) - } - } - --static bool vmx_has_high_real_mode_segbase(void) -+static bool vmx_has_emulated_msr(int index) - { -- return enable_unrestricted_guest || emulate_invalid_guest_state; -+ switch (index) { -+ case MSR_IA32_SMBASE: -+ /* -+ * We cannot do SMM unless we can run the guest in big -+ * real mode. -+ */ -+ return enable_unrestricted_guest || emulate_invalid_guest_state; -+ case MSR_AMD64_VIRT_SPEC_CTRL: -+ /* This is AMD only. 
*/ -+ return false; -+ default: -+ return true; -+ } - } - - static bool vmx_mpx_supported(void) -@@ -11304,7 +11316,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { - .hardware_enable = hardware_enable, - .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = report_flexpriority, -- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, -+ .has_emulated_msr = vmx_has_emulated_msr, - - .vcpu_create = vmx_create_vcpu, - .vcpu_free = vmx_free_vcpu, -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index b27b93d..c531231 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -1002,6 +1002,7 @@ static u32 emulated_msrs[] = { - MSR_IA32_MCG_CTL, - MSR_IA32_MCG_EXT_CTL, - MSR_IA32_SMBASE, -+ MSR_AMD64_VIRT_SPEC_CTRL, - }; - - static unsigned num_emulated_msrs; -@@ -2650,7 +2651,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) - * fringe case that is not enabled except via specific settings - * of the module parameters. - */ -- r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); -+ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); - break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; -@@ -4201,14 +4202,8 @@ static void kvm_init_msr_list(void) - num_msrs_to_save = j; - - for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { -- switch (emulated_msrs[i]) { -- case MSR_IA32_SMBASE: -- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) -- continue; -- break; -- default: -- break; -- } -+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) -+ continue; - - if (j < i) - emulated_msrs[j] = emulated_msrs[i]; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch deleted file mode 100644 index 3334f65c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 2a4405e938734d966dc7693f93c93af899f7b4be Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Mon, 15 Jan 2018 11:00:54 -0600 -Subject: [PATCH 077/103] objtool: Fix retpoline support for pre-ORC objtool - -Objtool 1.0 (pre-ORC) produces the following warning when it encounters -a retpoline: - - arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function - -That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC -annotations, neither of which are applicable to alternatives. Silence -the warning for alternative instructions, just like objtool 2.0 already -does. - -Reported-by: David Woodhouse <dwmw2@infradead.org> -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - tools/objtool/builtin-check.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index ee71d4c..377bff0 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -1221,6 +1221,14 @@ static int validate_uncallable_instructions(struct objtool_file *file) - - for_each_insn(file, insn) { - if (!insn->visited && insn->type == INSN_RETURN) { -+ -+ /* -+ * Don't warn about call instructions in unvisited -+ * retpoline alternatives. 
-+ */ -+ if (!strcmp(insn->sec->name, ".altinstr_replacement")) -+ continue; -+ - WARN_FUNC("return instruction outside of a callable function", - insn->sec, insn->offset); - warnings++; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch deleted file mode 100644 index f2131e66..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch +++ /dev/null @@ -1,48 +0,0 @@ -From b5380d0ef78780a08140c0b4e8d050752e91104a Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Wed, 16 May 2018 23:18:09 -0400 -Subject: [PATCH 78/93] x86/bugs: Rename SSBD_NO to SSB_NO - -commit 240da953fcc6a9008c92fae5b1f727ee5ed167ab upstream - -The "336996 Speculative Execution Side Channel Mitigations" from -May defines this as SSB_NO, hence lets sync-up. - -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/msr-index.h | 2 +- - arch/x86/kernel/cpu/common.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index ad5d0d8..ca41d8f 100644 ---- a/arch/x86/include/asm/msr-index.h -+++ b/arch/x86/include/asm/msr-index.h -@@ -63,7 +63,7 @@ - #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a - #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ - #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ --#define ARCH_CAP_SSBD_NO (1 << 4) /* -+#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 40fc748..b0fd028 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -926,7 +926,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - - if (!x86_match_cpu(cpu_no_spec_store_bypass) && -- !(ia32_cap & ARCH_CAP_SSBD_NO)) -+ !(ia32_cap & ARCH_CAP_SSB_NO)) - setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - - if (x86_match_cpu(cpu_no_speculation)) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch deleted file mode 100644 index 3e743f00..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch +++ /dev/null @@ -1,79 +0,0 @@ -From d0383173a920ac2b316abb04d8a9f2bd9045d598 Mon Sep 17 00:00:00 2001 -From: Pavel Tatashin <pasha.tatashin@oracle.com> -Date: Mon, 15 Jan 2018 11:44:14 -0500 -Subject: [PATCH 078/103] x86/pti/efi: broken conversion from efi to kernel - page table - -The page table order must be increased for EFI table in order to avoid a -bug where NMI tries to change the page table to kernel page table, while -efi page table is active. 
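
In miniature, the fix applied in the hunks below sizes the EFI PGD
exactly like every other PGD once page-table isolation is on:

    /*
     * With CONFIG_PAGE_TABLE_ISOLATION the kernel keeps two PGDs in a
     * single order-1 (8k, 8k-aligned) allocation and flips bit 12 of
     * the pointer to switch halves, so the EFI PGD must be allocated
     * with the same order for the NMI path to flip it safely.
     */
    #ifdef CONFIG_PAGE_TABLE_ISOLATION
    #define PGD_ALLOCATION_ORDER 1
    #else
    #define PGD_ALLOCATION_ORDER 0
    #endif

    efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
    if (!efi_pgd)
            return -ENOMEM;
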
- -For more disccussion about this bug, see this thread: -http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html - -Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com> -Reviewed-by: Steven Sistare <steven.sistare@oracle.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/pgalloc.h | 11 +++++++++++ - arch/x86/mm/pgtable.c | 7 ------- - arch/x86/platform/efi/efi_64.c | 2 +- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h -index b6d4259..1178a51 100644 ---- a/arch/x86/include/asm/pgalloc.h -+++ b/arch/x86/include/asm/pgalloc.h -@@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} - */ - extern gfp_t __userpte_alloc_gfp; - -+#ifdef CONFIG_PAGE_TABLE_ISOLATION -+/* -+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is -+ * both 8k in size and 8k-aligned. That lets us just flip bit 12 -+ * in a pointer to swap between the two 4k halves. -+ */ -+#define PGD_ALLOCATION_ORDER 1 -+#else -+#define PGD_ALLOCATION_ORDER 0 -+#endif -+ - /* - * Allocate and free page tables. - */ -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 5aaec8e..209b946 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd) - } - #else - --/* -- * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is -- * both 8k in size and 8k-aligned. That lets us just flip bit 12 -- * in a pointer to swap between the two 4k halves. -- */ --#define PGD_ALLOCATION_ORDER kaiser_enabled -- - static inline pgd_t *_pgd_alloc(void) - { - return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); -diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c -index 2f25a36..dcb2d9d 100644 ---- a/arch/x86/platform/efi/efi_64.c -+++ b/arch/x86/platform/efi/efi_64.c -@@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void) - return 0; - - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; -- efi_pgd = (pgd_t *)__get_free_page(gfp_mask); -+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); - if (!efi_pgd) - return -ENOMEM; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch deleted file mode 100644 index b3f12503..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch +++ /dev/null @@ -1,106 +0,0 @@ -From f4e4c29205e3747d4cc2d033e1c46ad9725e9886 Mon Sep 17 00:00:00 2001 -From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> -Date: Wed, 9 May 2018 19:42:20 +0900 -Subject: [PATCH 79/93] x86/kexec: Avoid double free_page() upon - do_kexec_load() failure - -commit a466ef76b815b86748d9870ef2a430af7b39c710 upstream. - ->From ff82bedd3e12f0d3353282054ae48c3bd8c72012 Mon Sep 17 00:00:00 2001 -From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> -Date: Wed, 9 May 2018 12:12:39 +0900 -Subject: x86/kexec: Avoid double free_page() upon do_kexec_load() failure - -syzbot is reporting crashes after memory allocation failure inside -do_kexec_load() [1]. This is because free_transition_pgtable() is called -by both init_transition_pgtable() and machine_kexec_cleanup() when memory -allocation failed inside init_transition_pgtable(). 
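
The shape of the bug is a plain double free: the failing allocator
runs the cleanup itself, and the caller's error path then runs the
same cleanup again on already-freed pages. A reduced sketch of the fix
described and applied below, with hypothetical names standing in for
the several page-table pages the real code manages:

    struct img_arch { unsigned long pte; };    /* hypothetical */

    static void img_free_page_tables(struct img_arch *im)
    {
            free_page(im->pte);
            /* free_page(0) is a no-op, so repeated cleanup is harmless. */
            im->pte = 0;
    }

    static int img_alloc_page_tables(struct img_arch *im)
    {
            im->pte = get_zeroed_page(GFP_KERNEL);
            if (!im->pte) {
                    /* No cleanup here: the caller's error path owns it. */
                    return -ENOMEM;
            }
            return 0;
    }
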
- -Regarding 32bit code, machine_kexec_free_page_tables() is called by both -machine_kexec_alloc_page_tables() and machine_kexec_cleanup() when memory -allocation failed inside machine_kexec_alloc_page_tables(). - -Fix this by leaving the error handling to machine_kexec_cleanup() -(and optionally setting NULL after free_page()). - -[1] https://syzkaller.appspot.com/bug?id=91e52396168cf2bdd572fe1e1bc0bc645c1c6b40 - -Fixes: f5deb79679af6eb4 ("x86: kexec: Use one page table in x86_64 machine_kexec") -Fixes: 92be3d6bdf2cb349 ("kexec/i386: allocate page table pages dynamically") -Reported-by: syzbot <syzbot+d96f60296ef613fe1d69@syzkaller.appspotmail.com> -Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Baoquan He <bhe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: prudo@linux.vnet.ibm.com -Cc: Huang Ying <ying.huang@intel.com> -Cc: syzkaller-bugs@googlegroups.com -Cc: takahiro.akashi@linaro.org -Cc: H. Peter Anvin <hpa@zytor.com> -Cc: akpm@linux-foundation.org -Cc: dyoung@redhat.com -Cc: kirill.shutemov@linux.intel.com -Link: https://lkml.kernel.org/r/201805091942.DGG12448.tMFVFSJFQOOLHO@I-love.SAKURA.ne.jp -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/machine_kexec_32.c | 6 +++++- - arch/x86/kernel/machine_kexec_64.c | 4 +++- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c -index 469b23d..fd7e993 100644 ---- a/arch/x86/kernel/machine_kexec_32.c -+++ b/arch/x86/kernel/machine_kexec_32.c -@@ -71,12 +71,17 @@ static void load_segments(void) - static void machine_kexec_free_page_tables(struct kimage *image) - { - free_page((unsigned long)image->arch.pgd); -+ image->arch.pgd = NULL; - #ifdef CONFIG_X86_PAE - free_page((unsigned long)image->arch.pmd0); -+ image->arch.pmd0 = NULL; - free_page((unsigned long)image->arch.pmd1); -+ image->arch.pmd1 = NULL; - #endif - free_page((unsigned long)image->arch.pte0); -+ image->arch.pte0 = NULL; - free_page((unsigned long)image->arch.pte1); -+ image->arch.pte1 = NULL; - } - - static int machine_kexec_alloc_page_tables(struct kimage *image) -@@ -93,7 +98,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) - !image->arch.pmd0 || !image->arch.pmd1 || - #endif - !image->arch.pte0 || !image->arch.pte1) { -- machine_kexec_free_page_tables(image); - return -ENOMEM; - } - return 0; -diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c -index 8c1f218..26242cd 100644 ---- a/arch/x86/kernel/machine_kexec_64.c -+++ b/arch/x86/kernel/machine_kexec_64.c -@@ -37,8 +37,11 @@ static struct kexec_file_ops *kexec_file_loaders[] = { - static void free_transition_pgtable(struct kimage *image) - { - free_page((unsigned long)image->arch.pud); -+ image->arch.pud = NULL; - free_page((unsigned long)image->arch.pmd); -+ image->arch.pmd = NULL; - free_page((unsigned long)image->arch.pte); -+ image->arch.pte = NULL; - } - - static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) -@@ -79,7 +82,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); - return 0; - err: -- free_transition_pgtable(image); - return result; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch 
b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch deleted file mode 100644 index 5f178944..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch +++ /dev/null @@ -1,179 +0,0 @@ -From cd12f9191c530e3e52ad02f08bfd59fc0e5aeb65 Mon Sep 17 00:00:00 2001 -From: David Woodhouse <dwmw@amazon.co.uk> -Date: Fri, 12 Jan 2018 17:49:25 +0000 -Subject: [PATCH 079/103] x86/retpoline: Fill RSB on context switch for - affected CPUs - -commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. - -On context switch from a shallow call stack to a deeper one, as the CPU -does 'ret' up the deeper side it may encounter RSB entries (predictions for -where the 'ret' goes to) which were populated in userspace. - -This is problematic if neither SMEP nor KPTI (the latter of which marks -userspace pages as NX for the kernel) are active, as malicious code in -userspace may then be executed speculatively. - -Overwrite the CPU's return prediction stack with calls which are predicted -to return to an infinite loop, to "capture" speculation if this -happens. This is required both for retpoline, and also in conjunction with -IBRS for !SMEP && !KPTI. - -On Skylake+ the problem is slightly different, and an *underflow* of the -RSB may cause errant branch predictions to occur. So there it's not so much -overwrite, as *filling* the RSB to attempt to prevent it getting -empty. This is only a partial solution for Skylake+ since there are many -other conditions which may result in the RSB becoming empty. The full -solution on Skylake+ is to use IBRS, which will prevent the problem even -when the RSB becomes empty. With IBRS, the RSB-stuffing will not be -required on context switch. - -[ tglx: Added missing vendor check and slighty massaged comments and - changelog ] - -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Cc: gnomes@lxorguk.ukuu.org.uk -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: thomas.lendacky@amd.com -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Andy Lutomirski <luto@amacapital.net> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Kees Cook <keescook@google.com> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Paul Turner <pjt@google.com> -Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_32.S | 11 +++++++++++ - arch/x86/entry/entry_64.S | 11 +++++++++++ - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 59 insertions(+) - -diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index bdc9aea..a76dc73 100644 ---- a/arch/x86/entry/entry_32.S -+++ b/arch/x86/entry/entry_32.S -@@ -229,6 +229,17 @@ ENTRY(__switch_to_asm) - movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset - #endif - -+#ifdef CONFIG_RETPOLINE -+ /* -+ * When switching from a shallower to a deeper call stack -+ * the RSB may either underflow or use entries populated -+ * with userspace addresses. 
On CPUs where those concerns -+ * exist, overwrite the RSB with entries which capture -+ * speculative execution to prevent attack. -+ */ -+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+#endif -+ - /* restore callee-saved registers */ - popl %esi - popl %edi -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index f7ebaa1..eff47f5 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -427,6 +427,17 @@ ENTRY(__switch_to_asm) - movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset - #endif - -+#ifdef CONFIG_RETPOLINE -+ /* -+ * When switching from a shallower to a deeper call stack -+ * the RSB may either underflow or use entries populated -+ * with userspace addresses. On CPUs where those concerns -+ * exist, overwrite the RSB with entries which capture -+ * speculative execution to prevent attack. -+ */ -+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -+#endif -+ - /* restore callee-saved registers */ - popq %r15 - popq %r14 -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 4467568..2f60cb5 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -200,6 +200,7 @@ - #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ - - /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ - #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 49d25dd..8cacf62 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -22,6 +22,7 @@ - #include <asm/alternative.h> - #include <asm/pgtable.h> - #include <asm/cacheflush.h> -+#include <asm/intel-family.h> - - static void __init spectre_v2_select_mitigation(void); - -@@ -154,6 +155,23 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) - return SPECTRE_V2_CMD_NONE; - } - -+/* Check for Skylake-like CPUs (for RSB handling) */ -+static bool __init is_skylake_era(void) -+{ -+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && -+ boot_cpu_data.x86 == 6) { -+ switch (boot_cpu_data.x86_model) { -+ case INTEL_FAM6_SKYLAKE_MOBILE: -+ case INTEL_FAM6_SKYLAKE_DESKTOP: -+ case INTEL_FAM6_SKYLAKE_X: -+ case INTEL_FAM6_KABYLAKE_MOBILE: -+ case INTEL_FAM6_KABYLAKE_DESKTOP: -+ return true; -+ } -+ } -+ return false; -+} -+ - static void __init spectre_v2_select_mitigation(void) - { - enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); -@@ -212,6 +230,24 @@ static void __init spectre_v2_select_mitigation(void) - - spectre_v2_enabled = mode; - pr_info("%s\n", spectre_v2_strings[mode]); -+ -+ /* -+ * If neither SMEP or KPTI are available, there is a risk of -+ * hitting userspace addresses in the RSB after a context switch -+ * from a shallow call stack to a deeper one. To prevent this fill -+ * the entire RSB, even when using IBRS. -+ * -+ * Skylake era CPUs have a separate issue with *underflow* of the -+ * RSB, when they will predict 'ret' targets from the generic BTB. -+ * The proper mitigation for this is IBRS. If IBRS is not supported -+ * or deactivated in favour of retpolines the RSB fill on context -+ * switch is required. 
-+ */ -+ if ((!boot_cpu_has(X86_FEATURE_KAISER) && -+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { -+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); -+ pr_info("Filling RSB on context switch\n"); -+ } - } - - #undef pr_fmt --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch deleted file mode 100644 index ce234269..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 546e325d7b773ae3c0df848b95f06206ebc7cd87 Mon Sep 17 00:00:00 2001 -From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Date: Mon, 21 May 2018 17:54:49 -0400 -Subject: [PATCH 80/93] KVM/VMX: Expose SSBD properly to guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 0aa48468d00959c8a37cd3ac727284f4f7359151 upstream. - -The X86_FEATURE_SSBD is an synthetic CPU feature - that is -it bit location has no relevance to the real CPUID 0x7.EBX[31] -bit position. For that we need the new CPU feature name. - -Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration") -Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: kvm@vger.kernel.org -Cc: "Radim Krčmář" <rkrcmar@redhat.com> -Cc: stable@vger.kernel.org -Cc: "H. Peter Anvin" <hpa@zytor.com> -Cc: Paolo Bonzini <pbonzini@redhat.com> -Link: https://lkml.kernel.org/r/20180521215449.26423-2-konrad.wilk@oracle.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 4d3269b..8510b7b 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - - /* cpuid 7.0.edx*/ - const u32 kvm_cpuid_7_0_edx_x86_features = -- F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); -+ F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); - - /* all calls to cpuid_count() should be made on the same cpu */ - get_cpu(); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch deleted file mode 100644 index acb54587..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch +++ /dev/null @@ -1,94 +0,0 @@ -From b72091ebb94e5cb7808c813989e6b5aefa464751 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Sat, 13 Jan 2018 17:27:30 -0600 -Subject: [PATCH 080/103] x86/retpoline: Add LFENCE to the retpoline/RSB - filling RSB macros - -commit 28d437d550e1e39f805d99f9f8ac399c778827b7 upstream. - -The PAUSE instruction is currently used in the retpoline and RSB filling -macros as a speculation trap. The use of PAUSE was originally suggested -because it showed a very, very small difference in the amount of -cycles/time used to execute the retpoline as compared to LFENCE. On AMD, -the PAUSE instruction is not a serializing instruction, so the pause/jmp -loop will use excess power as it is speculated over waiting for return -to mispredict to the correct target. 
- -The RSB filling macro is applicable to AMD, and, if software is unable to -verify that LFENCE is serializing on AMD (possible when running under a -hypervisor), the generic retpoline support will be used and, so, is also -applicable to AMD. Keep the current usage of PAUSE for Intel, but add an -LFENCE instruction to the speculation trap for AMD. - -The same sequence has been adopted by GCC for the GCC generated retpolines. - -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@alien8.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Acked-by: Arjan van de Ven <arjan@linux.intel.com> -Cc: Rik van Riel <riel@redhat.com> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Paul Turner <pjt@google.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Tim Chen <tim.c.chen@linux.intel.com> -Cc: Jiri Kosina <jikos@kernel.org> -Cc: Dave Hansen <dave.hansen@intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Dan Williams <dan.j.williams@intel.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Cc: Kees Cook <keescook@google.com> -Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 402a11c..7b45d84 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -11,7 +11,7 @@ - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an -- * infinite 'pause; jmp' loop to capture speculative execution. -+ * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to -@@ -38,11 +38,13 @@ - call 772f; \ - 773: /* speculation trap */ \ - pause; \ -+ lfence; \ - jmp 773b; \ - 772: \ - call 774f; \ - 775: /* speculation trap */ \ - pause; \ -+ lfence; \ - jmp 775b; \ - 774: \ - dec reg; \ -@@ -73,6 +75,7 @@ - call .Ldo_rop_\@ - .Lspec_trap_\@: - pause -+ lfence - jmp .Lspec_trap_\@ - .Ldo_rop_\@: - mov \reg, (%_ASM_SP) -@@ -165,6 +168,7 @@ - " .align 16\n" \ - "901: call 903f;\n" \ - "902: pause;\n" \ -+ " lfence;\n" \ - " jmp 902b;\n" \ - " .align 16\n" \ - "903: addl $4, %%esp;\n" \ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch deleted file mode 100644 index f44b77a1..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a41340930388022d17c5acfa7c00edc80fa486f6 Mon Sep 17 00:00:00 2001 -From: Wei Huang <wei@redhat.com> -Date: Tue, 1 May 2018 09:49:54 -0500 -Subject: [PATCH 81/93] KVM: x86: Update cpuid properly when CR4.OSXAVE or - CR4.PKE is changed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit c4d2188206bafa177ea58e9a25b952baa0bf7712 upstream. 
- -The CPUID bits of OSXSAVE (function=0x1) and OSPKE (func=0x7, leaf=0x0) -allows user apps to detect if OS has set CR4.OSXSAVE or CR4.PKE. KVM is -supposed to update these CPUID bits when CR4 is updated. Current KVM -code doesn't handle some special cases when updates come from emulator. -Here is one example: - - Step 1: guest boots - Step 2: guest OS enables XSAVE ==> CR4.OSXSAVE=1 and CPUID.OSXSAVE=1 - Step 3: guest hot reboot ==> QEMU reset CR4 to 0, but CPUID.OSXAVE==1 - Step 4: guest os checks CPUID.OSXAVE, detects 1, then executes xgetbv - -Step 4 above will cause an #UD and guest crash because guest OS hasn't -turned on OSXAVE yet. This patch solves the problem by comparing the the -old_cr4 with cr4. If the related bits have been changed, -kvm_update_cpuid() needs to be called. - -Signed-off-by: Wei Huang <wei@redhat.com> -Reviewed-by: Bandan Das <bsd@redhat.com> -Cc: stable@vger.kernel.org -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index c531231..27e6cf0 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -7201,6 +7201,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - { - struct msr_data apic_base_msr; - int mmu_reset_needed = 0; -+ int cpuid_update_needed = 0; - int pending_vec, max_bits, idx; - struct desc_ptr dt; - -@@ -7232,8 +7233,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - vcpu->arch.cr0 = sregs->cr0; - - mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; -+ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & -+ (X86_CR4_OSXSAVE | X86_CR4_PKE)); - kvm_x86_ops->set_cr4(vcpu, sregs->cr4); -- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) -+ if (cpuid_update_needed) - kvm_update_cpuid(vcpu); - - idx = srcu_read_lock(&vcpu->kvm->srcu); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-objtool-Improve-error-message-for-bad-file-argument.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-objtool-Improve-error-message-for-bad-file-argument.patch deleted file mode 100644 index 5371d25e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-objtool-Improve-error-message-for-bad-file-argument.patch +++ /dev/null @@ -1,53 +0,0 @@ -From fb041f21a529d40a97336be266b879f05d282b73 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Mon, 15 Jan 2018 08:17:08 -0600 -Subject: [PATCH 081/103] objtool: Improve error message for bad file argument - -commit 385d11b152c4eb638eeb769edcb3249533bb9a00 upstream. 
- -If a nonexistent file is supplied to objtool, it complains with a -non-helpful error: - - open: No such file or directory - -Improve it to: - - objtool: Can't open 'foo': No such file or directory - -Reported-by: Markus <M4rkusXXL@web.de> -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/406a3d00a21225eee2819844048e17f68523ccf6.1516025651.git.jpoimboe@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - tools/objtool/elf.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c -index 0d7983a..14a74d4 100644 ---- a/tools/objtool/elf.c -+++ b/tools/objtool/elf.c -@@ -26,6 +26,7 @@ - #include <stdlib.h> - #include <string.h> - #include <unistd.h> -+#include <errno.h> - - #include "elf.h" - #include "warn.h" -@@ -358,7 +359,8 @@ struct elf *elf_open(const char *name) - - elf->fd = open(name, O_RDONLY); - if (elf->fd == -1) { -- perror("open"); -+ fprintf(stderr, "objtool: Can't open '%s': %s\n", -+ name, strerror(errno)); - goto err; - } - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch deleted file mode 100644 index 313f2577..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch +++ /dev/null @@ -1,54 +0,0 @@ -From e34ebcda27df86037fd748254208aff7e442ff0b Mon Sep 17 00:00:00 2001 -From: Jim Mattson <jmattson@google.com> -Date: Wed, 9 May 2018 14:29:35 -0700 -Subject: [PATCH 82/93] kvm: x86: IA32_ARCH_CAPABILITIES is always supported -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 1eaafe91a0df4157521b6417b3dd8430bf5f52f0 upstream. - -If there is a possibility that a VM may migrate to a Skylake host, -then the hypervisor should report IA32_ARCH_CAPABILITIES.RSBA[bit 2] -as being set (future work, of course). This implies that -CPUID.(EAX=7,ECX=0):EDX.ARCH_CAPABILITIES[bit 29] should be -set. Therefore, kvm should report this CPUID bit as being supported -whether or not the host supports it. Userspace is still free to clear -the bit if it chooses. - -For more information on RSBA, see Intel's white paper, "Retpoline: A -Branch Target Injection Mitigation" (Document Number 337131-001), -currently available at https://bugzilla.kernel.org/show_bug.cgi?id=199511. - -Since the IA32_ARCH_CAPABILITIES MSR is emulated in kvm, there is no -dependency on hardware support for this feature. 
- -Signed-off-by: Jim Mattson <jmattson@google.com> -Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> -Fixes: 28c1c9fabf48 ("KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES") -Cc: stable@vger.kernel.org -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/cpuid.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 8510b7b..fbd6c62 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -468,6 +468,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - entry->ecx &= ~F(PKU); - entry->edx &= kvm_cpuid_7_0_edx_x86_features; - cpuid_mask(&entry->edx, CPUID_7_EDX); -+ /* -+ * We emulate ARCH_CAPABILITIES in software even -+ * if the host doesn't support it. -+ */ -+ entry->edx |= F(ARCH_CAPABILITIES); - } else { - entry->ebx = 0; - entry->ecx = 0; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch deleted file mode 100644 index c57772cc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch +++ /dev/null @@ -1,73 +0,0 @@ -From f062c304538bab4228fa57dfe33eb1521d764127 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Tue, 16 Jan 2018 16:42:25 +0100 -Subject: [PATCH 082/103] x86/cpufeature: Move processor tracing out of - scattered features -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream. - -Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX), -so do not duplicate it in the scattered features word. - -Besides being more tidy, this will be useful for KVM when it presents -processor tracing to the guests. KVM selects host features that are -supported by both the host kernel (depending on command line options, -CPU errata, or whatever) and KVM. Whenever a full feature word exists, -KVM's code is written in the expectation that the CPUID bit number -matches the X86_FEATURE_* bit number, but this is not the case for -X86_FEATURE_INTEL_PT. 
- -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Cc: Borislav Petkov <bp@suse.de> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Luwei Kang <luwei.kang@intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: kvm@vger.kernel.org -Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/cpufeatures.h | 2 +- - arch/x86/kernel/cpu/scattered.c | 1 - - 2 files changed, 1 insertion(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 2f60cb5..8537a21 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -197,7 +197,6 @@ - #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ - #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ - --#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ -@@ -236,6 +235,7 @@ - #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ - #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ - #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ - #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ - #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ - #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c -index 1db8dc4..b0dd9ae 100644 ---- a/arch/x86/kernel/cpu/scattered.c -+++ b/arch/x86/kernel/cpu/scattered.c -@@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) - const struct cpuid_bit *cb; - - static const struct cpuid_bit cpuid_bits[] = { -- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch deleted file mode 100644 index b4bec832..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 91702980566c39210225154c2a8b1cef41942737 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini <pbonzini@redhat.com> -Date: Thu, 26 Oct 2017 15:45:47 +0200 -Subject: [PATCH 83/93] kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 51776043afa415435c7e4636204fbe4f7edc4501 ] - -This ioctl is obsolete (it was used by Xenner as far as I know) but -still let's not break it gratuitously... Its handler is copying -directly into struct kvm. 
Go through a bounce buffer instead, with -the added benefit that we can actually do something useful with the -flags argument---the previous code was exiting with -EINVAL but still -doing the copy. - -This technically is a userspace ABI breakage, but since no one should be -using the ioctl, it's a good occasion to see if someone actually -complains. - -Cc: kernel-hardening@lists.openwall.com -Cc: Kees Cook <keescook@chromium.org> -Cc: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/x86.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 27e6cf0..d7974fc 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -4106,13 +4106,14 @@ long kvm_arch_vm_ioctl(struct file *filp, - mutex_unlock(&kvm->lock); - break; - case KVM_XEN_HVM_CONFIG: { -+ struct kvm_xen_hvm_config xhc; - r = -EFAULT; -- if (copy_from_user(&kvm->arch.xen_hvm_config, argp, -- sizeof(struct kvm_xen_hvm_config))) -+ if (copy_from_user(&xhc, argp, sizeof(xhc))) - goto out; - r = -EINVAL; -- if (kvm->arch.xen_hvm_config.flags) -+ if (xhc.flags) - goto out; -+ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); - r = 0; - break; - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-module-Add-retpoline-tag-to-VERMAGIC.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-module-Add-retpoline-tag-to-VERMAGIC.patch deleted file mode 100644 index becd5732..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-module-Add-retpoline-tag-to-VERMAGIC.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 6d7e28033f88af1e16f0ed63395fd246791e5857 Mon Sep 17 00:00:00 2001 -From: Andi Kleen <ak@linux.intel.com> -Date: Tue, 16 Jan 2018 12:52:28 -0800 -Subject: [PATCH 083/103] module: Add retpoline tag to VERMAGIC - -commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream. - -Add a marker for retpoline to the module VERMAGIC. This catches the case -when a non RETPOLINE compiled module gets loaded into a retpoline kernel, -making it insecure. - -It doesn't handle the case when retpoline has been runtime disabled. Even -in this case the match of the retcompile status will be enforced. This -implies that even with retpoline run time disabled all modules loaded need -to be recompiled. 
- -Signed-off-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: rusty@rustcorp.com.au -Cc: arjan.van.de.ven@intel.com -Cc: jeyu@kernel.org -Cc: torvalds@linux-foundation.org -Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/vermagic.h | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h -index 6f8fbcf..a3d0493 100644 ---- a/include/linux/vermagic.h -+++ b/include/linux/vermagic.h -@@ -24,10 +24,16 @@ - #ifndef MODULE_ARCH_VERMAGIC - #define MODULE_ARCH_VERMAGIC "" - #endif -+#ifdef RETPOLINE -+#define MODULE_VERMAGIC_RETPOLINE "retpoline " -+#else -+#define MODULE_VERMAGIC_RETPOLINE "" -+#endif - - #define VERMAGIC_STRING \ - UTS_RELEASE " " \ - MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ -- MODULE_ARCH_VERMAGIC -+ MODULE_ARCH_VERMAGIC \ -+ MODULE_VERMAGIC_RETPOLINE - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch deleted file mode 100644 index a2280307..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 075696ba348a4c1eb20a641157f84f8b81220510 Mon Sep 17 00:00:00 2001 -From: Sean Christopherson <sean.j.christopherson@intel.com> -Date: Fri, 23 Mar 2018 09:34:00 -0700 -Subject: [PATCH 84/93] KVM: VMX: raise internal error for exception during - invalid protected mode state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit add5ff7a216ee545a214013f26d1ef2f44a9c9f8 ] - -Exit to userspace with KVM_INTERNAL_ERROR_EMULATION if we encounter -an exception in Protected Mode while emulating guest due to invalid -guest state. Unlike Big RM, KVM doesn't support emulating exceptions -in PM, i.e. PM exceptions are always injected via the VMCS. Because -we will never do VMRESUME due to emulation_required, the exception is -never realized and we'll keep emulating the faulting instruction over -and over until we receive a signal. - -Exit to userspace iff there is a pending exception, i.e. don't exit -simply on a requested event. The purpose of this check and exit is to -aid in debugging a guest that is in all likelihood already doomed. -Invalid guest state in PM is extremely limited in normal operation, -e.g. it generally only occurs for a few instructions early in BIOS, -and any exception at this time is all but guaranteed to be fatal. -Non-vectored interrupts, e.g. INIT, SIPI and SMI, can be cleanly -handled/emulated, while checking for vectored interrupts, e.g. INTR -and NMI, without hitting false positives would add a fair amount of -complexity for almost no benefit (getting hit by lightning seems -more likely than encountering this specific scenario). - -Add a WARN_ON_ONCE to vmx_queue_exception() if we try to inject an -exception via the VMCS and emulation_required is true. 
- -Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/vmx.c | 20 ++++++++++++++------ - 1 file changed, 14 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 7b4739c..9307c0d 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -2555,6 +2555,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - return; - } - -+ WARN_ON_ONCE(vmx->emulation_required); -+ - if (kvm_exception_is_soft(nr)) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); -@@ -6405,12 +6407,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) - goto out; - } - -- if (err != EMULATE_DONE) { -- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; -- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; -- vcpu->run->internal.ndata = 0; -- return 0; -- } -+ if (err != EMULATE_DONE) -+ goto emulation_error; -+ -+ if (vmx->emulation_required && !vmx->rmode.vm86_active && -+ vcpu->arch.exception.pending) -+ goto emulation_error; - - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; -@@ -6426,6 +6428,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) - - out: - return ret; -+ -+emulation_error: -+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; -+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; -+ vcpu->run->internal.ndata = 0; -+ return 0; - } - - static int __grow_ple_window(int val) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch deleted file mode 100644 index 6e315ccf..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch +++ /dev/null @@ -1,48 +0,0 @@ -From ebd4f926752058a42c6da41e80b44a82aa4d14f5 Mon Sep 17 00:00:00 2001 -From: Tom Lendacky <thomas.lendacky@amd.com> -Date: Tue, 26 Dec 2017 23:43:54 -0600 -Subject: [PATCH 084/103] x86/cpu, x86/pti: Do not enable PTI on AMD processors - -commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream. - -AMD processors are not subject to the types of attacks that the kernel -page table isolation feature protects against. The AMD microarchitecture -does not allow memory references, including speculative references, that -access higher privileged data when running in a lesser privileged mode -when that access would result in a page fault. - -Disable page table isolation by default on AMD processors by not setting -the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI -is set. 
- -Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Borislav Petkov <bp@suse.de> -Cc: Dave Hansen <dave.hansen@linux.intel.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net -Cc: Nick Lowe <nick.lowe@gmail.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/common.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 7b9ae04..d198ae0 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -883,8 +883,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) - - setup_force_cpu_cap(X86_FEATURE_ALWAYS); - -- /* Assume for now that ALL x86 CPUs are insecure */ -- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); -+ if (c->x86_vendor != X86_VENDOR_AMD) -+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch deleted file mode 100644 index db300b21..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 2ece92e70fbd29fd14c1add63648b7154521b473 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov <vkuznets@redhat.com> -Date: Fri, 9 Feb 2018 14:01:33 +0100 -Subject: [PATCH 85/93] KVM: lapic: stop advertising DIRECTED_EOI when - in-kernel IOAPIC is in use -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 0bcc3fb95b97ac2ca223a5a870287b37f56265ac ] - -Devices which use level-triggered interrupts under Windows 2016 with -Hyper-V role enabled don't work: Windows disables EOI broadcast in SPIV -unconditionally. Our in-kernel IOAPIC implementation emulates an old IOAPIC -version which has no EOI register so EOI never happens. - -The issue was discovered and discussed a while ago: -https://www.spinics.net/lists/kvm/msg148098.html - -While this is a guest OS bug (it should check that IOAPIC has the required -capabilities before disabling EOI broadcast) we can workaround it in KVM: -advertising DIRECTED_EOI with in-kernel IOAPIC makes little sense anyway. - -Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> -Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> -Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kvm/lapic.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c -index 650ff4a..d99e13d 100644 ---- a/arch/x86/kvm/lapic.c -+++ b/arch/x86/kvm/lapic.c -@@ -294,8 +294,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) - if (!lapic_in_kernel(vcpu)) - return; - -+ /* -+ * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) -+ * which doesn't have EOI register; Some buggy OSes (e.g. Windows with -+ * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC -+ * version first and level-triggered interrupts never get EOIed in -+ * IOAPIC. 
-+ */ - feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); -- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) -+ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && -+ !ioapic_in_kernel(vcpu->kvm)) - v |= APIC_LVR_DIRECTED_EOI; - kvm_lapic_set_reg(apic, APIC_LVR, v); - } --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-x86-mce-Make-machine-check-speculation-protected.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-x86-mce-Make-machine-check-speculation-protected.patch deleted file mode 100644 index b1ad4a54..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-x86-mce-Make-machine-check-speculation-protected.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 579259fc2702d354fa172e6e16d26c740a3d4421 Mon Sep 17 00:00:00 2001 -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 18 Jan 2018 16:28:26 +0100 -Subject: [PATCH 085/103] x86/mce: Make machine check speculation protected - -commit 6f41c34d69eb005e7848716bbcafc979b35037d5 upstream. - -The machine check idtentry uses an indirect branch directly from the low -level code. This evades the speculation protection. - -Replace it by a direct call into C code and issue the indirect call there -so the compiler can apply the proper speculation protection. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by:Borislav Petkov <bp@alien8.de> -Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> -Niced-by: Peter Zijlstra <peterz@infradead.org> -Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 2 +- - arch/x86/include/asm/traps.h | 1 + - arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++ - 3 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index eff47f5..16146eb 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1064,7 +1064,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 - #endif - - #ifdef CONFIG_X86_MCE --idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) -+idtentry machine_check do_mce has_error_code=0 paranoid=1 - #endif - - /* -diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h -index 01fd0a7..688315b 100644 ---- a/arch/x86/include/asm/traps.h -+++ b/arch/x86/include/asm/traps.h -@@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); - #ifdef CONFIG_X86_32 - dotraplinkage void do_iret_error(struct pt_regs *, long); - #endif -+dotraplinkage void do_mce(struct pt_regs *, long); - - static inline int get_si_code(unsigned long condition) - { -diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c -index a7fdf45..0035aaa 100644 ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -1765,6 +1765,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) - void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -+dotraplinkage void do_mce(struct pt_regs *regs, long error_code) -+{ -+ machine_check_vector(regs, error_code); -+} -+ - /* - * Called for each booted CPU to set up machine checks. 
- * Must be called with preempt off: --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch deleted file mode 100644 index f659e885..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 655125acee5c084743a8bae4ffe2b723856594ce Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Tue, 21 Feb 2017 15:35:32 -0600 -Subject: [PATCH 86/93] objtool: Improve detection of BUG() and other dead ends - -commit d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 upstream. - -The BUG() macro's use of __builtin_unreachable() via the unreachable() -macro tells gcc that the instruction is a dead end, and that it's safe -to assume the current code path will not execute past the previous -instruction. - -On x86, the BUG() macro is implemented with the 'ud2' instruction. When -objtool's branch analysis sees that instruction, it knows the current -code path has come to a dead end. - -Peter Zijlstra has been working on a patch to change the WARN macros to -use 'ud2'. That patch will break objtool's assumption that 'ud2' is -always a dead end. - -Generally it's best for objtool to avoid making those kinds of -assumptions anyway. The more ignorant it is of kernel code internals, -the better. - -So create a more generic way for objtool to detect dead ends by adding -an annotation to the unreachable() macro. The annotation stores a -pointer to the end of the unreachable code path in an '__unreachable' -section. Objtool can read that section to find the dead ends. - -Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/vmlinux.lds.S | 1 + - include/linux/compiler-gcc.h | 13 ++++++++- - tools/objtool/arch.h | 5 ++-- - tools/objtool/arch/x86/decode.c | 3 --- - tools/objtool/builtin-check.c | 60 ++++++++++++++++++++++++++++++++++++++--- - 5 files changed, 71 insertions(+), 11 deletions(-) - -diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S -index c7194e9..4ef267f 100644 ---- a/arch/x86/kernel/vmlinux.lds.S -+++ b/arch/x86/kernel/vmlinux.lds.S -@@ -353,6 +353,7 @@ SECTIONS - /DISCARD/ : { - *(.eh_frame) - *(__func_stack_frame_non_standard) -+ *(__unreachable) - } - } - -diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h -index 362a1e17..b69d102 100644 ---- a/include/linux/compiler-gcc.h -+++ b/include/linux/compiler-gcc.h -@@ -199,6 +199,17 @@ - #endif - #endif - -+#ifdef CONFIG_STACK_VALIDATION -+#define annotate_unreachable() ({ \ -+ asm("1:\t\n" \ -+ ".pushsection __unreachable, \"a\"\t\n" \ -+ ".long 1b\t\n" \ -+ ".popsection\t\n"); \ -+}) -+#else -+#define annotate_unreachable() -+#endif -+ - /* - * Mark a position in code as unreachable. 
This can be used to - * suppress control flow warnings after asm blocks that transfer -@@ -208,7 +219,7 @@ - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ --#define unreachable() __builtin_unreachable() -+#define unreachable() annotate_unreachable(); __builtin_unreachable() - - /* Mark a function definition as prohibited from being cloned. */ - #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h -index f7350fc..a59e061 100644 ---- a/tools/objtool/arch.h -+++ b/tools/objtool/arch.h -@@ -31,9 +31,8 @@ - #define INSN_CALL_DYNAMIC 8 - #define INSN_RETURN 9 - #define INSN_CONTEXT_SWITCH 10 --#define INSN_BUG 11 --#define INSN_NOP 12 --#define INSN_OTHER 13 -+#define INSN_NOP 11 -+#define INSN_OTHER 12 - #define INSN_LAST INSN_OTHER - - int arch_decode_instruction(struct elf *elf, struct section *sec, -diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c -index 5e0dea2..9fb487f 100644 ---- a/tools/objtool/arch/x86/decode.c -+++ b/tools/objtool/arch/x86/decode.c -@@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, - op2 == 0x35) - /* sysenter, sysret */ - *type = INSN_CONTEXT_SWITCH; -- else if (op2 == 0x0b || op2 == 0xb9) -- /* ud2 */ -- *type = INSN_BUG; - else if (op2 == 0x0d || op2 == 0x1f) - /* nopl/nopw */ - *type = INSN_NOP; -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index 377bff0..ad9eda9 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -51,7 +51,7 @@ struct instruction { - unsigned int len, state; - unsigned char type; - unsigned long immediate; -- bool alt_group, visited, ignore_alts; -+ bool alt_group, visited, dead_end, ignore_alts; - struct symbol *call_dest; - struct instruction *jump_dest; - struct list_head alts; -@@ -330,6 +330,54 @@ static int decode_instructions(struct objtool_file *file) - } - - /* -+ * Find all uses of the unreachable() macro, which are code path dead ends. -+ */ -+static int add_dead_ends(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct rela *rela; -+ struct instruction *insn; -+ bool found; -+ -+ sec = find_section_by_name(file->elf, ".rela__unreachable"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in .rela__unreachable"); -+ return -1; -+ } -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (insn) -+ insn = list_prev_entry(insn, list); -+ else if (rela->addend == rela->sym->sec->len) { -+ found = false; -+ list_for_each_entry_reverse(insn, &file->insn_list, list) { -+ if (insn->sec == rela->sym->sec) { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) { -+ WARN("can't find unreachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ } else { -+ WARN("can't find unreachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ -+ insn->dead_end = true; -+ } -+ -+ return 0; -+} -+ -+/* - * Warnings shouldn't be reported for ignored functions. 
- */ - static void add_ignores(struct objtool_file *file) -@@ -896,6 +944,10 @@ static int decode_sections(struct objtool_file *file) - if (ret) - return ret; - -+ ret = add_dead_ends(file); -+ if (ret) -+ return ret; -+ - add_ignores(file); - - ret = add_nospec_ignores(file); -@@ -1094,13 +1146,13 @@ static int validate_branch(struct objtool_file *file, - - return 0; - -- case INSN_BUG: -- return 0; -- - default: - break; - } - -+ if (insn->dead_end) -+ return 0; -+ - insn = next_insn_same_sec(file, insn); - if (!insn) { - WARN("%s: unexpected end of section", sec->name); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-retpoline-Introduce-start-end-markers-of-indirect-th.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-retpoline-Introduce-start-end-markers-of-indirect-th.patch deleted file mode 100644 index 9beafd46..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-retpoline-Introduce-start-end-markers-of-indirect-th.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d21c5cadc96cf42c7791840b421afc1a018e6a87 Mon Sep 17 00:00:00 2001 -From: Masami Hiramatsu <mhiramat@kernel.org> -Date: Fri, 19 Jan 2018 01:14:21 +0900 -Subject: [PATCH 086/103] retpoline: Introduce start/end markers of indirect - thunk - -commit 736e80a4213e9bbce40a7c050337047128b472ac upstream. - -Introduce start/end markers of __x86_indirect_thunk_* functions. -To make it easy, consolidate .text.__x86.indirect_thunk.* sections -to one .text.__x86.indirect_thunk section and put it in the -end of kernel text section and adds __indirect_thunk_start/end -so that other subsystem (e.g. kprobes) can identify it. - -Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 3 +++ - arch/x86/kernel/vmlinux.lds.S | 7 +++++++ - arch/x86/lib/retpoline.S | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 7b45d84..19ba5ad 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -194,6 +194,9 @@ enum spectre_v2_mitigation { - SPECTRE_V2_IBRS, - }; - -+extern char __indirect_thunk_start[]; -+extern char __indirect_thunk_end[]; -+ - /* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. 
Both -diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S -index dbf67f6..c7194e9 100644 ---- a/arch/x86/kernel/vmlinux.lds.S -+++ b/arch/x86/kernel/vmlinux.lds.S -@@ -105,6 +105,13 @@ SECTIONS - SOFTIRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) -+ -+#ifdef CONFIG_RETPOLINE -+ __indirect_thunk_start = .; -+ *(.text.__x86.indirect_thunk) -+ __indirect_thunk_end = .; -+#endif -+ - /* End of text section */ - _etext = .; - } :text = 0x9090 -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -index cb45c6c..d3415dc 100644 ---- a/arch/x86/lib/retpoline.S -+++ b/arch/x86/lib/retpoline.S -@@ -9,7 +9,7 @@ - #include <asm/nospec-branch.h> - - .macro THUNK reg -- .section .text.__x86.indirect_thunk.\reg -+ .section .text.__x86.indirect_thunk - - ENTRY(__x86_indirect_thunk_\reg) - CFI_STARTPROC --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch deleted file mode 100644 index ff8c8dad..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 649c2099e7e0a5431b8541c7da5e83d863cb71cf Mon Sep 17 00:00:00 2001 -From: Masami Hiramatsu <mhiramat@kernel.org> -Date: Fri, 19 Jan 2018 01:14:51 +0900 -Subject: [PATCH 087/103] kprobes/x86: Blacklist indirect thunk functions for - kprobes - -commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream. - -Mark __x86_indirect_thunk_* functions as blacklist for kprobes -because those functions can be called from anywhere in the kernel -including blacklist functions of kprobes. - -Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/lib/retpoline.S | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S -index d3415dc..dfb2ba9 100644 ---- a/arch/x86/lib/retpoline.S -+++ b/arch/x86/lib/retpoline.S -@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) - * than one per register with the correct names. So we do it - * the simple and nasty way... 
- */ --#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) -+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) - #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) - - GENERATE_THUNK(_ASM_AX) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch deleted file mode 100644 index 076eb364..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch +++ /dev/null @@ -1,2802 +0,0 @@ -From 1c6b7026213ec74f811957627c80513e75f6fb96 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Wed, 28 Jun 2017 10:11:05 -0500 -Subject: [PATCH 87/93] objtool: Move checking code to check.c - -commit dcc914f44f065ef73685b37e59877a5bb3cb7358 upstream. - -In preparation for the new 'objtool undwarf generate' command, which -will rely on 'objtool check', move the checking code from -builtin-check.c to check.c where it can be used by other commands. - -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Reviewed-by: Jiri Slaby <jslaby@suse.cz> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: live-patching@vger.kernel.org -Link: http://lkml.kernel.org/r/294c5c695fd73c1a5000bbe5960a7c9bec4ee6b4.1498659915.git.jpoimboe@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -[backported by hand to 4.9, this was a pain... - gregkh] -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - tools/objtool/Build | 1 + - tools/objtool/builtin-check.c | 1337 +---------------------------------------- - tools/objtool/check.c | 1327 ++++++++++++++++++++++++++++++++++++++++ - tools/objtool/check.h | 51 ++ - 4 files changed, 1392 insertions(+), 1324 deletions(-) - create mode 100644 tools/objtool/check.c - create mode 100644 tools/objtool/check.h - -diff --git a/tools/objtool/Build b/tools/objtool/Build -index d6cdece..6f2e198 100644 ---- a/tools/objtool/Build -+++ b/tools/objtool/Build -@@ -1,5 +1,6 @@ - objtool-y += arch/$(SRCARCH)/ - objtool-y += builtin-check.o -+objtool-y += check.o - objtool-y += elf.o - objtool-y += special.o - objtool-y += objtool.o -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index ad9eda9..365c34e 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> -+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License -@@ -25,1343 +25,32 @@ - * For more information, see tools/objtool/Documentation/stack-validation.txt. 
- */ - --#include <string.h> --#include <stdlib.h> - #include <subcmd/parse-options.h> -- - #include "builtin.h" --#include "elf.h" --#include "special.h" --#include "arch.h" --#include "warn.h" -- --#include <linux/hashtable.h> -- --#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -- --#define STATE_FP_SAVED 0x1 --#define STATE_FP_SETUP 0x2 --#define STATE_FENTRY 0x4 -- --struct instruction { -- struct list_head list; -- struct hlist_node hash; -- struct section *sec; -- unsigned long offset; -- unsigned int len, state; -- unsigned char type; -- unsigned long immediate; -- bool alt_group, visited, dead_end, ignore_alts; -- struct symbol *call_dest; -- struct instruction *jump_dest; -- struct list_head alts; -- struct symbol *func; --}; -- --struct alternative { -- struct list_head list; -- struct instruction *insn; --}; -- --struct objtool_file { -- struct elf *elf; -- struct list_head insn_list; -- DECLARE_HASHTABLE(insn_hash, 16); -- struct section *rodata, *whitelist; -- bool ignore_unreachables, c_file; --}; -- --const char *objname; --static bool nofp; -- --static struct instruction *find_insn(struct objtool_file *file, -- struct section *sec, unsigned long offset) --{ -- struct instruction *insn; -- -- hash_for_each_possible(file->insn_hash, insn, hash, offset) -- if (insn->sec == sec && insn->offset == offset) -- return insn; -- -- return NULL; --} -- --static struct instruction *next_insn_same_sec(struct objtool_file *file, -- struct instruction *insn) --{ -- struct instruction *next = list_next_entry(insn, list); -- -- if (&next->list == &file->insn_list || next->sec != insn->sec) -- return NULL; -- -- return next; --} -- --static bool gcov_enabled(struct objtool_file *file) --{ -- struct section *sec; -- struct symbol *sym; -- -- list_for_each_entry(sec, &file->elf->sections, list) -- list_for_each_entry(sym, &sec->symbol_list, list) -- if (!strncmp(sym->name, "__gcov_.", 8)) -- return true; -- -- return false; --} -- --#define for_each_insn(file, insn) \ -- list_for_each_entry(insn, &file->insn_list, list) -- --#define func_for_each_insn(file, func, insn) \ -- for (insn = find_insn(file, func->sec, func->offset); \ -- insn && &insn->list != &file->insn_list && \ -- insn->sec == func->sec && \ -- insn->offset < func->offset + func->len; \ -- insn = list_next_entry(insn, list)) -- --#define func_for_each_insn_continue_reverse(file, func, insn) \ -- for (insn = list_prev_entry(insn, list); \ -- &insn->list != &file->insn_list && \ -- insn->sec == func->sec && insn->offset >= func->offset; \ -- insn = list_prev_entry(insn, list)) -- --#define sec_for_each_insn_from(file, insn) \ -- for (; insn; insn = next_insn_same_sec(file, insn)) -- -- --/* -- * Check if the function has been manually whitelisted with the -- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted -- * due to its use of a context switching instruction. 
-- */ --static bool ignore_func(struct objtool_file *file, struct symbol *func) --{ -- struct rela *rela; -- struct instruction *insn; -- -- /* check for STACK_FRAME_NON_STANDARD */ -- if (file->whitelist && file->whitelist->rela) -- list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { -- if (rela->sym->type == STT_SECTION && -- rela->sym->sec == func->sec && -- rela->addend == func->offset) -- return true; -- if (rela->sym->type == STT_FUNC && rela->sym == func) -- return true; -- } -- -- /* check if it has a context switching instruction */ -- func_for_each_insn(file, func, insn) -- if (insn->type == INSN_CONTEXT_SWITCH) -- return true; -- -- return false; --} -- --/* -- * This checks to see if the given function is a "noreturn" function. -- * -- * For global functions which are outside the scope of this object file, we -- * have to keep a manual list of them. -- * -- * For local functions, we have to detect them manually by simply looking for -- * the lack of a return instruction. -- * -- * Returns: -- * -1: error -- * 0: no dead end -- * 1: dead end -- */ --static int __dead_end_function(struct objtool_file *file, struct symbol *func, -- int recursion) --{ -- int i; -- struct instruction *insn; -- bool empty = true; -- -- /* -- * Unfortunately these have to be hard coded because the noreturn -- * attribute isn't provided in ELF data. -- */ -- static const char * const global_noreturns[] = { -- "__stack_chk_fail", -- "panic", -- "do_exit", -- "do_task_dead", -- "__module_put_and_exit", -- "complete_and_exit", -- "kvm_spurious_fault", -- "__reiserfs_panic", -- "lbug_with_loc" -- }; -- -- if (func->bind == STB_WEAK) -- return 0; -- -- if (func->bind == STB_GLOBAL) -- for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) -- if (!strcmp(func->name, global_noreturns[i])) -- return 1; -- -- if (!func->sec) -- return 0; -- -- func_for_each_insn(file, func, insn) { -- empty = false; -- -- if (insn->type == INSN_RETURN) -- return 0; -- } -- -- if (empty) -- return 0; -- -- /* -- * A function can have a sibling call instead of a return. In that -- * case, the function's dead-end status depends on whether the target -- * of the sibling call returns. -- */ -- func_for_each_insn(file, func, insn) { -- if (insn->sec != func->sec || -- insn->offset >= func->offset + func->len) -- break; -- -- if (insn->type == INSN_JUMP_UNCONDITIONAL) { -- struct instruction *dest = insn->jump_dest; -- struct symbol *dest_func; -- -- if (!dest) -- /* sibling call to another file */ -- return 0; -- -- if (dest->sec != func->sec || -- dest->offset < func->offset || -- dest->offset >= func->offset + func->len) { -- /* local sibling call */ -- dest_func = find_symbol_by_offset(dest->sec, -- dest->offset); -- if (!dest_func) -- continue; -- -- if (recursion == 5) { -- WARN_FUNC("infinite recursion (objtool bug!)", -- dest->sec, dest->offset); -- return -1; -- } -- -- return __dead_end_function(file, dest_func, -- recursion + 1); -- } -- } -- -- if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) -- /* sibling call */ -- return 0; -- } -- -- return 1; --} -- --static int dead_end_function(struct objtool_file *file, struct symbol *func) --{ -- return __dead_end_function(file, func, 0); --} -- --/* -- * Call the arch-specific instruction decoder for all the instructions and add -- * them to the global instruction list. 
-- */ --static int decode_instructions(struct objtool_file *file) --{ -- struct section *sec; -- struct symbol *func; -- unsigned long offset; -- struct instruction *insn; -- int ret; -- -- list_for_each_entry(sec, &file->elf->sections, list) { -- -- if (!(sec->sh.sh_flags & SHF_EXECINSTR)) -- continue; -- -- for (offset = 0; offset < sec->len; offset += insn->len) { -- insn = malloc(sizeof(*insn)); -- memset(insn, 0, sizeof(*insn)); -- -- INIT_LIST_HEAD(&insn->alts); -- insn->sec = sec; -- insn->offset = offset; -- -- ret = arch_decode_instruction(file->elf, sec, offset, -- sec->len - offset, -- &insn->len, &insn->type, -- &insn->immediate); -- if (ret) -- return ret; -- -- if (!insn->type || insn->type > INSN_LAST) { -- WARN_FUNC("invalid instruction type %d", -- insn->sec, insn->offset, insn->type); -- return -1; -- } -- -- hash_add(file->insn_hash, &insn->hash, insn->offset); -- list_add_tail(&insn->list, &file->insn_list); -- } -- -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- if (!find_insn(file, sec, func->offset)) { -- WARN("%s(): can't find starting instruction", -- func->name); -- return -1; -- } -- -- func_for_each_insn(file, func, insn) -- if (!insn->func) -- insn->func = func; -- } -- } -- -- return 0; --} -- --/* -- * Find all uses of the unreachable() macro, which are code path dead ends. -- */ --static int add_dead_ends(struct objtool_file *file) --{ -- struct section *sec; -- struct rela *rela; -- struct instruction *insn; -- bool found; -- -- sec = find_section_by_name(file->elf, ".rela__unreachable"); -- if (!sec) -- return 0; -- -- list_for_each_entry(rela, &sec->rela_list, list) { -- if (rela->sym->type != STT_SECTION) { -- WARN("unexpected relocation symbol type in .rela__unreachable"); -- return -1; -- } -- insn = find_insn(file, rela->sym->sec, rela->addend); -- if (insn) -- insn = list_prev_entry(insn, list); -- else if (rela->addend == rela->sym->sec->len) { -- found = false; -- list_for_each_entry_reverse(insn, &file->insn_list, list) { -- if (insn->sec == rela->sym->sec) { -- found = true; -- break; -- } -- } -- -- if (!found) { -- WARN("can't find unreachable insn at %s+0x%x", -- rela->sym->sec->name, rela->addend); -- return -1; -- } -- } else { -- WARN("can't find unreachable insn at %s+0x%x", -- rela->sym->sec->name, rela->addend); -- return -1; -- } -- -- insn->dead_end = true; -- } -- -- return 0; --} -- --/* -- * Warnings shouldn't be reported for ignored functions. -- */ --static void add_ignores(struct objtool_file *file) --{ -- struct instruction *insn; -- struct section *sec; -- struct symbol *func; -- -- list_for_each_entry(sec, &file->elf->sections, list) { -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- if (!ignore_func(file, func)) -- continue; -- -- func_for_each_insn(file, func, insn) -- insn->visited = true; -- } -- } --} -- --/* -- * FIXME: For now, just ignore any alternatives which add retpolines. This is -- * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. -- * But it at least allows objtool to understand the control flow *around* the -- * retpoline. 
-- */ --static int add_nospec_ignores(struct objtool_file *file) --{ -- struct section *sec; -- struct rela *rela; -- struct instruction *insn; -- -- sec = find_section_by_name(file->elf, ".rela.discard.nospec"); -- if (!sec) -- return 0; -- -- list_for_each_entry(rela, &sec->rela_list, list) { -- if (rela->sym->type != STT_SECTION) { -- WARN("unexpected relocation symbol type in %s", sec->name); -- return -1; -- } -- -- insn = find_insn(file, rela->sym->sec, rela->addend); -- if (!insn) { -- WARN("bad .discard.nospec entry"); -- return -1; -- } -- -- insn->ignore_alts = true; -- } -- -- return 0; --} -- --/* -- * Find the destination instructions for all jumps. -- */ --static int add_jump_destinations(struct objtool_file *file) --{ -- struct instruction *insn; -- struct rela *rela; -- struct section *dest_sec; -- unsigned long dest_off; -- -- for_each_insn(file, insn) { -- if (insn->type != INSN_JUMP_CONDITIONAL && -- insn->type != INSN_JUMP_UNCONDITIONAL) -- continue; -- -- /* skip ignores */ -- if (insn->visited) -- continue; -- -- rela = find_rela_by_dest_range(insn->sec, insn->offset, -- insn->len); -- if (!rela) { -- dest_sec = insn->sec; -- dest_off = insn->offset + insn->len + insn->immediate; -- } else if (rela->sym->type == STT_SECTION) { -- dest_sec = rela->sym->sec; -- dest_off = rela->addend + 4; -- } else if (rela->sym->sec->idx) { -- dest_sec = rela->sym->sec; -- dest_off = rela->sym->sym.st_value + rela->addend + 4; -- } else if (strstr(rela->sym->name, "_indirect_thunk_")) { -- /* -- * Retpoline jumps are really dynamic jumps in -- * disguise, so convert them accordingly. -- */ -- insn->type = INSN_JUMP_DYNAMIC; -- continue; -- } else { -- /* sibling call */ -- insn->jump_dest = 0; -- continue; -- } -- -- insn->jump_dest = find_insn(file, dest_sec, dest_off); -- if (!insn->jump_dest) { -- -- /* -- * This is a special case where an alt instruction -- * jumps past the end of the section. These are -- * handled later in handle_group_alt(). -- */ -- if (!strcmp(insn->sec->name, ".altinstr_replacement")) -- continue; -- -- WARN_FUNC("can't find jump dest instruction at %s+0x%lx", -- insn->sec, insn->offset, dest_sec->name, -- dest_off); -- return -1; -- } -- } -- -- return 0; --} -- --/* -- * Find the destination instructions for all calls. -- */ --static int add_call_destinations(struct objtool_file *file) --{ -- struct instruction *insn; -- unsigned long dest_off; -- struct rela *rela; -- -- for_each_insn(file, insn) { -- if (insn->type != INSN_CALL) -- continue; -- -- rela = find_rela_by_dest_range(insn->sec, insn->offset, -- insn->len); -- if (!rela) { -- dest_off = insn->offset + insn->len + insn->immediate; -- insn->call_dest = find_symbol_by_offset(insn->sec, -- dest_off); -- /* -- * FIXME: Thanks to retpolines, it's now considered -- * normal for a function to call within itself. So -- * disable this warning for now. 
-- */ --#if 0 -- if (!insn->call_dest) { -- WARN_FUNC("can't find call dest symbol at offset 0x%lx", -- insn->sec, insn->offset, dest_off); -- return -1; -- } --#endif -- } else if (rela->sym->type == STT_SECTION) { -- insn->call_dest = find_symbol_by_offset(rela->sym->sec, -- rela->addend+4); -- if (!insn->call_dest || -- insn->call_dest->type != STT_FUNC) { -- WARN_FUNC("can't find call dest symbol at %s+0x%x", -- insn->sec, insn->offset, -- rela->sym->sec->name, -- rela->addend + 4); -- return -1; -- } -- } else -- insn->call_dest = rela->sym; -- } -- -- return 0; --} -- --/* -- * The .alternatives section requires some extra special care, over and above -- * what other special sections require: -- * -- * 1. Because alternatives are patched in-place, we need to insert a fake jump -- * instruction at the end so that validate_branch() skips all the original -- * replaced instructions when validating the new instruction path. -- * -- * 2. An added wrinkle is that the new instruction length might be zero. In -- * that case the old instructions are replaced with noops. We simulate that -- * by creating a fake jump as the only new instruction. -- * -- * 3. In some cases, the alternative section includes an instruction which -- * conditionally jumps to the _end_ of the entry. We have to modify these -- * jumps' destinations to point back to .text rather than the end of the -- * entry in .altinstr_replacement. -- * -- * 4. It has been requested that we don't validate the !POPCNT feature path -- * which is a "very very small percentage of machines". -- */ --static int handle_group_alt(struct objtool_file *file, -- struct special_alt *special_alt, -- struct instruction *orig_insn, -- struct instruction **new_insn) --{ -- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; -- unsigned long dest_off; -- -- last_orig_insn = NULL; -- insn = orig_insn; -- sec_for_each_insn_from(file, insn) { -- if (insn->offset >= special_alt->orig_off + special_alt->orig_len) -- break; -- -- if (special_alt->skip_orig) -- insn->type = INSN_NOP; -- -- insn->alt_group = true; -- last_orig_insn = insn; -- } -- -- if (!next_insn_same_sec(file, last_orig_insn)) { -- WARN("%s: don't know how to handle alternatives at end of section", -- special_alt->orig_sec->name); -- return -1; -- } -- -- fake_jump = malloc(sizeof(*fake_jump)); -- if (!fake_jump) { -- WARN("malloc failed"); -- return -1; -- } -- memset(fake_jump, 0, sizeof(*fake_jump)); -- INIT_LIST_HEAD(&fake_jump->alts); -- fake_jump->sec = special_alt->new_sec; -- fake_jump->offset = -1; -- fake_jump->type = INSN_JUMP_UNCONDITIONAL; -- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); -- -- if (!special_alt->new_len) { -- *new_insn = fake_jump; -- return 0; -- } -- -- last_new_insn = NULL; -- insn = *new_insn; -- sec_for_each_insn_from(file, insn) { -- if (insn->offset >= special_alt->new_off + special_alt->new_len) -- break; -- -- last_new_insn = insn; -- -- if (insn->type != INSN_JUMP_CONDITIONAL && -- insn->type != INSN_JUMP_UNCONDITIONAL) -- continue; -- -- if (!insn->immediate) -- continue; -- -- dest_off = insn->offset + insn->len + insn->immediate; -- if (dest_off == special_alt->new_off + special_alt->new_len) -- insn->jump_dest = fake_jump; -- -- if (!insn->jump_dest) { -- WARN_FUNC("can't find alternative jump destination", -- insn->sec, insn->offset); -- return -1; -- } -- } -- -- if (!last_new_insn) { -- WARN_FUNC("can't find last new alternative instruction", -- special_alt->new_sec, special_alt->new_off); -- return -1; -- } 
-- -- list_add(&fake_jump->list, &last_new_insn->list); -- -- return 0; --} -- --/* -- * A jump table entry can either convert a nop to a jump or a jump to a nop. -- * If the original instruction is a jump, make the alt entry an effective nop -- * by just skipping the original instruction. -- */ --static int handle_jump_alt(struct objtool_file *file, -- struct special_alt *special_alt, -- struct instruction *orig_insn, -- struct instruction **new_insn) --{ -- if (orig_insn->type == INSN_NOP) -- return 0; -- -- if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { -- WARN_FUNC("unsupported instruction at jump label", -- orig_insn->sec, orig_insn->offset); -- return -1; -- } -- -- *new_insn = list_next_entry(orig_insn, list); -- return 0; --} -- --/* -- * Read all the special sections which have alternate instructions which can be -- * patched in or redirected to at runtime. Each instruction having alternate -- * instruction(s) has them added to its insn->alts list, which will be -- * traversed in validate_branch(). -- */ --static int add_special_section_alts(struct objtool_file *file) --{ -- struct list_head special_alts; -- struct instruction *orig_insn, *new_insn; -- struct special_alt *special_alt, *tmp; -- struct alternative *alt; -- int ret; -- -- ret = special_get_alts(file->elf, &special_alts); -- if (ret) -- return ret; -- -- list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { -- -- orig_insn = find_insn(file, special_alt->orig_sec, -- special_alt->orig_off); -- if (!orig_insn) { -- WARN_FUNC("special: can't find orig instruction", -- special_alt->orig_sec, special_alt->orig_off); -- ret = -1; -- goto out; -- } -- -- /* Ignore retpoline alternatives. */ -- if (orig_insn->ignore_alts) -- continue; -- -- new_insn = NULL; -- if (!special_alt->group || special_alt->new_len) { -- new_insn = find_insn(file, special_alt->new_sec, -- special_alt->new_off); -- if (!new_insn) { -- WARN_FUNC("special: can't find new instruction", -- special_alt->new_sec, -- special_alt->new_off); -- ret = -1; -- goto out; -- } -- } -- -- if (special_alt->group) { -- ret = handle_group_alt(file, special_alt, orig_insn, -- &new_insn); -- if (ret) -- goto out; -- } else if (special_alt->jump_or_nop) { -- ret = handle_jump_alt(file, special_alt, orig_insn, -- &new_insn); -- if (ret) -- goto out; -- } -- -- alt = malloc(sizeof(*alt)); -- if (!alt) { -- WARN("malloc failed"); -- ret = -1; -- goto out; -- } -- -- alt->insn = new_insn; -- list_add_tail(&alt->list, &orig_insn->alts); -- -- list_del(&special_alt->list); -- free(special_alt); -- } -- --out: -- return ret; --} -- --static int add_switch_table(struct objtool_file *file, struct symbol *func, -- struct instruction *insn, struct rela *table, -- struct rela *next_table) --{ -- struct rela *rela = table; -- struct instruction *alt_insn; -- struct alternative *alt; -- -- list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { -- if (rela == next_table) -- break; -- -- if (rela->sym->sec != insn->sec || -- rela->addend <= func->offset || -- rela->addend >= func->offset + func->len) -- break; -- -- alt_insn = find_insn(file, insn->sec, rela->addend); -- if (!alt_insn) { -- WARN("%s: can't find instruction at %s+0x%x", -- file->rodata->rela->name, insn->sec->name, -- rela->addend); -- return -1; -- } -- -- alt = malloc(sizeof(*alt)); -- if (!alt) { -- WARN("malloc failed"); -- return -1; -- } -- -- alt->insn = alt_insn; -- list_add_tail(&alt->list, &insn->alts); -- } -- -- return 0; --} -- --/* -- * find_switch_table() - Given a dynamic 
jump, find the switch jump table in -- * .rodata associated with it. -- * -- * There are 3 basic patterns: -- * -- * 1. jmpq *[rodata addr](,%reg,8) -- * -- * This is the most common case by far. It jumps to an address in a simple -- * jump table which is stored in .rodata. -- * -- * 2. jmpq *[rodata addr](%rip) -- * -- * This is caused by a rare GCC quirk, currently only seen in three driver -- * functions in the kernel, only with certain obscure non-distro configs. -- * -- * As part of an optimization, GCC makes a copy of an existing switch jump -- * table, modifies it, and then hard-codes the jump (albeit with an indirect -- * jump) to use a single entry in the table. The rest of the jump table and -- * some of its jump targets remain as dead code. -- * -- * In such a case we can just crudely ignore all unreachable instruction -- * warnings for the entire object file. Ideally we would just ignore them -- * for the function, but that would require redesigning the code quite a -- * bit. And honestly that's just not worth doing: unreachable instruction -- * warnings are of questionable value anyway, and this is such a rare issue. -- * -- * 3. mov [rodata addr],%reg1 -- * ... some instructions ... -- * jmpq *(%reg1,%reg2,8) -- * -- * This is a fairly uncommon pattern which is new for GCC 6. As of this -- * writing, there are 11 occurrences of it in the allmodconfig kernel. -- * -- * TODO: Once we have DWARF CFI and smarter instruction decoding logic, -- * ensure the same register is used in the mov and jump instructions. -- */ --static struct rela *find_switch_table(struct objtool_file *file, -- struct symbol *func, -- struct instruction *insn) --{ -- struct rela *text_rela, *rodata_rela; -- struct instruction *orig_insn = insn; -- -- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); -- if (text_rela && text_rela->sym == file->rodata->sym) { -- /* case 1 */ -- rodata_rela = find_rela_by_dest(file->rodata, -- text_rela->addend); -- if (rodata_rela) -- return rodata_rela; -- -- /* case 2 */ -- rodata_rela = find_rela_by_dest(file->rodata, -- text_rela->addend + 4); -- if (!rodata_rela) -- return NULL; -- file->ignore_unreachables = true; -- return rodata_rela; -- } -- -- /* case 3 */ -- func_for_each_insn_continue_reverse(file, func, insn) { -- if (insn->type == INSN_JUMP_DYNAMIC) -- break; -- -- /* allow small jumps within the range */ -- if (insn->type == INSN_JUMP_UNCONDITIONAL && -- insn->jump_dest && -- (insn->jump_dest->offset <= insn->offset || -- insn->jump_dest->offset > orig_insn->offset)) -- break; -- -- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, -- insn->len); -- if (text_rela && text_rela->sym == file->rodata->sym) -- return find_rela_by_dest(file->rodata, -- text_rela->addend); -- } -- -- return NULL; --} -- --static int add_func_switch_tables(struct objtool_file *file, -- struct symbol *func) --{ -- struct instruction *insn, *prev_jump = NULL; -- struct rela *rela, *prev_rela = NULL; -- int ret; -- -- func_for_each_insn(file, func, insn) { -- if (insn->type != INSN_JUMP_DYNAMIC) -- continue; -- -- rela = find_switch_table(file, func, insn); -- if (!rela) -- continue; -- -- /* -- * We found a switch table, but we don't know yet how big it -- * is. Don't add it until we reach the end of the function or -- * the beginning of another switch table in the same function. 
-- */ -- if (prev_jump) { -- ret = add_switch_table(file, func, prev_jump, prev_rela, -- rela); -- if (ret) -- return ret; -- } -- -- prev_jump = insn; -- prev_rela = rela; -- } -- -- if (prev_jump) { -- ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); -- if (ret) -- return ret; -- } -- -- return 0; --} -- --/* -- * For some switch statements, gcc generates a jump table in the .rodata -- * section which contains a list of addresses within the function to jump to. -- * This finds these jump tables and adds them to the insn->alts lists. -- */ --static int add_switch_table_alts(struct objtool_file *file) --{ -- struct section *sec; -- struct symbol *func; -- int ret; -- -- if (!file->rodata || !file->rodata->rela) -- return 0; -- -- list_for_each_entry(sec, &file->elf->sections, list) { -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- ret = add_func_switch_tables(file, func); -- if (ret) -- return ret; -- } -- } -- -- return 0; --} -- --static int decode_sections(struct objtool_file *file) --{ -- int ret; -+#include "check.h" - -- ret = decode_instructions(file); -- if (ret) -- return ret; -+bool nofp; - -- ret = add_dead_ends(file); -- if (ret) -- return ret; -- -- add_ignores(file); -- -- ret = add_nospec_ignores(file); -- if (ret) -- return ret; -- -- ret = add_jump_destinations(file); -- if (ret) -- return ret; -- -- ret = add_call_destinations(file); -- if (ret) -- return ret; -- -- ret = add_special_section_alts(file); -- if (ret) -- return ret; -- -- ret = add_switch_table_alts(file); -- if (ret) -- return ret; -- -- return 0; --} -- --static bool is_fentry_call(struct instruction *insn) --{ -- if (insn->type == INSN_CALL && -- insn->call_dest->type == STT_NOTYPE && -- !strcmp(insn->call_dest->name, "__fentry__")) -- return true; -- -- return false; --} -- --static bool has_modified_stack_frame(struct instruction *insn) --{ -- return (insn->state & STATE_FP_SAVED) || -- (insn->state & STATE_FP_SETUP); --} -- --static bool has_valid_stack_frame(struct instruction *insn) --{ -- return (insn->state & STATE_FP_SAVED) && -- (insn->state & STATE_FP_SETUP); --} -- --static unsigned int frame_state(unsigned long state) --{ -- return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); --} -- --/* -- * Follow the branch starting at the given instruction, and recursively follow -- * any other branches (jumps). Meanwhile, track the frame pointer state at -- * each instruction and validate all the rules described in -- * tools/objtool/Documentation/stack-validation.txt. 
-- */ --static int validate_branch(struct objtool_file *file, -- struct instruction *first, unsigned char first_state) --{ -- struct alternative *alt; -- struct instruction *insn; -- struct section *sec; -- struct symbol *func = NULL; -- unsigned char state; -- int ret; -- -- insn = first; -- sec = insn->sec; -- state = first_state; -- -- if (insn->alt_group && list_empty(&insn->alts)) { -- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", -- sec, insn->offset); -- return 1; -- } -- -- while (1) { -- if (file->c_file && insn->func) { -- if (func && func != insn->func) { -- WARN("%s() falls through to next function %s()", -- func->name, insn->func->name); -- return 1; -- } -- -- func = insn->func; -- } -- -- if (insn->visited) { -- if (frame_state(insn->state) != frame_state(state)) { -- WARN_FUNC("frame pointer state mismatch", -- sec, insn->offset); -- return 1; -- } -- -- return 0; -- } -- -- insn->visited = true; -- insn->state = state; -- -- list_for_each_entry(alt, &insn->alts, list) { -- ret = validate_branch(file, alt->insn, state); -- if (ret) -- return 1; -- } -- -- switch (insn->type) { -- -- case INSN_FP_SAVE: -- if (!nofp) { -- if (state & STATE_FP_SAVED) { -- WARN_FUNC("duplicate frame pointer save", -- sec, insn->offset); -- return 1; -- } -- state |= STATE_FP_SAVED; -- } -- break; -- -- case INSN_FP_SETUP: -- if (!nofp) { -- if (state & STATE_FP_SETUP) { -- WARN_FUNC("duplicate frame pointer setup", -- sec, insn->offset); -- return 1; -- } -- state |= STATE_FP_SETUP; -- } -- break; -- -- case INSN_FP_RESTORE: -- if (!nofp) { -- if (has_valid_stack_frame(insn)) -- state &= ~STATE_FP_SETUP; -- -- state &= ~STATE_FP_SAVED; -- } -- break; -- -- case INSN_RETURN: -- if (!nofp && has_modified_stack_frame(insn)) { -- WARN_FUNC("return without frame pointer restore", -- sec, insn->offset); -- return 1; -- } -- return 0; -- -- case INSN_CALL: -- if (is_fentry_call(insn)) { -- state |= STATE_FENTRY; -- break; -- } -- -- ret = dead_end_function(file, insn->call_dest); -- if (ret == 1) -- return 0; -- if (ret == -1) -- return 1; -- -- /* fallthrough */ -- case INSN_CALL_DYNAMIC: -- if (!nofp && !has_valid_stack_frame(insn)) { -- WARN_FUNC("call without frame pointer save/setup", -- sec, insn->offset); -- return 1; -- } -- break; -- -- case INSN_JUMP_CONDITIONAL: -- case INSN_JUMP_UNCONDITIONAL: -- if (insn->jump_dest) { -- ret = validate_branch(file, insn->jump_dest, -- state); -- if (ret) -- return 1; -- } else if (has_modified_stack_frame(insn)) { -- WARN_FUNC("sibling call from callable instruction with changed frame pointer", -- sec, insn->offset); -- return 1; -- } /* else it's a sibling call */ -- -- if (insn->type == INSN_JUMP_UNCONDITIONAL) -- return 0; -- -- break; -- -- case INSN_JUMP_DYNAMIC: -- if (list_empty(&insn->alts) && -- has_modified_stack_frame(insn)) { -- WARN_FUNC("sibling call from callable instruction with changed frame pointer", -- sec, insn->offset); -- return 1; -- } -- -- return 0; -- -- default: -- break; -- } -- -- if (insn->dead_end) -- return 0; -- -- insn = next_insn_same_sec(file, insn); -- if (!insn) { -- WARN("%s: unexpected end of section", sec->name); -- return 1; -- } -- } -- -- return 0; --} -- --static bool is_kasan_insn(struct instruction *insn) --{ -- return (insn->type == INSN_CALL && -- !strcmp(insn->call_dest->name, "__asan_handle_no_return")); --} -- --static bool is_ubsan_insn(struct instruction *insn) --{ -- return (insn->type == INSN_CALL && -- !strcmp(insn->call_dest->name, -- 
"__ubsan_handle_builtin_unreachable")); --} -- --static bool ignore_unreachable_insn(struct symbol *func, -- struct instruction *insn) --{ -- int i; -- -- if (insn->type == INSN_NOP) -- return true; -- -- /* -- * Check if this (or a subsequent) instruction is related to -- * CONFIG_UBSAN or CONFIG_KASAN. -- * -- * End the search at 5 instructions to avoid going into the weeds. -- */ -- for (i = 0; i < 5; i++) { -- -- if (is_kasan_insn(insn) || is_ubsan_insn(insn)) -- return true; -- -- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { -- insn = insn->jump_dest; -- continue; -- } -- -- if (insn->offset + insn->len >= func->offset + func->len) -- break; -- insn = list_next_entry(insn, list); -- } -- -- return false; --} -- --static int validate_functions(struct objtool_file *file) --{ -- struct section *sec; -- struct symbol *func; -- struct instruction *insn; -- int ret, warnings = 0; -- -- list_for_each_entry(sec, &file->elf->sections, list) { -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- insn = find_insn(file, sec, func->offset); -- if (!insn) -- continue; -- -- ret = validate_branch(file, insn, 0); -- warnings += ret; -- } -- } -- -- list_for_each_entry(sec, &file->elf->sections, list) { -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- func_for_each_insn(file, func, insn) { -- if (insn->visited) -- continue; -- -- insn->visited = true; -- -- if (file->ignore_unreachables || warnings || -- ignore_unreachable_insn(func, insn)) -- continue; -- -- /* -- * gcov produces a lot of unreachable -- * instructions. If we get an unreachable -- * warning and the file has gcov enabled, just -- * ignore it, and all other such warnings for -- * the file. -- */ -- if (!file->ignore_unreachables && -- gcov_enabled(file)) { -- file->ignore_unreachables = true; -- continue; -- } -- -- WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); -- warnings++; -- } -- } -- } -- -- return warnings; --} -- --static int validate_uncallable_instructions(struct objtool_file *file) --{ -- struct instruction *insn; -- int warnings = 0; -- -- for_each_insn(file, insn) { -- if (!insn->visited && insn->type == INSN_RETURN) { -- -- /* -- * Don't warn about call instructions in unvisited -- * retpoline alternatives. 
-- */ -- if (!strcmp(insn->sec->name, ".altinstr_replacement")) -- continue; -- -- WARN_FUNC("return instruction outside of a callable function", -- insn->sec, insn->offset); -- warnings++; -- } -- } -- -- return warnings; --} -- --static void cleanup(struct objtool_file *file) --{ -- struct instruction *insn, *tmpinsn; -- struct alternative *alt, *tmpalt; -- -- list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { -- list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { -- list_del(&alt->list); -- free(alt); -- } -- list_del(&insn->list); -- hash_del(&insn->hash); -- free(insn); -- } -- elf_close(file->elf); --} -- --const char * const check_usage[] = { -+static const char * const check_usage[] = { - "objtool check [<options>] file.o", - NULL, - }; - -+const struct option check_options[] = { -+ OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), -+ OPT_END(), -+}; -+ - int cmd_check(int argc, const char **argv) - { -- struct objtool_file file; -- int ret, warnings = 0; -+ const char *objname; - -- const struct option options[] = { -- OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), -- OPT_END(), -- }; -- -- argc = parse_options(argc, argv, options, check_usage, 0); -+ argc = parse_options(argc, argv, check_options, check_usage, 0); - - if (argc != 1) -- usage_with_options(check_usage, options); -+ usage_with_options(check_usage, check_options); - - objname = argv[0]; - -- file.elf = elf_open(objname); -- if (!file.elf) { -- fprintf(stderr, "error reading elf file %s\n", objname); -- return 1; -- } -- -- INIT_LIST_HEAD(&file.insn_list); -- hash_init(file.insn_hash); -- file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); -- file.rodata = find_section_by_name(file.elf, ".rodata"); -- file.ignore_unreachables = false; -- file.c_file = find_section_by_name(file.elf, ".comment"); -- -- ret = decode_sections(&file); -- if (ret < 0) -- goto out; -- warnings += ret; -- -- ret = validate_functions(&file); -- if (ret < 0) -- goto out; -- warnings += ret; -- -- ret = validate_uncallable_instructions(&file); -- if (ret < 0) -- goto out; -- warnings += ret; -- --out: -- cleanup(&file); -- -- /* ignore warnings for now until we get all the code cleaned up */ -- if (ret || warnings) -- return 0; -- return 0; -+ return check(objname, nofp); - } -diff --git a/tools/objtool/check.c b/tools/objtool/check.c -new file mode 100644 -index 0000000..b7a0af5 ---- /dev/null -+++ b/tools/objtool/check.c -@@ -0,0 +1,1327 @@ -+/* -+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
-+ */ -+ -+#include <string.h> -+#include <stdlib.h> -+ -+#include "check.h" -+#include "elf.h" -+#include "special.h" -+#include "arch.h" -+#include "warn.h" -+ -+#include <linux/hashtable.h> -+ -+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -+ -+#define STATE_FP_SAVED 0x1 -+#define STATE_FP_SETUP 0x2 -+#define STATE_FENTRY 0x4 -+ -+struct alternative { -+ struct list_head list; -+ struct instruction *insn; -+}; -+ -+const char *objname; -+static bool nofp; -+ -+static struct instruction *find_insn(struct objtool_file *file, -+ struct section *sec, unsigned long offset) -+{ -+ struct instruction *insn; -+ -+ hash_for_each_possible(file->insn_hash, insn, hash, offset) -+ if (insn->sec == sec && insn->offset == offset) -+ return insn; -+ -+ return NULL; -+} -+ -+static struct instruction *next_insn_same_sec(struct objtool_file *file, -+ struct instruction *insn) -+{ -+ struct instruction *next = list_next_entry(insn, list); -+ -+ if (&next->list == &file->insn_list || next->sec != insn->sec) -+ return NULL; -+ -+ return next; -+} -+ -+static bool gcov_enabled(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct symbol *sym; -+ -+ list_for_each_entry(sec, &file->elf->sections, list) -+ list_for_each_entry(sym, &sec->symbol_list, list) -+ if (!strncmp(sym->name, "__gcov_.", 8)) -+ return true; -+ -+ return false; -+} -+ -+#define for_each_insn(file, insn) \ -+ list_for_each_entry(insn, &file->insn_list, list) -+ -+#define func_for_each_insn(file, func, insn) \ -+ for (insn = find_insn(file, func->sec, func->offset); \ -+ insn && &insn->list != &file->insn_list && \ -+ insn->sec == func->sec && \ -+ insn->offset < func->offset + func->len; \ -+ insn = list_next_entry(insn, list)) -+ -+#define func_for_each_insn_continue_reverse(file, func, insn) \ -+ for (insn = list_prev_entry(insn, list); \ -+ &insn->list != &file->insn_list && \ -+ insn->sec == func->sec && insn->offset >= func->offset; \ -+ insn = list_prev_entry(insn, list)) -+ -+#define sec_for_each_insn_from(file, insn) \ -+ for (; insn; insn = next_insn_same_sec(file, insn)) -+ -+ -+/* -+ * Check if the function has been manually whitelisted with the -+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted -+ * due to its use of a context switching instruction. -+ */ -+static bool ignore_func(struct objtool_file *file, struct symbol *func) -+{ -+ struct rela *rela; -+ struct instruction *insn; -+ -+ /* check for STACK_FRAME_NON_STANDARD */ -+ if (file->whitelist && file->whitelist->rela) -+ list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { -+ if (rela->sym->type == STT_SECTION && -+ rela->sym->sec == func->sec && -+ rela->addend == func->offset) -+ return true; -+ if (rela->sym->type == STT_FUNC && rela->sym == func) -+ return true; -+ } -+ -+ /* check if it has a context switching instruction */ -+ func_for_each_insn(file, func, insn) -+ if (insn->type == INSN_CONTEXT_SWITCH) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * This checks to see if the given function is a "noreturn" function. -+ * -+ * For global functions which are outside the scope of this object file, we -+ * have to keep a manual list of them. -+ * -+ * For local functions, we have to detect them manually by simply looking for -+ * the lack of a return instruction. 
-+ * -+ * Returns: -+ * -1: error -+ * 0: no dead end -+ * 1: dead end -+ */ -+static int __dead_end_function(struct objtool_file *file, struct symbol *func, -+ int recursion) -+{ -+ int i; -+ struct instruction *insn; -+ bool empty = true; -+ -+ /* -+ * Unfortunately these have to be hard coded because the noreturn -+ * attribute isn't provided in ELF data. -+ */ -+ static const char * const global_noreturns[] = { -+ "__stack_chk_fail", -+ "panic", -+ "do_exit", -+ "do_task_dead", -+ "__module_put_and_exit", -+ "complete_and_exit", -+ "kvm_spurious_fault", -+ "__reiserfs_panic", -+ "lbug_with_loc" -+ }; -+ -+ if (func->bind == STB_WEAK) -+ return 0; -+ -+ if (func->bind == STB_GLOBAL) -+ for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) -+ if (!strcmp(func->name, global_noreturns[i])) -+ return 1; -+ -+ if (!func->sec) -+ return 0; -+ -+ func_for_each_insn(file, func, insn) { -+ empty = false; -+ -+ if (insn->type == INSN_RETURN) -+ return 0; -+ } -+ -+ if (empty) -+ return 0; -+ -+ /* -+ * A function can have a sibling call instead of a return. In that -+ * case, the function's dead-end status depends on whether the target -+ * of the sibling call returns. -+ */ -+ func_for_each_insn(file, func, insn) { -+ if (insn->sec != func->sec || -+ insn->offset >= func->offset + func->len) -+ break; -+ -+ if (insn->type == INSN_JUMP_UNCONDITIONAL) { -+ struct instruction *dest = insn->jump_dest; -+ struct symbol *dest_func; -+ -+ if (!dest) -+ /* sibling call to another file */ -+ return 0; -+ -+ if (dest->sec != func->sec || -+ dest->offset < func->offset || -+ dest->offset >= func->offset + func->len) { -+ /* local sibling call */ -+ dest_func = find_symbol_by_offset(dest->sec, -+ dest->offset); -+ if (!dest_func) -+ continue; -+ -+ if (recursion == 5) { -+ WARN_FUNC("infinite recursion (objtool bug!)", -+ dest->sec, dest->offset); -+ return -1; -+ } -+ -+ return __dead_end_function(file, dest_func, -+ recursion + 1); -+ } -+ } -+ -+ if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) -+ /* sibling call */ -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int dead_end_function(struct objtool_file *file, struct symbol *func) -+{ -+ return __dead_end_function(file, func, 0); -+} -+ -+/* -+ * Call the arch-specific instruction decoder for all the instructions and add -+ * them to the global instruction list. 
-+ */ -+static int decode_instructions(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct symbol *func; -+ unsigned long offset; -+ struct instruction *insn; -+ int ret; -+ -+ list_for_each_entry(sec, &file->elf->sections, list) { -+ -+ if (!(sec->sh.sh_flags & SHF_EXECINSTR)) -+ continue; -+ -+ for (offset = 0; offset < sec->len; offset += insn->len) { -+ insn = malloc(sizeof(*insn)); -+ memset(insn, 0, sizeof(*insn)); -+ -+ INIT_LIST_HEAD(&insn->alts); -+ insn->sec = sec; -+ insn->offset = offset; -+ -+ ret = arch_decode_instruction(file->elf, sec, offset, -+ sec->len - offset, -+ &insn->len, &insn->type, -+ &insn->immediate); -+ if (ret) -+ return ret; -+ -+ if (!insn->type || insn->type > INSN_LAST) { -+ WARN_FUNC("invalid instruction type %d", -+ insn->sec, insn->offset, insn->type); -+ return -1; -+ } -+ -+ hash_add(file->insn_hash, &insn->hash, insn->offset); -+ list_add_tail(&insn->list, &file->insn_list); -+ } -+ -+ list_for_each_entry(func, &sec->symbol_list, list) { -+ if (func->type != STT_FUNC) -+ continue; -+ -+ if (!find_insn(file, sec, func->offset)) { -+ WARN("%s(): can't find starting instruction", -+ func->name); -+ return -1; -+ } -+ -+ func_for_each_insn(file, func, insn) -+ if (!insn->func) -+ insn->func = func; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * Find all uses of the unreachable() macro, which are code path dead ends. -+ */ -+static int add_dead_ends(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct rela *rela; -+ struct instruction *insn; -+ bool found; -+ -+ sec = find_section_by_name(file->elf, ".rela__unreachable"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in .rela__unreachable"); -+ return -1; -+ } -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (insn) -+ insn = list_prev_entry(insn, list); -+ else if (rela->addend == rela->sym->sec->len) { -+ found = false; -+ list_for_each_entry_reverse(insn, &file->insn_list, list) { -+ if (insn->sec == rela->sym->sec) { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) { -+ WARN("can't find unreachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ } else { -+ WARN("can't find unreachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ -+ insn->dead_end = true; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Warnings shouldn't be reported for ignored functions. -+ */ -+static void add_ignores(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ struct section *sec; -+ struct symbol *func; -+ -+ list_for_each_entry(sec, &file->elf->sections, list) { -+ list_for_each_entry(func, &sec->symbol_list, list) { -+ if (func->type != STT_FUNC) -+ continue; -+ -+ if (!ignore_func(file, func)) -+ continue; -+ -+ func_for_each_insn(file, func, insn) -+ insn->visited = true; -+ } -+ } -+} -+ -+/* -+ * FIXME: For now, just ignore any alternatives which add retpolines. This is -+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. -+ * But it at least allows objtool to understand the control flow *around* the -+ * retpoline. 
-+ */ -+static int add_nospec_ignores(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct rela *rela; -+ struct instruction *insn; -+ -+ sec = find_section_by_name(file->elf, ".rela.discard.nospec"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in %s", sec->name); -+ return -1; -+ } -+ -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (!insn) { -+ WARN("bad .discard.nospec entry"); -+ return -1; -+ } -+ -+ insn->ignore_alts = true; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Find the destination instructions for all jumps. -+ */ -+static int add_jump_destinations(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ struct rela *rela; -+ struct section *dest_sec; -+ unsigned long dest_off; -+ -+ for_each_insn(file, insn) { -+ if (insn->type != INSN_JUMP_CONDITIONAL && -+ insn->type != INSN_JUMP_UNCONDITIONAL) -+ continue; -+ -+ /* skip ignores */ -+ if (insn->visited) -+ continue; -+ -+ rela = find_rela_by_dest_range(insn->sec, insn->offset, -+ insn->len); -+ if (!rela) { -+ dest_sec = insn->sec; -+ dest_off = insn->offset + insn->len + insn->immediate; -+ } else if (rela->sym->type == STT_SECTION) { -+ dest_sec = rela->sym->sec; -+ dest_off = rela->addend + 4; -+ } else if (rela->sym->sec->idx) { -+ dest_sec = rela->sym->sec; -+ dest_off = rela->sym->sym.st_value + rela->addend + 4; -+ } else if (strstr(rela->sym->name, "_indirect_thunk_")) { -+ /* -+ * Retpoline jumps are really dynamic jumps in -+ * disguise, so convert them accordingly. -+ */ -+ insn->type = INSN_JUMP_DYNAMIC; -+ continue; -+ } else { -+ /* sibling call */ -+ insn->jump_dest = 0; -+ continue; -+ } -+ -+ insn->jump_dest = find_insn(file, dest_sec, dest_off); -+ if (!insn->jump_dest) { -+ -+ /* -+ * This is a special case where an alt instruction -+ * jumps past the end of the section. These are -+ * handled later in handle_group_alt(). -+ */ -+ if (!strcmp(insn->sec->name, ".altinstr_replacement")) -+ continue; -+ -+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx", -+ insn->sec, insn->offset, dest_sec->name, -+ dest_off); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * Find the destination instructions for all calls. -+ */ -+static int add_call_destinations(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ unsigned long dest_off; -+ struct rela *rela; -+ -+ for_each_insn(file, insn) { -+ if (insn->type != INSN_CALL) -+ continue; -+ -+ rela = find_rela_by_dest_range(insn->sec, insn->offset, -+ insn->len); -+ if (!rela) { -+ dest_off = insn->offset + insn->len + insn->immediate; -+ insn->call_dest = find_symbol_by_offset(insn->sec, -+ dest_off); -+ /* -+ * FIXME: Thanks to retpolines, it's now considered -+ * normal for a function to call within itself. So -+ * disable this warning for now. 
-+ */ -+#if 0 -+ if (!insn->call_dest) { -+ WARN_FUNC("can't find call dest symbol at offset 0x%lx", -+ insn->sec, insn->offset, dest_off); -+ return -1; -+ } -+#endif -+ } else if (rela->sym->type == STT_SECTION) { -+ insn->call_dest = find_symbol_by_offset(rela->sym->sec, -+ rela->addend+4); -+ if (!insn->call_dest || -+ insn->call_dest->type != STT_FUNC) { -+ WARN_FUNC("can't find call dest symbol at %s+0x%x", -+ insn->sec, insn->offset, -+ rela->sym->sec->name, -+ rela->addend + 4); -+ return -1; -+ } -+ } else -+ insn->call_dest = rela->sym; -+ } -+ -+ return 0; -+} -+ -+/* -+ * The .alternatives section requires some extra special care, over and above -+ * what other special sections require: -+ * -+ * 1. Because alternatives are patched in-place, we need to insert a fake jump -+ * instruction at the end so that validate_branch() skips all the original -+ * replaced instructions when validating the new instruction path. -+ * -+ * 2. An added wrinkle is that the new instruction length might be zero. In -+ * that case the old instructions are replaced with noops. We simulate that -+ * by creating a fake jump as the only new instruction. -+ * -+ * 3. In some cases, the alternative section includes an instruction which -+ * conditionally jumps to the _end_ of the entry. We have to modify these -+ * jumps' destinations to point back to .text rather than the end of the -+ * entry in .altinstr_replacement. -+ * -+ * 4. It has been requested that we don't validate the !POPCNT feature path -+ * which is a "very very small percentage of machines". -+ */ -+static int handle_group_alt(struct objtool_file *file, -+ struct special_alt *special_alt, -+ struct instruction *orig_insn, -+ struct instruction **new_insn) -+{ -+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; -+ unsigned long dest_off; -+ -+ last_orig_insn = NULL; -+ insn = orig_insn; -+ sec_for_each_insn_from(file, insn) { -+ if (insn->offset >= special_alt->orig_off + special_alt->orig_len) -+ break; -+ -+ if (special_alt->skip_orig) -+ insn->type = INSN_NOP; -+ -+ insn->alt_group = true; -+ last_orig_insn = insn; -+ } -+ -+ if (!next_insn_same_sec(file, last_orig_insn)) { -+ WARN("%s: don't know how to handle alternatives at end of section", -+ special_alt->orig_sec->name); -+ return -1; -+ } -+ -+ fake_jump = malloc(sizeof(*fake_jump)); -+ if (!fake_jump) { -+ WARN("malloc failed"); -+ return -1; -+ } -+ memset(fake_jump, 0, sizeof(*fake_jump)); -+ INIT_LIST_HEAD(&fake_jump->alts); -+ fake_jump->sec = special_alt->new_sec; -+ fake_jump->offset = -1; -+ fake_jump->type = INSN_JUMP_UNCONDITIONAL; -+ fake_jump->jump_dest = list_next_entry(last_orig_insn, list); -+ -+ if (!special_alt->new_len) { -+ *new_insn = fake_jump; -+ return 0; -+ } -+ -+ last_new_insn = NULL; -+ insn = *new_insn; -+ sec_for_each_insn_from(file, insn) { -+ if (insn->offset >= special_alt->new_off + special_alt->new_len) -+ break; -+ -+ last_new_insn = insn; -+ -+ if (insn->type != INSN_JUMP_CONDITIONAL && -+ insn->type != INSN_JUMP_UNCONDITIONAL) -+ continue; -+ -+ if (!insn->immediate) -+ continue; -+ -+ dest_off = insn->offset + insn->len + insn->immediate; -+ if (dest_off == special_alt->new_off + special_alt->new_len) -+ insn->jump_dest = fake_jump; -+ -+ if (!insn->jump_dest) { -+ WARN_FUNC("can't find alternative jump destination", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ } -+ -+ if (!last_new_insn) { -+ WARN_FUNC("can't find last new alternative instruction", -+ special_alt->new_sec, special_alt->new_off); -+ return -1; -+ } 
-+ -+ list_add(&fake_jump->list, &last_new_insn->list); -+ -+ return 0; -+} -+ -+/* -+ * A jump table entry can either convert a nop to a jump or a jump to a nop. -+ * If the original instruction is a jump, make the alt entry an effective nop -+ * by just skipping the original instruction. -+ */ -+static int handle_jump_alt(struct objtool_file *file, -+ struct special_alt *special_alt, -+ struct instruction *orig_insn, -+ struct instruction **new_insn) -+{ -+ if (orig_insn->type == INSN_NOP) -+ return 0; -+ -+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { -+ WARN_FUNC("unsupported instruction at jump label", -+ orig_insn->sec, orig_insn->offset); -+ return -1; -+ } -+ -+ *new_insn = list_next_entry(orig_insn, list); -+ return 0; -+} -+ -+/* -+ * Read all the special sections which have alternate instructions which can be -+ * patched in or redirected to at runtime. Each instruction having alternate -+ * instruction(s) has them added to its insn->alts list, which will be -+ * traversed in validate_branch(). -+ */ -+static int add_special_section_alts(struct objtool_file *file) -+{ -+ struct list_head special_alts; -+ struct instruction *orig_insn, *new_insn; -+ struct special_alt *special_alt, *tmp; -+ struct alternative *alt; -+ int ret; -+ -+ ret = special_get_alts(file->elf, &special_alts); -+ if (ret) -+ return ret; -+ -+ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { -+ orig_insn = find_insn(file, special_alt->orig_sec, -+ special_alt->orig_off); -+ if (!orig_insn) { -+ WARN_FUNC("special: can't find orig instruction", -+ special_alt->orig_sec, special_alt->orig_off); -+ ret = -1; -+ goto out; -+ } -+ -+ /* Ignore retpoline alternatives. */ -+ if (orig_insn->ignore_alts) -+ continue; -+ -+ new_insn = NULL; -+ if (!special_alt->group || special_alt->new_len) { -+ new_insn = find_insn(file, special_alt->new_sec, -+ special_alt->new_off); -+ if (!new_insn) { -+ WARN_FUNC("special: can't find new instruction", -+ special_alt->new_sec, -+ special_alt->new_off); -+ ret = -1; -+ goto out; -+ } -+ } -+ -+ if (special_alt->group) { -+ ret = handle_group_alt(file, special_alt, orig_insn, -+ &new_insn); -+ if (ret) -+ goto out; -+ } else if (special_alt->jump_or_nop) { -+ ret = handle_jump_alt(file, special_alt, orig_insn, -+ &new_insn); -+ if (ret) -+ goto out; -+ } -+ -+ alt = malloc(sizeof(*alt)); -+ if (!alt) { -+ WARN("malloc failed"); -+ ret = -1; -+ goto out; -+ } -+ -+ alt->insn = new_insn; -+ list_add_tail(&alt->list, &orig_insn->alts); -+ -+ list_del(&special_alt->list); -+ free(special_alt); -+ } -+ -+out: -+ return ret; -+} -+ -+static int add_switch_table(struct objtool_file *file, struct symbol *func, -+ struct instruction *insn, struct rela *table, -+ struct rela *next_table) -+{ -+ struct rela *rela = table; -+ struct instruction *alt_insn; -+ struct alternative *alt; -+ -+ list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { -+ if (rela == next_table) -+ break; -+ -+ if (rela->sym->sec != insn->sec || -+ rela->addend <= func->offset || -+ rela->addend >= func->offset + func->len) -+ break; -+ -+ alt_insn = find_insn(file, insn->sec, rela->addend); -+ if (!alt_insn) { -+ WARN("%s: can't find instruction at %s+0x%x", -+ file->rodata->rela->name, insn->sec->name, -+ rela->addend); -+ return -1; -+ } -+ -+ alt = malloc(sizeof(*alt)); -+ if (!alt) { -+ WARN("malloc failed"); -+ return -1; -+ } -+ -+ alt->insn = alt_insn; -+ list_add_tail(&alt->list, &insn->alts); -+ } -+ -+ return 0; -+} -+ -+/* -+ * find_switch_table() - Given a dynamic jump, 
find the switch jump table in -+ * .rodata associated with it. -+ * -+ * There are 3 basic patterns: -+ * -+ * 1. jmpq *[rodata addr](,%reg,8) -+ * -+ * This is the most common case by far. It jumps to an address in a simple -+ * jump table which is stored in .rodata. -+ * -+ * 2. jmpq *[rodata addr](%rip) -+ * -+ * This is caused by a rare GCC quirk, currently only seen in three driver -+ * functions in the kernel, only with certain obscure non-distro configs. -+ * -+ * As part of an optimization, GCC makes a copy of an existing switch jump -+ * table, modifies it, and then hard-codes the jump (albeit with an indirect -+ * jump) to use a single entry in the table. The rest of the jump table and -+ * some of its jump targets remain as dead code. -+ * -+ * In such a case we can just crudely ignore all unreachable instruction -+ * warnings for the entire object file. Ideally we would just ignore them -+ * for the function, but that would require redesigning the code quite a -+ * bit. And honestly that's just not worth doing: unreachable instruction -+ * warnings are of questionable value anyway, and this is such a rare issue. -+ * -+ * 3. mov [rodata addr],%reg1 -+ * ... some instructions ... -+ * jmpq *(%reg1,%reg2,8) -+ * -+ * This is a fairly uncommon pattern which is new for GCC 6. As of this -+ * writing, there are 11 occurrences of it in the allmodconfig kernel. -+ * -+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic, -+ * ensure the same register is used in the mov and jump instructions. -+ */ -+static struct rela *find_switch_table(struct objtool_file *file, -+ struct symbol *func, -+ struct instruction *insn) -+{ -+ struct rela *text_rela, *rodata_rela; -+ struct instruction *orig_insn = insn; -+ -+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); -+ if (text_rela && text_rela->sym == file->rodata->sym) { -+ /* case 1 */ -+ rodata_rela = find_rela_by_dest(file->rodata, -+ text_rela->addend); -+ if (rodata_rela) -+ return rodata_rela; -+ -+ /* case 2 */ -+ rodata_rela = find_rela_by_dest(file->rodata, -+ text_rela->addend + 4); -+ if (!rodata_rela) -+ return NULL; -+ file->ignore_unreachables = true; -+ return rodata_rela; -+ } -+ -+ /* case 3 */ -+ func_for_each_insn_continue_reverse(file, func, insn) { -+ if (insn->type == INSN_JUMP_DYNAMIC) -+ break; -+ -+ /* allow small jumps within the range */ -+ if (insn->type == INSN_JUMP_UNCONDITIONAL && -+ insn->jump_dest && -+ (insn->jump_dest->offset <= insn->offset || -+ insn->jump_dest->offset > orig_insn->offset)) -+ break; -+ -+ /* look for a relocation which references .rodata */ -+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, -+ insn->len); -+ if (!text_rela || text_rela->sym != file->rodata->sym) -+ continue; -+ -+ /* -+ * Make sure the .rodata address isn't associated with a -+ * symbol. gcc jump tables are anonymous data. -+ */ -+ if (find_symbol_containing(file->rodata, text_rela->addend)) -+ continue; -+ -+ return find_rela_by_dest(file->rodata, text_rela->addend); -+ } -+ -+ return NULL; -+} -+ -+static int add_func_switch_tables(struct objtool_file *file, -+ struct symbol *func) -+{ -+ struct instruction *insn, *prev_jump = NULL; -+ struct rela *rela, *prev_rela = NULL; -+ int ret; -+ -+ func_for_each_insn(file, func, insn) { -+ if (insn->type != INSN_JUMP_DYNAMIC) -+ continue; -+ -+ rela = find_switch_table(file, func, insn); -+ if (!rela) -+ continue; -+ -+ /* -+ * We found a switch table, but we don't know yet how big it -+ * is. 
Don't add it until we reach the end of the function or -+ * the beginning of another switch table in the same function. -+ */ -+ if (prev_jump) { -+ ret = add_switch_table(file, func, prev_jump, prev_rela, -+ rela); -+ if (ret) -+ return ret; -+ } -+ -+ prev_jump = insn; -+ prev_rela = rela; -+ } -+ -+ if (prev_jump) { -+ ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * For some switch statements, gcc generates a jump table in the .rodata -+ * section which contains a list of addresses within the function to jump to. -+ * This finds these jump tables and adds them to the insn->alts lists. -+ */ -+static int add_switch_table_alts(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct symbol *func; -+ int ret; -+ -+ if (!file->rodata || !file->rodata->rela) -+ return 0; -+ -+ list_for_each_entry(sec, &file->elf->sections, list) { -+ list_for_each_entry(func, &sec->symbol_list, list) { -+ if (func->type != STT_FUNC) -+ continue; -+ -+ ret = add_func_switch_tables(file, func); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static int decode_sections(struct objtool_file *file) -+{ -+ int ret; -+ -+ ret = decode_instructions(file); -+ if (ret) -+ return ret; -+ -+ ret = add_dead_ends(file); -+ if (ret) -+ return ret; -+ -+ add_ignores(file); -+ -+ ret = add_nospec_ignores(file); -+ if (ret) -+ return ret; -+ -+ ret = add_jump_destinations(file); -+ if (ret) -+ return ret; -+ -+ ret = add_call_destinations(file); -+ if (ret) -+ return ret; -+ -+ ret = add_special_section_alts(file); -+ if (ret) -+ return ret; -+ -+ ret = add_switch_table_alts(file); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -+ -+static bool is_fentry_call(struct instruction *insn) -+{ -+ if (insn->type == INSN_CALL && -+ insn->call_dest->type == STT_NOTYPE && -+ !strcmp(insn->call_dest->name, "__fentry__")) -+ return true; -+ -+ return false; -+} -+ -+static bool has_modified_stack_frame(struct instruction *insn) -+{ -+ return (insn->state & STATE_FP_SAVED) || -+ (insn->state & STATE_FP_SETUP); -+} -+ -+static bool has_valid_stack_frame(struct instruction *insn) -+{ -+ return (insn->state & STATE_FP_SAVED) && -+ (insn->state & STATE_FP_SETUP); -+} -+ -+static unsigned int frame_state(unsigned long state) -+{ -+ return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); -+} -+ -+/* -+ * Follow the branch starting at the given instruction, and recursively follow -+ * any other branches (jumps). Meanwhile, track the frame pointer state at -+ * each instruction and validate all the rules described in -+ * tools/objtool/Documentation/stack-validation.txt. 
-+ */ -+static int validate_branch(struct objtool_file *file, -+ struct instruction *first, unsigned char first_state) -+{ -+ struct alternative *alt; -+ struct instruction *insn; -+ struct section *sec; -+ struct symbol *func = NULL; -+ unsigned char state; -+ int ret; -+ -+ insn = first; -+ sec = insn->sec; -+ state = first_state; -+ -+ if (insn->alt_group && list_empty(&insn->alts)) { -+ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", -+ sec, insn->offset); -+ return 1; -+ } -+ -+ while (1) { -+ if (file->c_file && insn->func) { -+ if (func && func != insn->func) { -+ WARN("%s() falls through to next function %s()", -+ func->name, insn->func->name); -+ return 1; -+ } -+ -+ func = insn->func; -+ } -+ -+ if (insn->visited) { -+ if (frame_state(insn->state) != frame_state(state)) { -+ WARN_FUNC("frame pointer state mismatch", -+ sec, insn->offset); -+ return 1; -+ } -+ -+ return 0; -+ } -+ -+ insn->visited = true; -+ insn->state = state; -+ -+ list_for_each_entry(alt, &insn->alts, list) { -+ ret = validate_branch(file, alt->insn, state); -+ if (ret) -+ return 1; -+ } -+ -+ switch (insn->type) { -+ -+ case INSN_FP_SAVE: -+ if (!nofp) { -+ if (state & STATE_FP_SAVED) { -+ WARN_FUNC("duplicate frame pointer save", -+ sec, insn->offset); -+ return 1; -+ } -+ state |= STATE_FP_SAVED; -+ } -+ break; -+ -+ case INSN_FP_SETUP: -+ if (!nofp) { -+ if (state & STATE_FP_SETUP) { -+ WARN_FUNC("duplicate frame pointer setup", -+ sec, insn->offset); -+ return 1; -+ } -+ state |= STATE_FP_SETUP; -+ } -+ break; -+ -+ case INSN_FP_RESTORE: -+ if (!nofp) { -+ if (has_valid_stack_frame(insn)) -+ state &= ~STATE_FP_SETUP; -+ -+ state &= ~STATE_FP_SAVED; -+ } -+ break; -+ -+ case INSN_RETURN: -+ if (!nofp && has_modified_stack_frame(insn)) { -+ WARN_FUNC("return without frame pointer restore", -+ sec, insn->offset); -+ return 1; -+ } -+ return 0; -+ -+ case INSN_CALL: -+ if (is_fentry_call(insn)) { -+ state |= STATE_FENTRY; -+ break; -+ } -+ -+ ret = dead_end_function(file, insn->call_dest); -+ if (ret == 1) -+ return 0; -+ if (ret == -1) -+ return 1; -+ -+ /* fallthrough */ -+ case INSN_CALL_DYNAMIC: -+ if (!nofp && !has_valid_stack_frame(insn)) { -+ WARN_FUNC("call without frame pointer save/setup", -+ sec, insn->offset); -+ return 1; -+ } -+ break; -+ -+ case INSN_JUMP_CONDITIONAL: -+ case INSN_JUMP_UNCONDITIONAL: -+ if (insn->jump_dest) { -+ ret = validate_branch(file, insn->jump_dest, -+ state); -+ if (ret) -+ return 1; -+ } else if (has_modified_stack_frame(insn)) { -+ WARN_FUNC("sibling call from callable instruction with changed frame pointer", -+ sec, insn->offset); -+ return 1; -+ } /* else it's a sibling call */ -+ -+ if (insn->type == INSN_JUMP_UNCONDITIONAL) -+ return 0; -+ -+ break; -+ -+ case INSN_JUMP_DYNAMIC: -+ if (list_empty(&insn->alts) && -+ has_modified_stack_frame(insn)) { -+ WARN_FUNC("sibling call from callable instruction with changed frame pointer", -+ sec, insn->offset); -+ return 1; -+ } -+ -+ return 0; -+ -+ default: -+ break; -+ } -+ -+ if (insn->dead_end) -+ return 0; -+ -+ insn = next_insn_same_sec(file, insn); -+ if (!insn) { -+ WARN("%s: unexpected end of section", sec->name); -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+static bool is_kasan_insn(struct instruction *insn) -+{ -+ return (insn->type == INSN_CALL && -+ !strcmp(insn->call_dest->name, "__asan_handle_no_return")); -+} -+ -+static bool is_ubsan_insn(struct instruction *insn) -+{ -+ return (insn->type == INSN_CALL && -+ !strcmp(insn->call_dest->name, -+ 
"__ubsan_handle_builtin_unreachable")); -+} -+ -+static bool ignore_unreachable_insn(struct symbol *func, -+ struct instruction *insn) -+{ -+ int i; -+ -+ if (insn->type == INSN_NOP) -+ return true; -+ -+ /* -+ * Check if this (or a subsequent) instruction is related to -+ * CONFIG_UBSAN or CONFIG_KASAN. -+ * -+ * End the search at 5 instructions to avoid going into the weeds. -+ */ -+ for (i = 0; i < 5; i++) { -+ -+ if (is_kasan_insn(insn) || is_ubsan_insn(insn)) -+ return true; -+ -+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { -+ insn = insn->jump_dest; -+ continue; -+ } -+ -+ if (insn->offset + insn->len >= func->offset + func->len) -+ break; -+ insn = list_next_entry(insn, list); -+ } -+ -+ return false; -+} -+ -+static int validate_functions(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct symbol *func; -+ struct instruction *insn; -+ int ret, warnings = 0; -+ -+ list_for_each_entry(sec, &file->elf->sections, list) { -+ list_for_each_entry(func, &sec->symbol_list, list) { -+ if (func->type != STT_FUNC) -+ continue; -+ -+ insn = find_insn(file, sec, func->offset); -+ if (!insn) -+ continue; -+ -+ ret = validate_branch(file, insn, 0); -+ warnings += ret; -+ } -+ } -+ -+ list_for_each_entry(sec, &file->elf->sections, list) { -+ list_for_each_entry(func, &sec->symbol_list, list) { -+ if (func->type != STT_FUNC) -+ continue; -+ -+ func_for_each_insn(file, func, insn) { -+ if (insn->visited) -+ continue; -+ -+ insn->visited = true; -+ -+ if (file->ignore_unreachables || warnings || -+ ignore_unreachable_insn(func, insn)) -+ continue; -+ -+ /* -+ * gcov produces a lot of unreachable -+ * instructions. If we get an unreachable -+ * warning and the file has gcov enabled, just -+ * ignore it, and all other such warnings for -+ * the file. -+ */ -+ if (!file->ignore_unreachables && -+ gcov_enabled(file)) { -+ file->ignore_unreachables = true; -+ continue; -+ } -+ -+ WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); -+ warnings++; -+ } -+ } -+ } -+ -+ return warnings; -+} -+ -+static int validate_uncallable_instructions(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ int warnings = 0; -+ -+ for_each_insn(file, insn) { -+ if (!insn->visited && insn->type == INSN_RETURN) { -+ -+ /* -+ * Don't warn about call instructions in unvisited -+ * retpoline alternatives. 
-+ */ -+ if (!strcmp(insn->sec->name, ".altinstr_replacement")) -+ continue; -+ -+ WARN_FUNC("return instruction outside of a callable function", -+ insn->sec, insn->offset); -+ warnings++; -+ } -+ } -+ -+ return warnings; -+} -+ -+static void cleanup(struct objtool_file *file) -+{ -+ struct instruction *insn, *tmpinsn; -+ struct alternative *alt, *tmpalt; -+ -+ list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { -+ list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { -+ list_del(&alt->list); -+ free(alt); -+ } -+ list_del(&insn->list); -+ hash_del(&insn->hash); -+ free(insn); -+ } -+ elf_close(file->elf); -+} -+ -+int check(const char *_objname, bool _nofp) -+{ -+ struct objtool_file file; -+ int ret, warnings = 0; -+ -+ objname = _objname; -+ nofp = _nofp; -+ -+ file.elf = elf_open(objname); -+ if (!file.elf) { -+ fprintf(stderr, "error reading elf file %s\n", objname); -+ return 1; -+ } -+ -+ INIT_LIST_HEAD(&file.insn_list); -+ hash_init(file.insn_hash); -+ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); -+ file.rodata = find_section_by_name(file.elf, ".rodata"); -+ file.ignore_unreachables = false; -+ file.c_file = find_section_by_name(file.elf, ".comment"); -+ -+ ret = decode_sections(&file); -+ if (ret < 0) -+ goto out; -+ warnings += ret; -+ -+ ret = validate_functions(&file); -+ if (ret < 0) -+ goto out; -+ warnings += ret; -+ -+ ret = validate_uncallable_instructions(&file); -+ if (ret < 0) -+ goto out; -+ warnings += ret; -+ -+out: -+ cleanup(&file); -+ -+ /* ignore warnings for now until we get all the code cleaned up */ -+ if (ret || warnings) -+ return 0; -+ return 0; -+} -diff --git a/tools/objtool/check.h b/tools/objtool/check.h -new file mode 100644 -index 0000000..aca248a ---- /dev/null -+++ b/tools/objtool/check.h -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
-+ */ -+ -+#ifndef _CHECK_H -+#define _CHECK_H -+ -+#include <stdbool.h> -+#include "elf.h" -+#include "arch.h" -+#include <linux/hashtable.h> -+ -+struct instruction { -+ struct list_head list; -+ struct hlist_node hash; -+ struct section *sec; -+ unsigned long offset; -+ unsigned int len, state; -+ unsigned char type; -+ unsigned long immediate; -+ bool alt_group, visited, dead_end, ignore_alts; -+ struct symbol *call_dest; -+ struct instruction *jump_dest; -+ struct list_head alts; -+ struct symbol *func; -+}; -+ -+struct objtool_file { -+ struct elf *elf; -+ struct list_head insn_list; -+ DECLARE_HASHTABLE(insn_hash, 16); -+ struct section *rodata, *whitelist; -+ bool ignore_unreachables, c_file; -+}; -+ -+int check(const char *objname, bool nofp); -+ -+#endif /* _CHECK_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch deleted file mode 100644 index cac6deac..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch +++ /dev/null @@ -1,83 +0,0 @@ -From fdeb7a1f0247fca891d02ce491582ba950f7ee15 Mon Sep 17 00:00:00 2001 -From: Masami Hiramatsu <mhiramat@kernel.org> -Date: Fri, 19 Jan 2018 01:15:20 +0900 -Subject: [PATCH 088/103] kprobes/x86: Disable optimizing on the function jumps - to indirect thunk - -commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream. - -Since indirect jump instructions will be replaced by jumps -to __x86_indirect_thunk_*, those jmp instructions must be -treated as indirect jumps. Since optprobe prohibits optimizing -probes in functions which use an indirect jump, it also needs -to find the functions which jump to __x86_indirect_thunk_* and -disable optimization for them. - -Add a check that the jump target address is between -__indirect_thunk_start/end when optimizing a kprobe.
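A minimal sketch of that check, for illustration only: for a decoded relative jmp, the jump target is the address of the following instruction plus the signed immediate, so the question is simply whether that target falls inside the thunk section. The helper below uses the struct insn fields next_byte and immediate.value from <asm/insn.h>; the helper name is invented here, the caller is assumed to have already identified the instruction as a relative jump, and the real implementation is the insn_jump_into_range()/insn_is_indirect_jump() pair in the hunk that follows.

#include <asm/insn.h>

/*
 * Sketch only: does a decoded relative jump land inside
 * [start, start + len]?  This mirrors the check the patch performs
 * against the linker-provided section bounds
 * __indirect_thunk_start/__indirect_thunk_end.
 */
static int jump_lands_in_range(struct insn *insn, unsigned long start, int len)
{
	/* target = address of the next instruction + signed displacement */
	unsigned long target = (unsigned long)insn->next_byte +
			       (long)insn->immediate.value;

	return start <= target && target <= start + len;
}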
- -Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> -Cc: Arjan van de Ven <arjan@linux.intel.com> -Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> -Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- - arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c -index 3bb4c5f..90f8cd6 100644 ---- a/arch/x86/kernel/kprobes/opt.c -+++ b/arch/x86/kernel/kprobes/opt.c -@@ -37,6 +37,7 @@ - #include <asm/alternative.h> - #include <asm/insn.h> - #include <asm/debugreg.h> -+#include <asm/nospec-branch.h> - - #include "common.h" - -@@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) - } - - /* Check whether insn is indirect jump */ --static int insn_is_indirect_jump(struct insn *insn) -+static int __insn_is_indirect_jump(struct insn *insn) - { - return ((insn->opcode.bytes[0] == 0xff && - (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ -@@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) - return (start <= target && target <= start + len); - } - -+static int insn_is_indirect_jump(struct insn *insn) -+{ -+ int ret = __insn_is_indirect_jump(insn); -+ -+#ifdef CONFIG_RETPOLINE -+ /* -+ * Jump to x86_indirect_thunk_* is treated as an indirect jump. -+ * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with -+ * an older gcc may still use indirect jumps. So we add this check -+ * instead of replacing the indirect-jump check. -+ */ -+ if (!ret) -+ ret = insn_jump_into_range(insn, -+ (unsigned long)__indirect_thunk_start, -+ (unsigned long)__indirect_thunk_end - -+ (unsigned long)__indirect_thunk_start); -+#endif -+ return ret; -+} -+ - /* Decode whole function to ensure any instructions don't jump into target */ - static int can_optimize(unsigned long paddr) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch deleted file mode 100644 index 318297bf..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch +++ /dev/null @@ -1,9906 +0,0 @@ -From 0706298ca42f992d0c1afb93c8d6710d15f88ccb Mon Sep 17 00:00:00 2001 -From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Date: Sun, 3 Jun 2018 12:35:15 +0200 -Subject: [PATCH 88/93] objtool: sync up with the 4.14.47 version of objtool - -There are pros and cons to dealing with tools in the kernel directory. -The pros are that development happens fast, and new features -can be added to the kernel and the tools at the same time. The cons -are that, when dealing with backported kernel patches, it can be -necessary to backport parts of the tool changes as well. - -For 4.9.y so far, we have backported individual patches. That quickly -breaks down when there are minor differences between how backports were -handled, so grabbing a 40+ patch series can be difficult: not -impossible, but really frustrating to attempt.
- -To help mitigate this mess, here's a single big patch to sync up the -objtool logic to the 4.14.47 version of the tool. From this point -forward (after some other minor header file patches are applied), the -tool should be in sync and much easier to maintain over time. - -This has survived my limited testing, and as the codebase is identical -to 4.14.47, I'm pretty comfortable dropping this big change in here in -4.9.y. Hopefully all goes well... - -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- - arch/x86/include/asm/orc_types.h | 107 ++ - arch/x86/include/asm/unwind_hints.h | 103 ++ - tools/objtool/Build | 3 + - tools/objtool/Documentation/stack-validation.txt | 195 ++- - tools/objtool/Makefile | 35 +- - tools/objtool/arch.h | 65 +- - tools/objtool/arch/x86/Build | 10 +- - tools/objtool/arch/x86/decode.c | 408 +++++- - tools/objtool/arch/x86/include/asm/inat.h | 244 ++++ - tools/objtool/arch/x86/include/asm/inat_types.h | 29 + - tools/objtool/arch/x86/include/asm/insn.h | 211 ++++ - tools/objtool/arch/x86/include/asm/orc_types.h | 107 ++ - tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk | 392 ------ - tools/objtool/arch/x86/insn/inat.c | 97 -- - tools/objtool/arch/x86/insn/inat.h | 234 ---- - tools/objtool/arch/x86/insn/inat_types.h | 29 - - tools/objtool/arch/x86/insn/insn.c | 606 --------- - tools/objtool/arch/x86/insn/insn.h | 211 ---- - tools/objtool/arch/x86/insn/x86-opcode-map.txt | 1063 ---------------- - tools/objtool/arch/x86/lib/inat.c | 97 ++ - tools/objtool/arch/x86/lib/insn.c | 606 +++++++++ - tools/objtool/arch/x86/lib/x86-opcode-map.txt | 1072 ++++++++++++++++ - tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk | 393 ++++++ - tools/objtool/builtin-check.c | 9 +- - tools/objtool/builtin-orc.c | 68 + - tools/objtool/builtin.h | 6 + - tools/objtool/cfi.h | 55 + - tools/objtool/check.c | 1329 ++++++++++++++++---- - tools/objtool/check.h | 39 +- - tools/objtool/elf.c | 284 ++++- - tools/objtool/elf.h | 21 +- - tools/objtool/objtool.c | 12 +- - tools/objtool/orc.h | 30 + - tools/objtool/orc_dump.c | 213 ++++ - tools/objtool/orc_gen.c | 221 ++++ - tools/objtool/special.c | 6 +- - tools/objtool/sync-check.sh | 29 + - tools/objtool/warn.h | 10 + - 38 files changed, 5511 insertions(+), 3138 deletions(-) - create mode 100644 arch/x86/include/asm/orc_types.h - create mode 100644 arch/x86/include/asm/unwind_hints.h - create mode 100644 tools/objtool/arch/x86/include/asm/inat.h - create mode 100644 tools/objtool/arch/x86/include/asm/inat_types.h - create mode 100644 tools/objtool/arch/x86/include/asm/insn.h - create mode 100644 tools/objtool/arch/x86/include/asm/orc_types.h - delete mode 100644 tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk - delete mode 100644 tools/objtool/arch/x86/insn/inat.c - delete mode 100644 tools/objtool/arch/x86/insn/inat.h - delete mode 100644 tools/objtool/arch/x86/insn/inat_types.h - delete mode 100644 tools/objtool/arch/x86/insn/insn.c - delete mode 100644 tools/objtool/arch/x86/insn/insn.h - delete mode 100644 tools/objtool/arch/x86/insn/x86-opcode-map.txt - create mode 100644 tools/objtool/arch/x86/lib/inat.c - create mode 100644 tools/objtool/arch/x86/lib/insn.c - create mode 100644 tools/objtool/arch/x86/lib/x86-opcode-map.txt - create mode 100644 tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk - create mode 100644 tools/objtool/builtin-orc.c - create mode 100644 tools/objtool/cfi.h - create mode 100644 tools/objtool/orc.h - create mode 100644 tools/objtool/orc_dump.c - create
mode 100644 tools/objtool/orc_gen.c - create mode 100755 tools/objtool/sync-check.sh - -diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h -new file mode 100644 -index 0000000..7dc777a ---- /dev/null -+++ b/arch/x86/include/asm/orc_types.h -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. -+ */ -+ -+#ifndef _ORC_TYPES_H -+#define _ORC_TYPES_H -+ -+#include <linux/types.h> -+#include <linux/compiler.h> -+ -+/* -+ * The ORC_REG_* registers are base registers which are used to find other -+ * registers on the stack. -+ * -+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the -+ * address of the previous frame: the caller's SP before it called the current -+ * function. -+ * -+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in -+ * the current frame. -+ * -+ * The most commonly used base registers are SP and BP -- which the previous SP -+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is -+ * usually based on. -+ * -+ * The rest of the base registers are needed for special cases like entry code -+ * and GCC realigned stacks. -+ */ -+#define ORC_REG_UNDEFINED 0 -+#define ORC_REG_PREV_SP 1 -+#define ORC_REG_DX 2 -+#define ORC_REG_DI 3 -+#define ORC_REG_BP 4 -+#define ORC_REG_SP 5 -+#define ORC_REG_R10 6 -+#define ORC_REG_R13 7 -+#define ORC_REG_BP_INDIRECT 8 -+#define ORC_REG_SP_INDIRECT 9 -+#define ORC_REG_MAX 15 -+ -+/* -+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the -+ * caller's SP right before it made the call). Used for all callable -+ * functions, i.e. all C code and all callable asm functions. -+ * -+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points -+ * to a fully populated pt_regs from a syscall, interrupt, or exception. -+ * -+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset -+ * points to the iret return frame. -+ * -+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They -+ * aren't used in struct orc_entry due to size and complexity constraints. -+ * Objtool converts them to real types when it converts the hints to orc -+ * entries. -+ */ -+#define ORC_TYPE_CALL 0 -+#define ORC_TYPE_REGS 1 -+#define ORC_TYPE_REGS_IRET 2 -+#define UNWIND_HINT_TYPE_SAVE 3 -+#define UNWIND_HINT_TYPE_RESTORE 4 -+ -+#ifndef __ASSEMBLY__ -+/* -+ * This struct is more or less a vastly simplified version of the DWARF Call -+ * Frame Information standard. It contains only the necessary parts of DWARF -+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the -+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on -+ * the stack for a given code address. Each instance of the struct corresponds -+ * to one or more code locations. 
-+ */ -+struct orc_entry { -+ s16 sp_offset; -+ s16 bp_offset; -+ unsigned sp_reg:4; -+ unsigned bp_reg:4; -+ unsigned type:2; -+}; -+ -+/* -+ * This struct is used by asm and inline asm code to manually annotate the -+ * location of registers on the stack for the ORC unwinder. -+ * -+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. -+ */ -+struct unwind_hint { -+ u32 ip; -+ s16 sp_offset; -+ u8 sp_reg; -+ u8 type; -+}; -+#endif /* __ASSEMBLY__ */ -+ -+#endif /* _ORC_TYPES_H */ -diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h -new file mode 100644 -index 0000000..5e02b11 ---- /dev/null -+++ b/arch/x86/include/asm/unwind_hints.h -@@ -0,0 +1,103 @@ -+#ifndef _ASM_X86_UNWIND_HINTS_H -+#define _ASM_X86_UNWIND_HINTS_H -+ -+#include "orc_types.h" -+ -+#ifdef __ASSEMBLY__ -+ -+/* -+ * In asm, there are two kinds of code: normal C-type callable functions and -+ * the rest. The normal callable functions can be called by other code, and -+ * don't do anything unusual with the stack. Such normal callable functions -+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this -+ * category. In this case, no special debugging annotations are needed because -+ * objtool can automatically generate the ORC data for the ORC unwinder to read -+ * at runtime. -+ * -+ * Anything which doesn't fall into the above category, such as syscall and -+ * interrupt handlers, tends to not be called directly by other functions, and -+ * often does unusual non-C-function-type things with the stack pointer. Such -+ * code needs to be annotated such that objtool can understand it. The -+ * following CFI hint macros are for this type of code. -+ * -+ * These macros provide hints to objtool about the state of the stack at each -+ * instruction. Objtool starts from the hints and follows the code flow, -+ * making automatic CFI adjustments when it sees pushes and pops, filling out -+ * the debuginfo as necessary. It will also warn if it sees any -+ * inconsistencies. -+ */ -+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL -+#ifdef CONFIG_STACK_VALIDATION -+.Lunwind_hint_ip_\@: -+ .pushsection .discard.unwind_hints -+ /* struct unwind_hint */ -+ .long .Lunwind_hint_ip_\@ - . 
-+ .short \sp_offset -+ .byte \sp_reg -+ .byte \type -+ .popsection -+#endif -+.endm -+ -+.macro UNWIND_HINT_EMPTY -+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED -+.endm -+ -+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 -+ .if \base == %rsp && \indirect -+ .set sp_reg, ORC_REG_SP_INDIRECT -+ .elseif \base == %rsp -+ .set sp_reg, ORC_REG_SP -+ .elseif \base == %rbp -+ .set sp_reg, ORC_REG_BP -+ .elseif \base == %rdi -+ .set sp_reg, ORC_REG_DI -+ .elseif \base == %rdx -+ .set sp_reg, ORC_REG_DX -+ .elseif \base == %r10 -+ .set sp_reg, ORC_REG_R10 -+ .else -+ .error "UNWIND_HINT_REGS: bad base register" -+ .endif -+ -+ .set sp_offset, \offset -+ -+ .if \iret -+ .set type, ORC_TYPE_REGS_IRET -+ .elseif \extra == 0 -+ .set type, ORC_TYPE_REGS_IRET -+ .set sp_offset, \offset + (16*8) -+ .else -+ .set type, ORC_TYPE_REGS -+ .endif -+ -+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type -+.endm -+ -+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 -+ UNWIND_HINT_REGS base=\base offset=\offset iret=1 -+.endm -+ -+.macro UNWIND_HINT_FUNC sp_offset=8 -+ UNWIND_HINT sp_offset=\sp_offset -+.endm -+ -+#else /* !__ASSEMBLY__ */ -+ -+#define UNWIND_HINT(sp_reg, sp_offset, type) \ -+ "987: \n\t" \ -+ ".pushsection .discard.unwind_hints\n\t" \ -+ /* struct unwind_hint */ \ -+ ".long 987b - .\n\t" \ -+ ".short " __stringify(sp_offset) "\n\t" \ -+ ".byte " __stringify(sp_reg) "\n\t" \ -+ ".byte " __stringify(type) "\n\t" \ -+ ".popsection\n\t" -+ -+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) -+ -+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#endif /* _ASM_X86_UNWIND_HINTS_H */ -diff --git a/tools/objtool/Build b/tools/objtool/Build -index 6f2e198..749becd 100644 ---- a/tools/objtool/Build -+++ b/tools/objtool/Build -@@ -1,6 +1,9 @@ - objtool-y += arch/$(SRCARCH)/ - objtool-y += builtin-check.o -+objtool-y += builtin-orc.o - objtool-y += check.o -+objtool-y += orc_gen.o -+objtool-y += orc_dump.o - objtool-y += elf.o - objtool-y += special.o - objtool-y += objtool.o -diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt -index 55a60d3..3995735 100644 ---- a/tools/objtool/Documentation/stack-validation.txt -+++ b/tools/objtool/Documentation/stack-validation.txt -@@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata. - It enforces a set of rules on asm code and C inline assembly code so - that stack traces can be reliable. - --Currently it only checks frame pointer usage, but there are plans to add --CFI validation for C files and CFI generation for asm files. -- - For each function, it recursively follows all possible code paths and - validates the correct frame pointer state at each instruction. - -@@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of - instructions). Similarly, it knows how to follow switch statements, for - which gcc sometimes uses jump tables. - -+(Objtool also has an 'orc generate' subcommand which generates debuginfo -+for the ORC unwinder. See Documentation/x86/orc-unwinder.txt in the -+kernel tree for more details.) -+ - - Why do we need stack metadata validation? - ----------------------------------------- -@@ -93,62 +94,24 @@ a) More reliable stack traces for frame pointer enabled kernels - or at the very end of the function after the stack frame has been - destroyed. This is an inherent limitation of frame pointers. 
- --b) 100% reliable stack traces for DWARF enabled kernels -- -- (NOTE: This is not yet implemented) -- -- As an alternative to frame pointers, DWARF Call Frame Information -- (CFI) metadata can be used to walk the stack. Unlike frame pointers, -- CFI metadata is out of band. So it doesn't affect runtime -- performance and it can be reliable even when interrupts or exceptions -- are involved. -- -- For C code, gcc automatically generates DWARF CFI metadata. But for -- asm code, generating CFI is a tedious manual approach which requires -- manually placed .cfi assembler macros to be scattered throughout the -- code. It's clumsy and very easy to get wrong, and it makes the real -- code harder to read. -- -- Stacktool will improve this situation in several ways. For code -- which already has CFI annotations, it will validate them. For code -- which doesn't have CFI annotations, it will generate them. So an -- architecture can opt to strip out all the manual .cfi annotations -- from their asm code and have objtool generate them instead. -+b) ORC (Oops Rewind Capability) unwind table generation - -- We might also add a runtime stack validation debug option where we -- periodically walk the stack from schedule() and/or an NMI to ensure -- that the stack metadata is sane and that we reach the bottom of the -- stack. -+ An alternative to frame pointers and DWARF, ORC unwind data can be -+ used to walk the stack. Unlike frame pointers, ORC data is out of -+ band. So it doesn't affect runtime performance and it can be -+ reliable even when interrupts or exceptions are involved. - -- So the benefit of objtool here will be that external tooling should -- always show perfect stack traces. And the same will be true for -- kernel warning/oops traces if the architecture has a runtime DWARF -- unwinder. -+ For more details, see Documentation/x86/orc-unwinder.txt. - - c) Higher live patching compatibility rate - -- (NOTE: This is not yet implemented) -- -- Currently with CONFIG_LIVEPATCH there's a basic live patching -- framework which is safe for roughly 85-90% of "security" fixes. But -- patches can't have complex features like function dependency or -- prototype changes, or data structure changes. -- -- There's a strong need to support patches which have the more complex -- features so that the patch compatibility rate for security fixes can -- eventually approach something resembling 100%. To achieve that, a -- "consistency model" is needed, which allows tasks to be safely -- transitioned from an unpatched state to a patched state. -- -- One of the key requirements of the currently proposed livepatch -- consistency model [*] is that it needs to walk the stack of each -- sleeping task to determine if it can be transitioned to the patched -- state. If objtool can ensure that stack traces are reliable, this -- consistency model can be used and the live patching compatibility -- rate can be improved significantly. -- -- [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com -+ Livepatch has an optional "consistency model", which is needed for -+ more complex patches. In order for the consistency model to work, -+ stack traces need to be reliable (or an unreliable condition needs to -+ be detectable). Objtool makes that possible. - -+ For more details, see the livepatch documentation in the Linux kernel -+ source tree at Documentation/livepatch/livepatch.txt. - - Rules - ----- -@@ -201,80 +164,84 @@ To achieve the validation, objtool enforces the following rules: - return normally. 
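To make the ORC scheme described in section (b) above concrete, here is a schematic sketch of a single unwind step driven by the struct orc_entry laid out earlier in this patch. The state structure and error convention are invented for this sketch, the bp_reg/bp_offset bookkeeping and the ORC_TYPE_REGS cases are omitted, and the entry lookup (a binary search by instruction pointer over the sorted tables objtool emits) is assumed rather than shown; the real consumer is the unwinder in arch/x86/kernel/unwind_orc.c in kernels that carry ORC. The point is that each step is little more than a table lookup plus one addition.

#include <stdint.h>

/* Hypothetical register snapshot for this sketch. */
struct unwind_state {
	uint64_t ip, sp, bp;
};

/*
 * One unwind step, given the orc_entry already found for state->ip.
 * Relies on struct orc_entry and the ORC_REG_* values defined in the
 * orc_types.h hunk above; simplified, not the kernel's implementation.
 */
static int orc_step(struct unwind_state *st, const struct orc_entry *orc)
{
	uint64_t cfa;	/* the caller's SP at the time of the call */

	switch (orc->sp_reg) {
	case ORC_REG_SP:
		cfa = st->sp + orc->sp_offset;
		break;
	case ORC_REG_BP:
		cfa = st->bp + orc->sp_offset;
		break;
	default:
		return -1;	/* e.g. ORC_REG_UNDEFINED: end of stack */
	}

	/* For ORC_TYPE_CALL frames the return address sits at CFA - 8. */
	st->ip = *(const uint64_t *)(cfa - 8);
	st->sp = cfa;
	return 0;
}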
- - --Errors in .S files -------------------- -+Objtool warnings -+---------------- -+ -+For asm files, if you're getting an error which doesn't make sense, -+first make sure that the affected code follows the above rules. - --If you're getting an error in a compiled .S file which you don't --understand, first make sure that the affected code follows the above --rules. -+For C files, the common culprits are inline asm statements and calls to -+"noreturn" functions. See below for more details. -+ -+Another possible cause for errors in C code is if the Makefile removes -+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. - - Here are some examples of common warnings reported by objtool, what - they mean, and suggestions for how to fix them. - - --1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup -+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup - - The func() function made a function call without first saving and/or -- updating the frame pointer. -- -- If func() is indeed a callable function, add proper frame pointer -- logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove -- its ELF function annotation by changing ENDPROC to END. -- -- If you're getting this error in a .c file, see the "Errors in .c -- files" section. -+ updating the frame pointer, and CONFIG_FRAME_POINTER is enabled. - -+ If the error is for an asm file, and func() is indeed a callable -+ function, add proper frame pointer logic using the FRAME_BEGIN and -+ FRAME_END macros. Otherwise, if it's not a callable function, remove -+ its ELF function annotation by changing ENDPROC to END, and instead -+ use the manual unwind hint macros in asm/unwind_hints.h. - --2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function -+ If it's a GCC-compiled .c file, the error may be because the function -+ uses an inline asm() statement which has a "call" instruction. An -+ asm() statement with a call instruction must declare the use of the -+ stack pointer in its output operand. On x86_64, this means adding -+ the ASM_CALL_CONSTRAINT as an output constraint: - -- A return instruction was detected, but objtool couldn't find a way -- for a callable function to reach the instruction. -+ asm volatile("call func" : ASM_CALL_CONSTRAINT); - -- If the return instruction is inside (or reachable from) a callable -- function, the function needs to be annotated with the ENTRY/ENDPROC -- macros. -+ Otherwise the stack frame may not get created before the call. - -- If you _really_ need a return instruction outside of a function, and -- are 100% sure that it won't affect stack traces, you can tell -- objtool to ignore it. See the "Adding exceptions" section below. - -+2. file.o: warning: objtool: .text+0x53: unreachable instruction - --3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction -+ Objtool couldn't find a code path to reach the instruction. - -- The instruction lives inside of a callable function, but there's no -- possible control flow path from the beginning of the function to the -- instruction. -+ If the error is for an asm file, and the instruction is inside (or -+ reachable from) a callable function, the function should be annotated -+ with the ENTRY/ENDPROC macros (ENDPROC is the important one). 
-+ Otherwise, the code should probably be annotated with the unwind hint -+ macros in asm/unwind_hints.h so objtool and the unwinder can know the -+ stack state associated with the code. - -- If the instruction is actually needed, and it's actually in a -- callable function, ensure that its function is properly annotated -- with ENTRY/ENDPROC. -+ If you're 100% sure the code won't affect stack traces, or if you're -+ just a bad person, you can tell objtool to ignore it. See the -+ "Adding exceptions" section below. - - If it's not actually in a callable function (e.g. kernel entry code), - change ENDPROC to END. - - --4. asm_file.o: warning: objtool: func(): can't find starting instruction - or -- asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction -+4. file.o: warning: objtool: func(): can't find starting instruction - or -+ file.o: warning: objtool: func()+0x11dd: can't decode instruction - -- Did you put data in a text section? If so, that can confuse -+ Does the file have data in a text section? If so, that can confuse - objtool's instruction decoder. Move the data to a more appropriate - section like .data or .rodata. - - --5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction -- -- This is a kernel entry/exit instruction like sysenter or sysret. -- Such instructions aren't allowed in a callable function, and are most -- likely part of the kernel entry code. -+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function - -- If the instruction isn't actually in a callable function, change -- ENDPROC to END. -+ This is a kernel entry/exit instruction like sysenter or iret. Such -+ instructions aren't allowed in a callable function, and are most -+ likely part of the kernel entry code. They should usually not have -+ the callable function annotation (ENDPROC) and should always be -+ annotated with the unwind hint macros in asm/unwind_hints.h. - - --6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer -+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame - -- This is a dynamic jump or a jump to an undefined symbol. Stacktool -+ This is a dynamic jump or a jump to an undefined symbol. Objtool - assumed it's a sibling call and detected that the frame pointer - wasn't first restored to its original state. - -@@ -282,24 +249,28 @@ they mean, and suggestions for how to fix them. - destination code to the local file. - - If the instruction is not actually in a callable function (e.g. -- kernel entry code), change ENDPROC to END. -+ kernel entry code), change ENDPROC to END and annotate manually with -+ the unwind hint macros in asm/unwind_hints.h. - - --7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch -+7. file: warning: objtool: func()+0x5c: stack state mismatch - - The instruction's frame pointer state is inconsistent, depending on - which execution path was taken to reach the instruction. - -- Make sure the function pushes and sets up the frame pointer (for -- x86_64, this means rbp) at the beginning of the function and pops it -- at the end of the function. Also make sure that no other code in the -- function touches the frame pointer. -+ Make sure that, when CONFIG_FRAME_POINTER is enabled, the function -+ pushes and sets up the frame pointer (for x86_64, this means rbp) at -+ the beginning of the function and pops it at the end of the function.
-+ Also make sure that no other code in the function touches the frame -+ pointer. - -+ Another possibility is that the code has some asm or inline asm which -+ does some unusual things to the stack or the frame pointer. In such -+ cases it's probably appropriate to use the unwind hint macros in -+ asm/unwind_hints.h. - --Errors in .c files -------------------- - --1. c_file.o: warning: objtool: funcA() falls through to next function funcB() -+8. file.o: warning: objtool: funcA() falls through to next function funcB() - - This means that funcA() doesn't end with a return instruction or an - unconditional jump, and that objtool has determined that the function -@@ -318,22 +289,6 @@ Errors in .c files - might be corrupt due to a gcc bug. For more details, see: - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 - --2. If you're getting any other objtool error in a compiled .c file, it -- may be because the file uses an asm() statement which has a "call" -- instruction. An asm() statement with a call instruction must declare -- the use of the stack pointer in its output operand. For example, on -- x86_64: -- -- register void *__sp asm("rsp"); -- asm volatile("call func" : "+r" (__sp)); -- -- Otherwise the stack frame may not get created before the call. -- --3. Another possible cause for errors in C code is if the Makefile removes -- -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. -- --Also see the above section for .S file errors for more information what --the individual error messages mean. - - If the error doesn't seem to make sense, it could be a bug in objtool. - Feel free to ask the objtool maintainer for help. -diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile -index 041b493..e6acc28 100644 ---- a/tools/objtool/Makefile -+++ b/tools/objtool/Makefile -@@ -1,3 +1,4 @@ -+# SPDX-License-Identifier: GPL-2.0 - include ../scripts/Makefile.include - include ../scripts/Makefile.arch - -@@ -6,17 +7,19 @@ ARCH := x86 - endif - - # always use the host compiler --CC = gcc --LD = ld --AR = ar -+HOSTCC ?= gcc -+HOSTLD ?= ld -+CC = $(HOSTCC) -+LD = $(HOSTLD) -+AR = ar - - ifeq ($(srctree),) --srctree := $(patsubst %/,%,$(dir $(shell pwd))) -+srctree := $(patsubst %/,%,$(dir $(CURDIR))) - srctree := $(patsubst %/,%,$(dir $(srctree))) - endif - - SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/ --LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(PWD)/) -+LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(CURDIR)/) - LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a - - OBJTOOL := $(OUTPUT)objtool -@@ -24,8 +27,11 @@ OBJTOOL_IN := $(OBJTOOL)-in.o - - all: $(OBJTOOL) - --INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi --CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) -+INCLUDES := -I$(srctree)/tools/include \ -+ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -+ -I$(srctree)/tools/objtool/arch/$(ARCH)/include -+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -+CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) - LDFLAGS += -lelf $(LIBSUBCMD) - - # Allow old libelf to be used: -@@ -39,19 +45,8 @@ include $(srctree)/tools/build/Makefile.include - $(OBJTOOL_IN): fixdep FORCE - @$(MAKE) $(build)=objtool - --# Busybox's diff doesn't have -I, avoid warning in that case --# - $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) -- @(diff -I 2>&1 | grep -q 'option requires an argument' && \ -- test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \ -- diff -I'^#include' 
arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ -- diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ -- diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ -- diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ -- diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ -- diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ -- diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ -- || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true -+ @$(CONFIG_SHELL) ./sync-check.sh - $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ - - -@@ -61,7 +56,7 @@ $(LIBSUBCMD): fixdep FORCE - clean: - $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) - $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -- $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep -+ $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep - - FORCE: - -diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h -index a59e061..b0d7dc3 100644 ---- a/tools/objtool/arch.h -+++ b/tools/objtool/arch.h -@@ -19,25 +19,64 @@ - #define _ARCH_H - - #include <stdbool.h> -+#include <linux/list.h> - #include "elf.h" -+#include "cfi.h" - --#define INSN_FP_SAVE 1 --#define INSN_FP_SETUP 2 --#define INSN_FP_RESTORE 3 --#define INSN_JUMP_CONDITIONAL 4 --#define INSN_JUMP_UNCONDITIONAL 5 --#define INSN_JUMP_DYNAMIC 6 --#define INSN_CALL 7 --#define INSN_CALL_DYNAMIC 8 --#define INSN_RETURN 9 --#define INSN_CONTEXT_SWITCH 10 --#define INSN_NOP 11 --#define INSN_OTHER 12 -+#define INSN_JUMP_CONDITIONAL 1 -+#define INSN_JUMP_UNCONDITIONAL 2 -+#define INSN_JUMP_DYNAMIC 3 -+#define INSN_CALL 4 -+#define INSN_CALL_DYNAMIC 5 -+#define INSN_RETURN 6 -+#define INSN_CONTEXT_SWITCH 7 -+#define INSN_STACK 8 -+#define INSN_BUG 9 -+#define INSN_NOP 10 -+#define INSN_OTHER 11 - #define INSN_LAST INSN_OTHER - -+enum op_dest_type { -+ OP_DEST_REG, -+ OP_DEST_REG_INDIRECT, -+ OP_DEST_MEM, -+ OP_DEST_PUSH, -+ OP_DEST_LEAVE, -+}; -+ -+struct op_dest { -+ enum op_dest_type type; -+ unsigned char reg; -+ int offset; -+}; -+ -+enum op_src_type { -+ OP_SRC_REG, -+ OP_SRC_REG_INDIRECT, -+ OP_SRC_CONST, -+ OP_SRC_POP, -+ OP_SRC_ADD, -+ OP_SRC_AND, -+}; -+ -+struct op_src { -+ enum op_src_type type; -+ unsigned char reg; -+ int offset; -+}; -+ -+struct stack_op { -+ struct op_dest dest; -+ struct op_src src; -+}; -+ -+void arch_initial_func_cfi_state(struct cfi_state *state); -+ - int arch_decode_instruction(struct elf *elf, struct section *sec, - unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, -- unsigned long *displacement); -+ unsigned long *immediate, struct stack_op *op); -+ -+bool arch_callee_saved_reg(unsigned char reg); - - #endif /* _ARCH_H */ -diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build -index debbdb0..b998412 100644 ---- a/tools/objtool/arch/x86/Build -+++ b/tools/objtool/arch/x86/Build -@@ -1,12 +1,12 @@ - objtool-y += decode.o - --inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk --inat_tables_maps = arch/x86/insn/x86-opcode-map.txt -+inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk -+inat_tables_maps = arch/x86/lib/x86-opcode-map.txt - --$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) -+$(OUTPUT)arch/x86/lib/inat-tables.c: 
$(inat_tables_script) $(inat_tables_maps) - $(call rule_mkdir) - $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ - --$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c -+$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c - --CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn -+CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib -diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c -index 9fb487f..006b6d7 100644 ---- a/tools/objtool/arch/x86/decode.c -+++ b/tools/objtool/arch/x86/decode.c -@@ -19,14 +19,25 @@ - #include <stdlib.h> - - #define unlikely(cond) (cond) --#include "insn/insn.h" --#include "insn/inat.c" --#include "insn/insn.c" -+#include <asm/insn.h> -+#include "lib/inat.c" -+#include "lib/insn.c" - - #include "../../elf.h" - #include "../../arch.h" - #include "../../warn.h" - -+static unsigned char op_to_cfi_reg[][2] = { -+ {CFI_AX, CFI_R8}, -+ {CFI_CX, CFI_R9}, -+ {CFI_DX, CFI_R10}, -+ {CFI_BX, CFI_R11}, -+ {CFI_SP, CFI_R12}, -+ {CFI_BP, CFI_R13}, -+ {CFI_SI, CFI_R14}, -+ {CFI_DI, CFI_R15}, -+}; -+ - static int is_x86_64(struct elf *elf) - { - switch (elf->ehdr.e_machine) { -@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf) - } - } - -+bool arch_callee_saved_reg(unsigned char reg) -+{ -+ switch (reg) { -+ case CFI_BP: -+ case CFI_BX: -+ case CFI_R12: -+ case CFI_R13: -+ case CFI_R14: -+ case CFI_R15: -+ return true; -+ -+ case CFI_AX: -+ case CFI_CX: -+ case CFI_DX: -+ case CFI_SI: -+ case CFI_DI: -+ case CFI_SP: -+ case CFI_R8: -+ case CFI_R9: -+ case CFI_R10: -+ case CFI_R11: -+ case CFI_RA: -+ default: -+ return false; -+ } -+} -+ - int arch_decode_instruction(struct elf *elf, struct section *sec, - unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, -- unsigned long *immediate) -+ unsigned long *immediate, struct stack_op *op) - { - struct insn insn; -- int x86_64; -- unsigned char op1, op2, ext; -+ int x86_64, sign; -+ unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, -+ rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, -+ modrm_reg = 0, sib = 0; - - x86_64 = is_x86_64(elf); - if (x86_64 == -1) - return -1; - -- insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64); -+ insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64); - insn_get_length(&insn); -- insn_get_opcode(&insn); -- insn_get_modrm(&insn); -- insn_get_immediate(&insn); - - if (!insn_complete(&insn)) { - WARN_FUNC("can't decode instruction", sec, offset); -@@ -73,67 +110,317 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, - op1 = insn.opcode.bytes[0]; - op2 = insn.opcode.bytes[1]; - -+ if (insn.rex_prefix.nbytes) { -+ rex = insn.rex_prefix.bytes[0]; -+ rex_w = X86_REX_W(rex) >> 3; -+ rex_r = X86_REX_R(rex) >> 2; -+ rex_x = X86_REX_X(rex) >> 1; -+ rex_b = X86_REX_B(rex); -+ } -+ -+ if (insn.modrm.nbytes) { -+ modrm = insn.modrm.bytes[0]; -+ modrm_mod = X86_MODRM_MOD(modrm); -+ modrm_reg = X86_MODRM_REG(modrm); -+ modrm_rm = X86_MODRM_RM(modrm); -+ } -+ -+ if (insn.sib.nbytes) -+ sib = insn.sib.bytes[0]; -+ - switch (op1) { -- case 0x55: -- if (!insn.rex_prefix.nbytes) -- /* push rbp */ -- *type = INSN_FP_SAVE; -+ -+ case 0x1: -+ case 0x29: -+ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { -+ -+ /* add/sub reg, %rsp */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_ADD; -+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ } -+ break; -+ -+ case 0x50 ... 
0x57: -+ -+ /* push reg */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG; -+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; -+ op->dest.type = OP_DEST_PUSH; -+ -+ break; -+ -+ case 0x58 ... 0x5f: -+ -+ /* pop reg */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_POP; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; -+ - break; - -- case 0x5d: -- if (!insn.rex_prefix.nbytes) -- /* pop rbp */ -- *type = INSN_FP_RESTORE; -+ case 0x68: -+ case 0x6a: -+ /* push immediate */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_CONST; -+ op->dest.type = OP_DEST_PUSH; - break; - - case 0x70 ... 0x7f: - *type = INSN_JUMP_CONDITIONAL; - break; - -+ case 0x81: -+ case 0x83: -+ if (rex != 0x48) -+ break; -+ -+ if (modrm == 0xe4) { -+ /* and imm, %rsp */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_AND; -+ op->src.reg = CFI_SP; -+ op->src.offset = insn.immediate.value; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ break; -+ } -+ -+ if (modrm == 0xc4) -+ sign = 1; -+ else if (modrm == 0xec) -+ sign = -1; -+ else -+ break; -+ -+ /* add/sub imm, %rsp */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_ADD; -+ op->src.reg = CFI_SP; -+ op->src.offset = insn.immediate.value * sign; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ break; -+ - case 0x89: -- if (insn.rex_prefix.nbytes == 1 && -- insn.rex_prefix.bytes[0] == 0x48 && -- insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5) -- /* mov rsp, rbp */ -- *type = INSN_FP_SETUP; -+ if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { -+ -+ /* mov %rsp, reg */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG; -+ op->src.reg = CFI_SP; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; -+ break; -+ } -+ -+ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { -+ -+ /* mov reg, %rsp */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG; -+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ break; -+ } -+ -+ /* fallthrough */ -+ case 0x88: -+ if (!rex_b && -+ (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { -+ -+ /* mov reg, disp(%rbp) */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG; -+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ op->dest.type = OP_DEST_REG_INDIRECT; -+ op->dest.reg = CFI_BP; -+ op->dest.offset = insn.displacement.value; -+ -+ } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { -+ -+ /* mov reg, disp(%rsp) */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG; -+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ op->dest.type = OP_DEST_REG_INDIRECT; -+ op->dest.reg = CFI_SP; -+ op->dest.offset = insn.displacement.value; -+ } -+ -+ break; -+ -+ case 0x8b: -+ if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { -+ -+ /* mov disp(%rbp), reg */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG_INDIRECT; -+ op->src.reg = CFI_BP; -+ op->src.offset = insn.displacement.value; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ -+ } else if (rex_w && !rex_b && sib == 0x24 && -+ modrm_mod != 3 && modrm_rm == 4) { -+ -+ /* mov disp(%rsp), reg */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_REG_INDIRECT; -+ op->src.reg = CFI_SP; -+ op->src.offset = insn.displacement.value; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ } -+ - break; - - case 0x8d: -- if (insn.rex_prefix.nbytes && -- insn.rex_prefix.bytes[0] == 0x48 && -- insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c && -- 
insn.sib.nbytes && insn.sib.bytes[0] == 0x24) -- /* lea %(rsp), %rbp */ -- *type = INSN_FP_SETUP; -+ if (sib == 0x24 && rex_w && !rex_b && !rex_x) { -+ -+ *type = INSN_STACK; -+ if (!insn.displacement.value) { -+ /* lea (%rsp), reg */ -+ op->src.type = OP_SRC_REG; -+ } else { -+ /* lea disp(%rsp), reg */ -+ op->src.type = OP_SRC_ADD; -+ op->src.offset = insn.displacement.value; -+ } -+ op->src.reg = CFI_SP; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; -+ -+ } else if (rex == 0x48 && modrm == 0x65) { -+ -+ /* lea disp(%rbp), %rsp */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_ADD; -+ op->src.reg = CFI_BP; -+ op->src.offset = insn.displacement.value; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ -+ } else if (rex == 0x49 && modrm == 0x62 && -+ insn.displacement.value == -8) { -+ -+ /* -+ * lea -0x8(%r10), %rsp -+ * -+ * Restoring rsp back to its original value after a -+ * stack realignment. -+ */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_ADD; -+ op->src.reg = CFI_R10; -+ op->src.offset = -8; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ -+ } else if (rex == 0x49 && modrm == 0x65 && -+ insn.displacement.value == -16) { -+ -+ /* -+ * lea -0x10(%r13), %rsp -+ * -+ * Restoring rsp back to its original value after a -+ * stack realignment. -+ */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_ADD; -+ op->src.reg = CFI_R13; -+ op->src.offset = -16; -+ op->dest.type = OP_DEST_REG; -+ op->dest.reg = CFI_SP; -+ } -+ -+ break; -+ -+ case 0x8f: -+ /* pop to mem */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_POP; -+ op->dest.type = OP_DEST_MEM; - break; - - case 0x90: - *type = INSN_NOP; - break; - -+ case 0x9c: -+ /* pushf */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_CONST; -+ op->dest.type = OP_DEST_PUSH; -+ break; -+ -+ case 0x9d: -+ /* popf */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_POP; -+ op->dest.type = OP_DEST_MEM; -+ break; -+ - case 0x0f: -- if (op2 >= 0x80 && op2 <= 0x8f) -+ -+ if (op2 >= 0x80 && op2 <= 0x8f) { -+ - *type = INSN_JUMP_CONDITIONAL; -- else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || -- op2 == 0x35) -+ -+ } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || -+ op2 == 0x35) { -+ - /* sysenter, sysret */ - *type = INSN_CONTEXT_SWITCH; -- else if (op2 == 0x0d || op2 == 0x1f) -+ -+ } else if (op2 == 0x0b || op2 == 0xb9) { -+ -+ /* ud2 */ -+ *type = INSN_BUG; -+ -+ } else if (op2 == 0x0d || op2 == 0x1f) { -+ - /* nopl/nopw */ - *type = INSN_NOP; -- else if (op2 == 0x01 && insn.modrm.nbytes && -- (insn.modrm.bytes[0] == 0xc2 || -- insn.modrm.bytes[0] == 0xd8)) -- /* vmlaunch, vmrun */ -- *type = INSN_CONTEXT_SWITCH; -+ -+ } else if (op2 == 0xa0 || op2 == 0xa8) { -+ -+ /* push fs/gs */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_CONST; -+ op->dest.type = OP_DEST_PUSH; -+ -+ } else if (op2 == 0xa1 || op2 == 0xa9) { -+ -+ /* pop fs/gs */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_POP; -+ op->dest.type = OP_DEST_MEM; -+ } - - break; - -- case 0xc9: /* leave */ -- *type = INSN_FP_RESTORE; -+ case 0xc9: -+ /* -+ * leave -+ * -+ * equivalent to: -+ * mov bp, sp -+ * pop bp -+ */ -+ *type = INSN_STACK; -+ op->dest.type = OP_DEST_LEAVE; -+ - break; - -- case 0xe3: /* jecxz/jrcxz */ -+ case 0xe3: -+ /* jecxz/jrcxz */ - *type = INSN_JUMP_CONDITIONAL; - break; - -@@ -158,14 +445,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, - break; - - case 0xff: -- ext = X86_MODRM_REG(insn.modrm.bytes[0]); -- if (ext == 2 || ext == 3) -+ if (modrm_reg == 2 || modrm_reg == 
3) -+ - *type = INSN_CALL_DYNAMIC; -- else if (ext == 4) -+ -+ else if (modrm_reg == 4) -+ - *type = INSN_JUMP_DYNAMIC; -- else if (ext == 5) /*jmpf */ -+ -+ else if (modrm_reg == 5) -+ -+ /* jmpf */ - *type = INSN_CONTEXT_SWITCH; - -+ else if (modrm_reg == 6) { -+ -+ /* push from mem */ -+ *type = INSN_STACK; -+ op->src.type = OP_SRC_CONST; -+ op->dest.type = OP_DEST_PUSH; -+ } -+ - break; - - default: -@@ -176,3 +476,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, - - return 0; - } -+ -+void arch_initial_func_cfi_state(struct cfi_state *state) -+{ -+ int i; -+ -+ for (i = 0; i < CFI_NUM_REGS; i++) { -+ state->regs[i].base = CFI_UNDEFINED; -+ state->regs[i].offset = 0; -+ } -+ -+ /* initial CFA (call frame address) */ -+ state->cfa.base = CFI_SP; -+ state->cfa.offset = 8; -+ -+ /* initial RA (return address) */ -+ state->regs[16].base = CFI_CFA; -+ state->regs[16].offset = -8; -+} -diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h -new file mode 100644 -index 0000000..1c78580 ---- /dev/null -+++ b/tools/objtool/arch/x86/include/asm/inat.h -@@ -0,0 +1,244 @@ -+#ifndef _ASM_X86_INAT_H -+#define _ASM_X86_INAT_H -+/* -+ * x86 instruction attributes -+ * -+ * Written by Masami Hiramatsu <mhiramat@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ * -+ */ -+#include <asm/inat_types.h> -+ -+/* -+ * Internal bits. Don't use bitmasks directly, because these bits are -+ * unstable. You should use checking functions. 
-+ */ -+ -+#define INAT_OPCODE_TABLE_SIZE 256 -+#define INAT_GROUP_TABLE_SIZE 8 -+ -+/* Legacy last prefixes */ -+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ -+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ -+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ -+/* Other Legacy prefixes */ -+#define INAT_PFX_LOCK 4 /* 0xF0 */ -+#define INAT_PFX_CS 5 /* 0x2E */ -+#define INAT_PFX_DS 6 /* 0x3E */ -+#define INAT_PFX_ES 7 /* 0x26 */ -+#define INAT_PFX_FS 8 /* 0x64 */ -+#define INAT_PFX_GS 9 /* 0x65 */ -+#define INAT_PFX_SS 10 /* 0x36 */ -+#define INAT_PFX_ADDRSZ 11 /* 0x67 */ -+/* x86-64 REX prefix */ -+#define INAT_PFX_REX 12 /* 0x4X */ -+/* AVX VEX prefixes */ -+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ -+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ -+#define INAT_PFX_EVEX 15 /* EVEX prefix */ -+ -+#define INAT_LSTPFX_MAX 3 -+#define INAT_LGCPFX_MAX 11 -+ -+/* Immediate size */ -+#define INAT_IMM_BYTE 1 -+#define INAT_IMM_WORD 2 -+#define INAT_IMM_DWORD 3 -+#define INAT_IMM_QWORD 4 -+#define INAT_IMM_PTR 5 -+#define INAT_IMM_VWORD32 6 -+#define INAT_IMM_VWORD 7 -+ -+/* Legacy prefix */ -+#define INAT_PFX_OFFS 0 -+#define INAT_PFX_BITS 4 -+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) -+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) -+/* Escape opcodes */ -+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) -+#define INAT_ESC_BITS 2 -+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) -+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) -+/* Group opcodes (1-16) */ -+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) -+#define INAT_GRP_BITS 5 -+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) -+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) -+/* Immediates */ -+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) -+#define INAT_IMM_BITS 3 -+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) -+/* Flags */ -+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) -+#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) -+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) -+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) -+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) -+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) -+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) -+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) -+#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) -+/* Attribute making macros for attribute tables */ -+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) -+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) -+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) -+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) -+ -+/* Identifiers for segment registers */ -+#define INAT_SEG_REG_IGNORE 0 -+#define INAT_SEG_REG_DEFAULT 1 -+#define INAT_SEG_REG_CS 2 -+#define INAT_SEG_REG_SS 3 -+#define INAT_SEG_REG_DS 4 -+#define INAT_SEG_REG_ES 5 -+#define INAT_SEG_REG_FS 6 -+#define INAT_SEG_REG_GS 7 -+ -+/* Attribute search APIs */ -+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); -+extern int inat_get_last_prefix_id(insn_byte_t last_pfx); -+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, -+ int lpfx_id, -+ insn_attr_t esc_attr); -+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, -+ int lpfx_id, -+ insn_attr_t esc_attr); -+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, -+ insn_byte_t vex_m, -+ insn_byte_t vex_pp); -+ -+/* Attribute checking functions */ -+static inline int inat_is_legacy_prefix(insn_attr_t attr) -+{ -+ attr &= INAT_PFX_MASK; -+ return attr && attr <= 
INAT_LGCPFX_MAX; -+} -+ -+static inline int inat_is_address_size_prefix(insn_attr_t attr) -+{ -+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; -+} -+ -+static inline int inat_is_operand_size_prefix(insn_attr_t attr) -+{ -+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; -+} -+ -+static inline int inat_is_rex_prefix(insn_attr_t attr) -+{ -+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX; -+} -+ -+static inline int inat_last_prefix_id(insn_attr_t attr) -+{ -+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) -+ return 0; -+ else -+ return attr & INAT_PFX_MASK; -+} -+ -+static inline int inat_is_vex_prefix(insn_attr_t attr) -+{ -+ attr &= INAT_PFX_MASK; -+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || -+ attr == INAT_PFX_EVEX; -+} -+ -+static inline int inat_is_evex_prefix(insn_attr_t attr) -+{ -+ return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; -+} -+ -+static inline int inat_is_vex3_prefix(insn_attr_t attr) -+{ -+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; -+} -+ -+static inline int inat_is_escape(insn_attr_t attr) -+{ -+ return attr & INAT_ESC_MASK; -+} -+ -+static inline int inat_escape_id(insn_attr_t attr) -+{ -+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; -+} -+ -+static inline int inat_is_group(insn_attr_t attr) -+{ -+ return attr & INAT_GRP_MASK; -+} -+ -+static inline int inat_group_id(insn_attr_t attr) -+{ -+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; -+} -+ -+static inline int inat_group_common_attribute(insn_attr_t attr) -+{ -+ return attr & ~INAT_GRP_MASK; -+} -+ -+static inline int inat_has_immediate(insn_attr_t attr) -+{ -+ return attr & INAT_IMM_MASK; -+} -+ -+static inline int inat_immediate_size(insn_attr_t attr) -+{ -+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; -+} -+ -+static inline int inat_has_modrm(insn_attr_t attr) -+{ -+ return attr & INAT_MODRM; -+} -+ -+static inline int inat_is_force64(insn_attr_t attr) -+{ -+ return attr & INAT_FORCE64; -+} -+ -+static inline int inat_has_second_immediate(insn_attr_t attr) -+{ -+ return attr & INAT_SCNDIMM; -+} -+ -+static inline int inat_has_moffset(insn_attr_t attr) -+{ -+ return attr & INAT_MOFFSET; -+} -+ -+static inline int inat_has_variant(insn_attr_t attr) -+{ -+ return attr & INAT_VARIANT; -+} -+ -+static inline int inat_accept_vex(insn_attr_t attr) -+{ -+ return attr & INAT_VEXOK; -+} -+ -+static inline int inat_must_vex(insn_attr_t attr) -+{ -+ return attr & (INAT_VEXONLY | INAT_EVEXONLY); -+} -+ -+static inline int inat_must_evex(insn_attr_t attr) -+{ -+ return attr & INAT_EVEXONLY; -+} -+#endif -diff --git a/tools/objtool/arch/x86/include/asm/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h -new file mode 100644 -index 0000000..cb3c20c ---- /dev/null -+++ b/tools/objtool/arch/x86/include/asm/inat_types.h -@@ -0,0 +1,29 @@ -+#ifndef _ASM_X86_INAT_TYPES_H -+#define _ASM_X86_INAT_TYPES_H -+/* -+ * x86 instruction attributes -+ * -+ * Written by Masami Hiramatsu <mhiramat@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ * -+ */ -+ -+/* Instruction attributes */ -+typedef unsigned int insn_attr_t; -+typedef unsigned char insn_byte_t; -+typedef signed int insn_value_t; -+ -+#endif -diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h -new file mode 100644 -index 0000000..b3e32b0 ---- /dev/null -+++ b/tools/objtool/arch/x86/include/asm/insn.h -@@ -0,0 +1,211 @@ -+#ifndef _ASM_X86_INSN_H -+#define _ASM_X86_INSN_H -+/* -+ * x86 instruction analysis -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ * -+ * Copyright (C) IBM Corporation, 2009 -+ */ -+ -+/* insn_attr_t is defined in inat.h */ -+#include <asm/inat.h> -+ -+struct insn_field { -+ union { -+ insn_value_t value; -+ insn_byte_t bytes[4]; -+ }; -+ /* !0 if we've run insn_get_xxx() for this field */ -+ unsigned char got; -+ unsigned char nbytes; -+}; -+ -+struct insn { -+ struct insn_field prefixes; /* -+ * Prefixes -+ * prefixes.bytes[3]: last prefix -+ */ -+ struct insn_field rex_prefix; /* REX prefix */ -+ struct insn_field vex_prefix; /* VEX prefix */ -+ struct insn_field opcode; /* -+ * opcode.bytes[0]: opcode1 -+ * opcode.bytes[1]: opcode2 -+ * opcode.bytes[2]: opcode3 -+ */ -+ struct insn_field modrm; -+ struct insn_field sib; -+ struct insn_field displacement; -+ union { -+ struct insn_field immediate; -+ struct insn_field moffset1; /* for 64bit MOV */ -+ struct insn_field immediate1; /* for 64bit imm or off16/32 */ -+ }; -+ union { -+ struct insn_field moffset2; /* for 64bit MOV */ -+ struct insn_field immediate2; /* for 64bit imm or seg16 */ -+ }; -+ -+ insn_attr_t attr; -+ unsigned char opnd_bytes; -+ unsigned char addr_bytes; -+ unsigned char length; -+ unsigned char x86_64; -+ -+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */ -+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ -+ const insn_byte_t *next_byte; -+}; -+ -+#define MAX_INSN_SIZE 15 -+ -+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) -+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) -+#define X86_MODRM_RM(modrm) ((modrm) & 0x07) -+ -+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) -+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) -+#define X86_SIB_BASE(sib) ((sib) & 0x07) -+ -+#define X86_REX_W(rex) ((rex) & 8) -+#define X86_REX_R(rex) ((rex) & 4) -+#define X86_REX_X(rex) ((rex) & 2) -+#define X86_REX_B(rex) ((rex) & 1) -+ -+/* VEX bit flags */ -+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ -+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ -+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ -+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ 
-+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ -+/* VEX bit fields */ -+#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ -+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ -+#define X86_VEX2_M 1 /* VEX2.M always 1 */ -+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ -+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ -+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ -+ -+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -+extern void insn_get_prefixes(struct insn *insn); -+extern void insn_get_opcode(struct insn *insn); -+extern void insn_get_modrm(struct insn *insn); -+extern void insn_get_sib(struct insn *insn); -+extern void insn_get_displacement(struct insn *insn); -+extern void insn_get_immediate(struct insn *insn); -+extern void insn_get_length(struct insn *insn); -+ -+/* Attribute will be determined after getting ModRM (for opcode groups) */ -+static inline void insn_get_attribute(struct insn *insn) -+{ -+ insn_get_modrm(insn); -+} -+ -+/* Instruction uses RIP-relative addressing */ -+extern int insn_rip_relative(struct insn *insn); -+ -+/* Init insn for kernel text */ -+static inline void kernel_insn_init(struct insn *insn, -+ const void *kaddr, int buf_len) -+{ -+#ifdef CONFIG_X86_64 -+ insn_init(insn, kaddr, buf_len, 1); -+#else /* CONFIG_X86_32 */ -+ insn_init(insn, kaddr, buf_len, 0); -+#endif -+} -+ -+static inline int insn_is_avx(struct insn *insn) -+{ -+ if (!insn->prefixes.got) -+ insn_get_prefixes(insn); -+ return (insn->vex_prefix.value != 0); -+} -+ -+static inline int insn_is_evex(struct insn *insn) -+{ -+ if (!insn->prefixes.got) -+ insn_get_prefixes(insn); -+ return (insn->vex_prefix.nbytes == 4); -+} -+ -+/* Ensure this instruction is decoded completely */ -+static inline int insn_complete(struct insn *insn) -+{ -+ return insn->opcode.got && insn->modrm.got && insn->sib.got && -+ insn->displacement.got && insn->immediate.got; -+} -+ -+static inline insn_byte_t insn_vex_m_bits(struct insn *insn) -+{ -+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ -+ return X86_VEX2_M; -+ else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ -+ return X86_VEX3_M(insn->vex_prefix.bytes[1]); -+ else /* EVEX */ -+ return X86_EVEX_M(insn->vex_prefix.bytes[1]); -+} -+ -+static inline insn_byte_t insn_vex_p_bits(struct insn *insn) -+{ -+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ -+ return X86_VEX_P(insn->vex_prefix.bytes[1]); -+ else -+ return X86_VEX_P(insn->vex_prefix.bytes[2]); -+} -+ -+/* Get the last prefix id from last prefix or VEX prefix */ -+static inline int insn_last_prefix_id(struct insn *insn) -+{ -+ if (insn_is_avx(insn)) -+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ -+ -+ if (insn->prefixes.bytes[3]) -+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]); -+ -+ return 0; -+} -+ -+/* Offset of each field from kaddr */ -+static inline int insn_offset_rex_prefix(struct insn *insn) -+{ -+ return insn->prefixes.nbytes; -+} -+static inline int insn_offset_vex_prefix(struct insn *insn) -+{ -+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; -+} -+static inline int insn_offset_opcode(struct insn *insn) -+{ -+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; -+} -+static inline int insn_offset_modrm(struct insn *insn) -+{ -+ return insn_offset_opcode(insn) + insn->opcode.nbytes; -+} -+static inline int insn_offset_sib(struct insn *insn) -+{ -+ return insn_offset_modrm(insn) + insn->modrm.nbytes; 
-+} -+static inline int insn_offset_displacement(struct insn *insn) -+{ -+ return insn_offset_sib(insn) + insn->sib.nbytes; -+} -+static inline int insn_offset_immediate(struct insn *insn) -+{ -+ return insn_offset_displacement(insn) + insn->displacement.nbytes; -+} -+ -+#endif /* _ASM_X86_INSN_H */ -diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h -new file mode 100644 -index 0000000..9c9dc57 ---- /dev/null -+++ b/tools/objtool/arch/x86/include/asm/orc_types.h -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. -+ */ -+ -+#ifndef _ORC_TYPES_H -+#define _ORC_TYPES_H -+ -+#include <linux/types.h> -+#include <linux/compiler.h> -+ -+/* -+ * The ORC_REG_* registers are base registers which are used to find other -+ * registers on the stack. -+ * -+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the -+ * address of the previous frame: the caller's SP before it called the current -+ * function. -+ * -+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in -+ * the current frame. -+ * -+ * The most commonly used base registers are SP and BP -- which the previous SP -+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is -+ * usually based on. -+ * -+ * The rest of the base registers are needed for special cases like entry code -+ * and GCC realigned stacks. -+ */ -+#define ORC_REG_UNDEFINED 0 -+#define ORC_REG_PREV_SP 1 -+#define ORC_REG_DX 2 -+#define ORC_REG_DI 3 -+#define ORC_REG_BP 4 -+#define ORC_REG_SP 5 -+#define ORC_REG_R10 6 -+#define ORC_REG_R13 7 -+#define ORC_REG_BP_INDIRECT 8 -+#define ORC_REG_SP_INDIRECT 9 -+#define ORC_REG_MAX 15 -+ -+/* -+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the -+ * caller's SP right before it made the call). Used for all callable -+ * functions, i.e. all C code and all callable asm functions. -+ * -+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points -+ * to a fully populated pt_regs from a syscall, interrupt, or exception. -+ * -+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset -+ * points to the iret return frame. -+ * -+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They -+ * aren't used in struct orc_entry due to size and complexity constraints. -+ * Objtool converts them to real types when it converts the hints to orc -+ * entries. -+ */ -+#define ORC_TYPE_CALL 0 -+#define ORC_TYPE_REGS 1 -+#define ORC_TYPE_REGS_IRET 2 -+#define UNWIND_HINT_TYPE_SAVE 3 -+#define UNWIND_HINT_TYPE_RESTORE 4 -+ -+#ifndef __ASSEMBLY__ -+/* -+ * This struct is more or less a vastly simplified version of the DWARF Call -+ * Frame Information standard. 
It contains only the necessary parts of DWARF -+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the -+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on -+ * the stack for a given code address. Each instance of the struct corresponds -+ * to one or more code locations. -+ */ -+struct orc_entry { -+ s16 sp_offset; -+ s16 bp_offset; -+ unsigned sp_reg:4; -+ unsigned bp_reg:4; -+ unsigned type:2; -+} __packed; -+ -+/* -+ * This struct is used by asm and inline asm code to manually annotate the -+ * location of registers on the stack for the ORC unwinder. -+ * -+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. -+ */ -+struct unwind_hint { -+ u32 ip; -+ s16 sp_offset; -+ u8 sp_reg; -+ u8 type; -+}; -+#endif /* __ASSEMBLY__ */ -+ -+#endif /* _ORC_TYPES_H */ -diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk -deleted file mode 100644 -index a3d2c62..0000000 ---- a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk -+++ /dev/null -@@ -1,392 +0,0 @@ --#!/bin/awk -f --# gen-insn-attr-x86.awk: Instruction attribute table generator --# Written by Masami Hiramatsu <mhiramat@redhat.com> --# --# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c -- --# Awk implementation sanity check --function check_awk_implement() { -- if (sprintf("%x", 0) != "0") -- return "Your awk has a printf-format problem." -- return "" --} -- --# Clear working vars --function clear_vars() { -- delete table -- delete lptable2 -- delete lptable1 -- delete lptable3 -- eid = -1 # escape id -- gid = -1 # group id -- aid = -1 # AVX id -- tname = "" --} -- --BEGIN { -- # Implementation error checking -- awkchecked = check_awk_implement() -- if (awkchecked != "") { -- print "Error: " awkchecked > "/dev/stderr" -- print "Please try to use gawk." > "/dev/stderr" -- exit 1 -- } -- -- # Setup generating tables -- print "/* x86 opcode map generated from x86-opcode-map.txt */" -- print "/* Do not change this code. 
*/\n" -- ggid = 1 -- geid = 1 -- gaid = 0 -- delete etable -- delete gtable -- delete atable -- -- opnd_expr = "^[A-Za-z/]" -- ext_expr = "^\\(" -- sep_expr = "^\\|$" -- group_expr = "^Grp[0-9A-Za-z]+" -- -- imm_expr = "^[IJAOL][a-z]" -- imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" -- imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" -- imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" -- imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" -- imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" -- imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" -- imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" -- imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" -- imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" -- imm_flag["Ob"] = "INAT_MOFFSET" -- imm_flag["Ov"] = "INAT_MOFFSET" -- imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" -- -- modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" -- force64_expr = "\\([df]64\\)" -- rex_expr = "^REX(\\.[XRWB]+)*" -- fpu_expr = "^ESC" # TODO -- -- lprefix1_expr = "\\((66|!F3)\\)" -- lprefix2_expr = "\\(F3\\)" -- lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" -- lprefix_expr = "\\((66|F2|F3)\\)" -- max_lprefix = 4 -- -- # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript -- # accepts VEX prefix -- vexok_opcode_expr = "^[vk].*" -- vexok_expr = "\\(v1\\)" -- # All opcodes with (v) superscript supports *only* VEX prefix -- vexonly_expr = "\\(v\\)" -- # All opcodes with (ev) superscript supports *only* EVEX prefix -- evexonly_expr = "\\(ev\\)" -- -- prefix_expr = "\\(Prefix\\)" -- prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" -- prefix_num["REPNE"] = "INAT_PFX_REPNE" -- prefix_num["REP/REPE"] = "INAT_PFX_REPE" -- prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" -- prefix_num["XRELEASE"] = "INAT_PFX_REPE" -- prefix_num["LOCK"] = "INAT_PFX_LOCK" -- prefix_num["SEG=CS"] = "INAT_PFX_CS" -- prefix_num["SEG=DS"] = "INAT_PFX_DS" -- prefix_num["SEG=ES"] = "INAT_PFX_ES" -- prefix_num["SEG=FS"] = "INAT_PFX_FS" -- prefix_num["SEG=GS"] = "INAT_PFX_GS" -- prefix_num["SEG=SS"] = "INAT_PFX_SS" -- prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" -- prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" -- prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" -- prefix_num["EVEX"] = "INAT_PFX_EVEX" -- -- clear_vars() --} -- --function semantic_error(msg) { -- print "Semantic error at " NR ": " msg > "/dev/stderr" -- exit 1 --} -- --function debug(msg) { -- print "DEBUG: " msg --} -- --function array_size(arr, i,c) { -- c = 0 -- for (i in arr) -- c++ -- return c --} -- --/^Table:/ { -- print "/* " $0 " */" -- if (tname != "") -- semantic_error("Hit Table: before EndTable:."); --} -- --/^Referrer:/ { -- if (NF != 1) { -- # escape opcode table -- ref = "" -- for (i = 2; i <= NF; i++) -- ref = ref $i -- eid = escape[ref] -- tname = sprintf("inat_escape_table_%d", eid) -- } --} -- --/^AVXcode:/ { -- if (NF != 1) { -- # AVX/escape opcode table -- aid = $2 -- if (gaid <= aid) -- gaid = aid + 1 -- if (tname == "") # AVX only opcode table -- tname = sprintf("inat_avx_table_%d", $2) -- } -- if (aid == -1 && eid == -1) # primary opcode table -- tname = "inat_primary_table" --} -- --/^GrpTable:/ { -- print "/* " $0 " */" -- if (!($2 in group)) -- semantic_error("No group: " $2 ) -- gid = group[$2] -- tname = "inat_group_table_" gid --} -- --function print_table(tbl,name,fmt,n) --{ -- print "const insn_attr_t " name " = {" -- for (i = 0; i < n; i++) { -- id = sprintf(fmt, i) -- if (tbl[id]) -- print " [" id "] = " tbl[id] "," -- } -- print "};" --} -- --/^EndTable/ { -- if (gid != -1) { -- # print group tables -- if 
(array_size(table) != 0) { -- print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", -- "0x%x", 8) -- gtable[gid,0] = tname -- } -- if (array_size(lptable1) != 0) { -- print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", -- "0x%x", 8) -- gtable[gid,1] = tname "_1" -- } -- if (array_size(lptable2) != 0) { -- print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", -- "0x%x", 8) -- gtable[gid,2] = tname "_2" -- } -- if (array_size(lptable3) != 0) { -- print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", -- "0x%x", 8) -- gtable[gid,3] = tname "_3" -- } -- } else { -- # print primary/escaped tables -- if (array_size(table) != 0) { -- print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", -- "0x%02x", 256) -- etable[eid,0] = tname -- if (aid >= 0) -- atable[aid,0] = tname -- } -- if (array_size(lptable1) != 0) { -- print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", -- "0x%02x", 256) -- etable[eid,1] = tname "_1" -- if (aid >= 0) -- atable[aid,1] = tname "_1" -- } -- if (array_size(lptable2) != 0) { -- print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", -- "0x%02x", 256) -- etable[eid,2] = tname "_2" -- if (aid >= 0) -- atable[aid,2] = tname "_2" -- } -- if (array_size(lptable3) != 0) { -- print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", -- "0x%02x", 256) -- etable[eid,3] = tname "_3" -- if (aid >= 0) -- atable[aid,3] = tname "_3" -- } -- } -- print "" -- clear_vars() --} -- --function add_flags(old,new) { -- if (old && new) -- return old " | " new -- else if (old) -- return old -- else -- return new --} -- --# convert operands to flags. --function convert_operands(count,opnd, i,j,imm,mod) --{ -- imm = null -- mod = null -- for (j = 1; j <= count; j++) { -- i = opnd[j] -- if (match(i, imm_expr) == 1) { -- if (!imm_flag[i]) -- semantic_error("Unknown imm opnd: " i) -- if (imm) { -- if (i != "Ib") -- semantic_error("Second IMM error") -- imm = add_flags(imm, "INAT_SCNDIMM") -- } else -- imm = imm_flag[i] -- } else if (match(i, modrm_expr)) -- mod = "INAT_MODRM" -- } -- return add_flags(imm, mod) --} -- --/^[0-9a-f]+\:/ { -- if (NR == 1) -- next -- # get index -- idx = "0x" substr($1, 1, index($1,":") - 1) -- if (idx in table) -- semantic_error("Redefine " idx " in " tname) -- -- # check if escaped opcode -- if ("escape" == $2) { -- if ($3 != "#") -- semantic_error("No escaped name") -- ref = "" -- for (i = 4; i <= NF; i++) -- ref = ref $i -- if (ref in escape) -- semantic_error("Redefine escape (" ref ")") -- escape[ref] = geid -- geid++ -- table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" -- next -- } -- -- variant = null -- # converts -- i = 2 -- while (i <= NF) { -- opcode = $(i++) -- delete opnds -- ext = null -- flags = null -- opnd = null -- # parse one opcode -- if (match($i, opnd_expr)) { -- opnd = $i -- count = split($(i++), opnds, ",") -- flags = convert_operands(count, opnds) -- } -- if (match($i, ext_expr)) -- ext = $(i++) -- if (match($i, sep_expr)) -- i++ -- else if (i < NF) -- semantic_error($i " is not a separator") -- -- # check if group opcode -- if (match(opcode, group_expr)) { -- if (!(opcode in group)) { -- group[opcode] = ggid -- ggid++ -- } -- flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") -- } -- # check force(or default) 64bit -- if (match(ext, force64_expr)) -- flags = add_flags(flags, "INAT_FORCE64") -- -- # check REX prefix -- if (match(opcode, rex_expr)) -- flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") -- -- # check coprocessor escape : TODO -- if (match(opcode, fpu_expr)) -- flags = add_flags(flags, "INAT_MODRM") -- -- 
# check VEX codes -- if (match(ext, evexonly_expr)) -- flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") -- else if (match(ext, vexonly_expr)) -- flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") -- else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) -- flags = add_flags(flags, "INAT_VEXOK") -- -- # check prefixes -- if (match(ext, prefix_expr)) { -- if (!prefix_num[opcode]) -- semantic_error("Unknown prefix: " opcode) -- flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") -- } -- if (length(flags) == 0) -- continue -- # check if last prefix -- if (match(ext, lprefix1_expr)) { -- lptable1[idx] = add_flags(lptable1[idx],flags) -- variant = "INAT_VARIANT" -- } -- if (match(ext, lprefix2_expr)) { -- lptable2[idx] = add_flags(lptable2[idx],flags) -- variant = "INAT_VARIANT" -- } -- if (match(ext, lprefix3_expr)) { -- lptable3[idx] = add_flags(lptable3[idx],flags) -- variant = "INAT_VARIANT" -- } -- if (!match(ext, lprefix_expr)){ -- table[idx] = add_flags(table[idx],flags) -- } -- } -- if (variant) -- table[idx] = add_flags(table[idx],variant) --} -- --END { -- if (awkchecked != "") -- exit 1 -- # print escape opcode map's array -- print "/* Escape opcode map array */" -- print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ -- "[INAT_LSTPFX_MAX + 1] = {" -- for (i = 0; i < geid; i++) -- for (j = 0; j < max_lprefix; j++) -- if (etable[i,j]) -- print " ["i"]["j"] = "etable[i,j]"," -- print "};\n" -- # print group opcode map's array -- print "/* Group opcode map array */" -- print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ -- "[INAT_LSTPFX_MAX + 1] = {" -- for (i = 0; i < ggid; i++) -- for (j = 0; j < max_lprefix; j++) -- if (gtable[i,j]) -- print " ["i"]["j"] = "gtable[i,j]"," -- print "};\n" -- # print AVX opcode map's array -- print "/* AVX opcode map array */" -- print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ -- "[INAT_LSTPFX_MAX + 1] = {" -- for (i = 0; i < gaid; i++) -- for (j = 0; j < max_lprefix; j++) -- if (atable[i,j]) -- print " ["i"]["j"] = "atable[i,j]"," -- print "};" --} -- -diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/insn/inat.c -deleted file mode 100644 -index e4bf28e..0000000 ---- a/tools/objtool/arch/x86/insn/inat.c -+++ /dev/null -@@ -1,97 +0,0 @@ --/* -- * x86 instruction attribute tables -- * -- * Written by Masami Hiramatsu <mhiramat@redhat.com> -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-- * -- */ --#include "insn.h" -- --/* Attribute tables are generated from opcode map */ --#include "inat-tables.c" -- --/* Attribute search APIs */ --insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) --{ -- return inat_primary_table[opcode]; --} -- --int inat_get_last_prefix_id(insn_byte_t last_pfx) --{ -- insn_attr_t lpfx_attr; -- -- lpfx_attr = inat_get_opcode_attribute(last_pfx); -- return inat_last_prefix_id(lpfx_attr); --} -- --insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, -- insn_attr_t esc_attr) --{ -- const insn_attr_t *table; -- int n; -- -- n = inat_escape_id(esc_attr); -- -- table = inat_escape_tables[n][0]; -- if (!table) -- return 0; -- if (inat_has_variant(table[opcode]) && lpfx_id) { -- table = inat_escape_tables[n][lpfx_id]; -- if (!table) -- return 0; -- } -- return table[opcode]; --} -- --insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, -- insn_attr_t grp_attr) --{ -- const insn_attr_t *table; -- int n; -- -- n = inat_group_id(grp_attr); -- -- table = inat_group_tables[n][0]; -- if (!table) -- return inat_group_common_attribute(grp_attr); -- if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { -- table = inat_group_tables[n][lpfx_id]; -- if (!table) -- return inat_group_common_attribute(grp_attr); -- } -- return table[X86_MODRM_REG(modrm)] | -- inat_group_common_attribute(grp_attr); --} -- --insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, -- insn_byte_t vex_p) --{ -- const insn_attr_t *table; -- if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) -- return 0; -- /* At first, this checks the master table */ -- table = inat_avx_tables[vex_m][0]; -- if (!table) -- return 0; -- if (!inat_is_group(table[opcode]) && vex_p) { -- /* If this is not a group, get attribute directly */ -- table = inat_avx_tables[vex_m][vex_p]; -- if (!table) -- return 0; -- } -- return table[opcode]; --} -- -diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h -deleted file mode 100644 -index 125ecd2..0000000 ---- a/tools/objtool/arch/x86/insn/inat.h -+++ /dev/null -@@ -1,234 +0,0 @@ --#ifndef _ASM_X86_INAT_H --#define _ASM_X86_INAT_H --/* -- * x86 instruction attributes -- * -- * Written by Masami Hiramatsu <mhiramat@redhat.com> -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- * -- */ --#include "inat_types.h" -- --/* -- * Internal bits. Don't use bitmasks directly, because these bits are -- * unstable. You should use checking functions. 
-- */ -- --#define INAT_OPCODE_TABLE_SIZE 256 --#define INAT_GROUP_TABLE_SIZE 8 -- --/* Legacy last prefixes */ --#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ --#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ --#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ --/* Other Legacy prefixes */ --#define INAT_PFX_LOCK 4 /* 0xF0 */ --#define INAT_PFX_CS 5 /* 0x2E */ --#define INAT_PFX_DS 6 /* 0x3E */ --#define INAT_PFX_ES 7 /* 0x26 */ --#define INAT_PFX_FS 8 /* 0x64 */ --#define INAT_PFX_GS 9 /* 0x65 */ --#define INAT_PFX_SS 10 /* 0x36 */ --#define INAT_PFX_ADDRSZ 11 /* 0x67 */ --/* x86-64 REX prefix */ --#define INAT_PFX_REX 12 /* 0x4X */ --/* AVX VEX prefixes */ --#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ --#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ --#define INAT_PFX_EVEX 15 /* EVEX prefix */ -- --#define INAT_LSTPFX_MAX 3 --#define INAT_LGCPFX_MAX 11 -- --/* Immediate size */ --#define INAT_IMM_BYTE 1 --#define INAT_IMM_WORD 2 --#define INAT_IMM_DWORD 3 --#define INAT_IMM_QWORD 4 --#define INAT_IMM_PTR 5 --#define INAT_IMM_VWORD32 6 --#define INAT_IMM_VWORD 7 -- --/* Legacy prefix */ --#define INAT_PFX_OFFS 0 --#define INAT_PFX_BITS 4 --#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) --#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) --/* Escape opcodes */ --#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) --#define INAT_ESC_BITS 2 --#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) --#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) --/* Group opcodes (1-16) */ --#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) --#define INAT_GRP_BITS 5 --#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) --#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) --/* Immediates */ --#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) --#define INAT_IMM_BITS 3 --#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) --/* Flags */ --#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) --#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) --#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) --#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) --#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) --#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) --#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) --#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) --#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) --/* Attribute making macros for attribute tables */ --#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) --#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) --#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) --#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) -- --/* Attribute search APIs */ --extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); --extern int inat_get_last_prefix_id(insn_byte_t last_pfx); --extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, -- int lpfx_id, -- insn_attr_t esc_attr); --extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, -- int lpfx_id, -- insn_attr_t esc_attr); --extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, -- insn_byte_t vex_m, -- insn_byte_t vex_pp); -- --/* Attribute checking functions */ --static inline int inat_is_legacy_prefix(insn_attr_t attr) --{ -- attr &= INAT_PFX_MASK; -- return attr && attr <= INAT_LGCPFX_MAX; --} -- --static inline int inat_is_address_size_prefix(insn_attr_t attr) --{ -- return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; --} -- --static inline int inat_is_operand_size_prefix(insn_attr_t attr) --{ -- return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; --} -- 
--static inline int inat_is_rex_prefix(insn_attr_t attr) --{ -- return (attr & INAT_PFX_MASK) == INAT_PFX_REX; --} -- --static inline int inat_last_prefix_id(insn_attr_t attr) --{ -- if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) -- return 0; -- else -- return attr & INAT_PFX_MASK; --} -- --static inline int inat_is_vex_prefix(insn_attr_t attr) --{ -- attr &= INAT_PFX_MASK; -- return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || -- attr == INAT_PFX_EVEX; --} -- --static inline int inat_is_evex_prefix(insn_attr_t attr) --{ -- return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; --} -- --static inline int inat_is_vex3_prefix(insn_attr_t attr) --{ -- return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; --} -- --static inline int inat_is_escape(insn_attr_t attr) --{ -- return attr & INAT_ESC_MASK; --} -- --static inline int inat_escape_id(insn_attr_t attr) --{ -- return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; --} -- --static inline int inat_is_group(insn_attr_t attr) --{ -- return attr & INAT_GRP_MASK; --} -- --static inline int inat_group_id(insn_attr_t attr) --{ -- return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; --} -- --static inline int inat_group_common_attribute(insn_attr_t attr) --{ -- return attr & ~INAT_GRP_MASK; --} -- --static inline int inat_has_immediate(insn_attr_t attr) --{ -- return attr & INAT_IMM_MASK; --} -- --static inline int inat_immediate_size(insn_attr_t attr) --{ -- return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; --} -- --static inline int inat_has_modrm(insn_attr_t attr) --{ -- return attr & INAT_MODRM; --} -- --static inline int inat_is_force64(insn_attr_t attr) --{ -- return attr & INAT_FORCE64; --} -- --static inline int inat_has_second_immediate(insn_attr_t attr) --{ -- return attr & INAT_SCNDIMM; --} -- --static inline int inat_has_moffset(insn_attr_t attr) --{ -- return attr & INAT_MOFFSET; --} -- --static inline int inat_has_variant(insn_attr_t attr) --{ -- return attr & INAT_VARIANT; --} -- --static inline int inat_accept_vex(insn_attr_t attr) --{ -- return attr & INAT_VEXOK; --} -- --static inline int inat_must_vex(insn_attr_t attr) --{ -- return attr & (INAT_VEXONLY | INAT_EVEXONLY); --} -- --static inline int inat_must_evex(insn_attr_t attr) --{ -- return attr & INAT_EVEXONLY; --} --#endif -diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/insn/inat_types.h -deleted file mode 100644 -index cb3c20c..0000000 ---- a/tools/objtool/arch/x86/insn/inat_types.h -+++ /dev/null -@@ -1,29 +0,0 @@ --#ifndef _ASM_X86_INAT_TYPES_H --#define _ASM_X86_INAT_TYPES_H --/* -- * x86 instruction attributes -- * -- * Written by Masami Hiramatsu <mhiramat@redhat.com> -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-- * -- */ -- --/* Instruction attributes */ --typedef unsigned int insn_attr_t; --typedef unsigned char insn_byte_t; --typedef signed int insn_value_t; -- --#endif -diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c -deleted file mode 100644 -index ca983e2..0000000 ---- a/tools/objtool/arch/x86/insn/insn.c -+++ /dev/null -@@ -1,606 +0,0 @@ --/* -- * x86 instruction analysis -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- * -- * Copyright (C) IBM Corporation, 2002, 2004, 2009 -- */ -- --#ifdef __KERNEL__ --#include <linux/string.h> --#else --#include <string.h> --#endif --#include "inat.h" --#include "insn.h" -- --/* Verify next sizeof(t) bytes can be on the same instruction */ --#define validate_next(t, insn, n) \ -- ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) -- --#define __get_next(t, insn) \ -- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) -- --#define __peek_nbyte_next(t, insn, n) \ -- ({ t r = *(t*)((insn)->next_byte + n); r; }) -- --#define get_next(t, insn) \ -- ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) -- --#define peek_nbyte_next(t, insn, n) \ -- ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) -- --#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) -- --/** -- * insn_init() - initialize struct insn -- * @insn: &struct insn to be initialized -- * @kaddr: address (in kernel memory) of instruction (or copy thereof) -- * @x86_64: !0 for 64-bit kernel or 64-bit app -- */ --void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) --{ -- /* -- * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid -- * even if the input buffer is long enough to hold them. -- */ -- if (buf_len > MAX_INSN_SIZE) -- buf_len = MAX_INSN_SIZE; -- -- memset(insn, 0, sizeof(*insn)); -- insn->kaddr = kaddr; -- insn->end_kaddr = kaddr + buf_len; -- insn->next_byte = kaddr; -- insn->x86_64 = x86_64 ? 1 : 0; -- insn->opnd_bytes = 4; -- if (x86_64) -- insn->addr_bytes = 8; -- else -- insn->addr_bytes = 4; --} -- --/** -- * insn_get_prefixes - scan x86 instruction prefix bytes -- * @insn: &struct insn containing instruction -- * -- * Populates the @insn->prefixes bitmap, and updates @insn->next_byte -- * to point to the (first) opcode. No effect if @insn->prefixes.got -- * is already set. 
-- */ --void insn_get_prefixes(struct insn *insn) --{ -- struct insn_field *prefixes = &insn->prefixes; -- insn_attr_t attr; -- insn_byte_t b, lb; -- int i, nb; -- -- if (prefixes->got) -- return; -- -- nb = 0; -- lb = 0; -- b = peek_next(insn_byte_t, insn); -- attr = inat_get_opcode_attribute(b); -- while (inat_is_legacy_prefix(attr)) { -- /* Skip if same prefix */ -- for (i = 0; i < nb; i++) -- if (prefixes->bytes[i] == b) -- goto found; -- if (nb == 4) -- /* Invalid instruction */ -- break; -- prefixes->bytes[nb++] = b; -- if (inat_is_address_size_prefix(attr)) { -- /* address size switches 2/4 or 4/8 */ -- if (insn->x86_64) -- insn->addr_bytes ^= 12; -- else -- insn->addr_bytes ^= 6; -- } else if (inat_is_operand_size_prefix(attr)) { -- /* oprand size switches 2/4 */ -- insn->opnd_bytes ^= 6; -- } --found: -- prefixes->nbytes++; -- insn->next_byte++; -- lb = b; -- b = peek_next(insn_byte_t, insn); -- attr = inat_get_opcode_attribute(b); -- } -- /* Set the last prefix */ -- if (lb && lb != insn->prefixes.bytes[3]) { -- if (unlikely(insn->prefixes.bytes[3])) { -- /* Swap the last prefix */ -- b = insn->prefixes.bytes[3]; -- for (i = 0; i < nb; i++) -- if (prefixes->bytes[i] == lb) -- prefixes->bytes[i] = b; -- } -- insn->prefixes.bytes[3] = lb; -- } -- -- /* Decode REX prefix */ -- if (insn->x86_64) { -- b = peek_next(insn_byte_t, insn); -- attr = inat_get_opcode_attribute(b); -- if (inat_is_rex_prefix(attr)) { -- insn->rex_prefix.value = b; -- insn->rex_prefix.nbytes = 1; -- insn->next_byte++; -- if (X86_REX_W(b)) -- /* REX.W overrides opnd_size */ -- insn->opnd_bytes = 8; -- } -- } -- insn->rex_prefix.got = 1; -- -- /* Decode VEX prefix */ -- b = peek_next(insn_byte_t, insn); -- attr = inat_get_opcode_attribute(b); -- if (inat_is_vex_prefix(attr)) { -- insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); -- if (!insn->x86_64) { -- /* -- * In 32-bits mode, if the [7:6] bits (mod bits of -- * ModRM) on the second byte are not 11b, it is -- * LDS or LES or BOUND. -- */ -- if (X86_MODRM_MOD(b2) != 3) -- goto vex_end; -- } -- insn->vex_prefix.bytes[0] = b; -- insn->vex_prefix.bytes[1] = b2; -- if (inat_is_evex_prefix(attr)) { -- b2 = peek_nbyte_next(insn_byte_t, insn, 2); -- insn->vex_prefix.bytes[2] = b2; -- b2 = peek_nbyte_next(insn_byte_t, insn, 3); -- insn->vex_prefix.bytes[3] = b2; -- insn->vex_prefix.nbytes = 4; -- insn->next_byte += 4; -- if (insn->x86_64 && X86_VEX_W(b2)) -- /* VEX.W overrides opnd_size */ -- insn->opnd_bytes = 8; -- } else if (inat_is_vex3_prefix(attr)) { -- b2 = peek_nbyte_next(insn_byte_t, insn, 2); -- insn->vex_prefix.bytes[2] = b2; -- insn->vex_prefix.nbytes = 3; -- insn->next_byte += 3; -- if (insn->x86_64 && X86_VEX_W(b2)) -- /* VEX.W overrides opnd_size */ -- insn->opnd_bytes = 8; -- } else { -- /* -- * For VEX2, fake VEX3-like byte#2. -- * Makes it easier to decode vex.W, vex.vvvv, -- * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. -- */ -- insn->vex_prefix.bytes[2] = b2 & 0x7f; -- insn->vex_prefix.nbytes = 2; -- insn->next_byte += 2; -- } -- } --vex_end: -- insn->vex_prefix.got = 1; -- -- prefixes->got = 1; -- --err_out: -- return; --} -- --/** -- * insn_get_opcode - collect opcode(s) -- * @insn: &struct insn containing instruction -- * -- * Populates @insn->opcode, updates @insn->next_byte to point past the -- * opcode byte(s), and set @insn->attr (except for groups). -- * If necessary, first collects any preceding (prefix) bytes. -- * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got -- * is already 1. 
-- */ --void insn_get_opcode(struct insn *insn) --{ -- struct insn_field *opcode = &insn->opcode; -- insn_byte_t op; -- int pfx_id; -- if (opcode->got) -- return; -- if (!insn->prefixes.got) -- insn_get_prefixes(insn); -- -- /* Get first opcode */ -- op = get_next(insn_byte_t, insn); -- opcode->bytes[0] = op; -- opcode->nbytes = 1; -- -- /* Check if there is VEX prefix or not */ -- if (insn_is_avx(insn)) { -- insn_byte_t m, p; -- m = insn_vex_m_bits(insn); -- p = insn_vex_p_bits(insn); -- insn->attr = inat_get_avx_attribute(op, m, p); -- if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || -- (!inat_accept_vex(insn->attr) && -- !inat_is_group(insn->attr))) -- insn->attr = 0; /* This instruction is bad */ -- goto end; /* VEX has only 1 byte for opcode */ -- } -- -- insn->attr = inat_get_opcode_attribute(op); -- while (inat_is_escape(insn->attr)) { -- /* Get escaped opcode */ -- op = get_next(insn_byte_t, insn); -- opcode->bytes[opcode->nbytes++] = op; -- pfx_id = insn_last_prefix_id(insn); -- insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); -- } -- if (inat_must_vex(insn->attr)) -- insn->attr = 0; /* This instruction is bad */ --end: -- opcode->got = 1; -- --err_out: -- return; --} -- --/** -- * insn_get_modrm - collect ModRM byte, if any -- * @insn: &struct insn containing instruction -- * -- * Populates @insn->modrm and updates @insn->next_byte to point past the -- * ModRM byte, if any. If necessary, first collects the preceding bytes -- * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. -- */ --void insn_get_modrm(struct insn *insn) --{ -- struct insn_field *modrm = &insn->modrm; -- insn_byte_t pfx_id, mod; -- if (modrm->got) -- return; -- if (!insn->opcode.got) -- insn_get_opcode(insn); -- -- if (inat_has_modrm(insn->attr)) { -- mod = get_next(insn_byte_t, insn); -- modrm->value = mod; -- modrm->nbytes = 1; -- if (inat_is_group(insn->attr)) { -- pfx_id = insn_last_prefix_id(insn); -- insn->attr = inat_get_group_attribute(mod, pfx_id, -- insn->attr); -- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) -- insn->attr = 0; /* This is bad */ -- } -- } -- -- if (insn->x86_64 && inat_is_force64(insn->attr)) -- insn->opnd_bytes = 8; -- modrm->got = 1; -- --err_out: -- return; --} -- -- --/** -- * insn_rip_relative() - Does instruction use RIP-relative addressing mode? -- * @insn: &struct insn containing instruction -- * -- * If necessary, first collects the instruction up to and including the -- * ModRM byte. No effect if @insn->x86_64 is 0. -- */ --int insn_rip_relative(struct insn *insn) --{ -- struct insn_field *modrm = &insn->modrm; -- -- if (!insn->x86_64) -- return 0; -- if (!modrm->got) -- insn_get_modrm(insn); -- /* -- * For rip-relative instructions, the mod field (top 2 bits) -- * is zero and the r/m field (bottom 3 bits) is 0x5. -- */ -- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); --} -- --/** -- * insn_get_sib() - Get the SIB byte of instruction -- * @insn: &struct insn containing instruction -- * -- * If necessary, first collects the instruction up to and including the -- * ModRM byte. 
-- */ --void insn_get_sib(struct insn *insn) --{ -- insn_byte_t modrm; -- -- if (insn->sib.got) -- return; -- if (!insn->modrm.got) -- insn_get_modrm(insn); -- if (insn->modrm.nbytes) { -- modrm = (insn_byte_t)insn->modrm.value; -- if (insn->addr_bytes != 2 && -- X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { -- insn->sib.value = get_next(insn_byte_t, insn); -- insn->sib.nbytes = 1; -- } -- } -- insn->sib.got = 1; -- --err_out: -- return; --} -- -- --/** -- * insn_get_displacement() - Get the displacement of instruction -- * @insn: &struct insn containing instruction -- * -- * If necessary, first collects the instruction up to and including the -- * SIB byte. -- * Displacement value is sign-expanded. -- */ --void insn_get_displacement(struct insn *insn) --{ -- insn_byte_t mod, rm, base; -- -- if (insn->displacement.got) -- return; -- if (!insn->sib.got) -- insn_get_sib(insn); -- if (insn->modrm.nbytes) { -- /* -- * Interpreting the modrm byte: -- * mod = 00 - no displacement fields (exceptions below) -- * mod = 01 - 1-byte displacement field -- * mod = 10 - displacement field is 4 bytes, or 2 bytes if -- * address size = 2 (0x67 prefix in 32-bit mode) -- * mod = 11 - no memory operand -- * -- * If address size = 2... -- * mod = 00, r/m = 110 - displacement field is 2 bytes -- * -- * If address size != 2... -- * mod != 11, r/m = 100 - SIB byte exists -- * mod = 00, SIB base = 101 - displacement field is 4 bytes -- * mod = 00, r/m = 101 - rip-relative addressing, displacement -- * field is 4 bytes -- */ -- mod = X86_MODRM_MOD(insn->modrm.value); -- rm = X86_MODRM_RM(insn->modrm.value); -- base = X86_SIB_BASE(insn->sib.value); -- if (mod == 3) -- goto out; -- if (mod == 1) { -- insn->displacement.value = get_next(signed char, insn); -- insn->displacement.nbytes = 1; -- } else if (insn->addr_bytes == 2) { -- if ((mod == 0 && rm == 6) || mod == 2) { -- insn->displacement.value = -- get_next(short, insn); -- insn->displacement.nbytes = 2; -- } -- } else { -- if ((mod == 0 && rm == 5) || mod == 2 || -- (mod == 0 && base == 5)) { -- insn->displacement.value = get_next(int, insn); -- insn->displacement.nbytes = 4; -- } -- } -- } --out: -- insn->displacement.got = 1; -- --err_out: -- return; --} -- --/* Decode moffset16/32/64. Return 0 if failed */ --static int __get_moffset(struct insn *insn) --{ -- switch (insn->addr_bytes) { -- case 2: -- insn->moffset1.value = get_next(short, insn); -- insn->moffset1.nbytes = 2; -- break; -- case 4: -- insn->moffset1.value = get_next(int, insn); -- insn->moffset1.nbytes = 4; -- break; -- case 8: -- insn->moffset1.value = get_next(int, insn); -- insn->moffset1.nbytes = 4; -- insn->moffset2.value = get_next(int, insn); -- insn->moffset2.nbytes = 4; -- break; -- default: /* opnd_bytes must be modified manually */ -- goto err_out; -- } -- insn->moffset1.got = insn->moffset2.got = 1; -- -- return 1; -- --err_out: -- return 0; --} -- --/* Decode imm v32(Iz). 
Return 0 if failed */ --static int __get_immv32(struct insn *insn) --{ -- switch (insn->opnd_bytes) { -- case 2: -- insn->immediate.value = get_next(short, insn); -- insn->immediate.nbytes = 2; -- break; -- case 4: -- case 8: -- insn->immediate.value = get_next(int, insn); -- insn->immediate.nbytes = 4; -- break; -- default: /* opnd_bytes must be modified manually */ -- goto err_out; -- } -- -- return 1; -- --err_out: -- return 0; --} -- --/* Decode imm v64(Iv/Ov), Return 0 if failed */ --static int __get_immv(struct insn *insn) --{ -- switch (insn->opnd_bytes) { -- case 2: -- insn->immediate1.value = get_next(short, insn); -- insn->immediate1.nbytes = 2; -- break; -- case 4: -- insn->immediate1.value = get_next(int, insn); -- insn->immediate1.nbytes = 4; -- break; -- case 8: -- insn->immediate1.value = get_next(int, insn); -- insn->immediate1.nbytes = 4; -- insn->immediate2.value = get_next(int, insn); -- insn->immediate2.nbytes = 4; -- break; -- default: /* opnd_bytes must be modified manually */ -- goto err_out; -- } -- insn->immediate1.got = insn->immediate2.got = 1; -- -- return 1; --err_out: -- return 0; --} -- --/* Decode ptr16:16/32(Ap) */ --static int __get_immptr(struct insn *insn) --{ -- switch (insn->opnd_bytes) { -- case 2: -- insn->immediate1.value = get_next(short, insn); -- insn->immediate1.nbytes = 2; -- break; -- case 4: -- insn->immediate1.value = get_next(int, insn); -- insn->immediate1.nbytes = 4; -- break; -- case 8: -- /* ptr16:64 is not exist (no segment) */ -- return 0; -- default: /* opnd_bytes must be modified manually */ -- goto err_out; -- } -- insn->immediate2.value = get_next(unsigned short, insn); -- insn->immediate2.nbytes = 2; -- insn->immediate1.got = insn->immediate2.got = 1; -- -- return 1; --err_out: -- return 0; --} -- --/** -- * insn_get_immediate() - Get the immediates of instruction -- * @insn: &struct insn containing instruction -- * -- * If necessary, first collects the instruction up to and including the -- * displacement bytes. -- * Basically, most of immediates are sign-expanded. 
Unsigned-value can be -- * get by bit masking with ((1 << (nbytes * 8)) - 1) -- */ --void insn_get_immediate(struct insn *insn) --{ -- if (insn->immediate.got) -- return; -- if (!insn->displacement.got) -- insn_get_displacement(insn); -- -- if (inat_has_moffset(insn->attr)) { -- if (!__get_moffset(insn)) -- goto err_out; -- goto done; -- } -- -- if (!inat_has_immediate(insn->attr)) -- /* no immediates */ -- goto done; -- -- switch (inat_immediate_size(insn->attr)) { -- case INAT_IMM_BYTE: -- insn->immediate.value = get_next(signed char, insn); -- insn->immediate.nbytes = 1; -- break; -- case INAT_IMM_WORD: -- insn->immediate.value = get_next(short, insn); -- insn->immediate.nbytes = 2; -- break; -- case INAT_IMM_DWORD: -- insn->immediate.value = get_next(int, insn); -- insn->immediate.nbytes = 4; -- break; -- case INAT_IMM_QWORD: -- insn->immediate1.value = get_next(int, insn); -- insn->immediate1.nbytes = 4; -- insn->immediate2.value = get_next(int, insn); -- insn->immediate2.nbytes = 4; -- break; -- case INAT_IMM_PTR: -- if (!__get_immptr(insn)) -- goto err_out; -- break; -- case INAT_IMM_VWORD32: -- if (!__get_immv32(insn)) -- goto err_out; -- break; -- case INAT_IMM_VWORD: -- if (!__get_immv(insn)) -- goto err_out; -- break; -- default: -- /* Here, insn must have an immediate, but failed */ -- goto err_out; -- } -- if (inat_has_second_immediate(insn->attr)) { -- insn->immediate2.value = get_next(signed char, insn); -- insn->immediate2.nbytes = 1; -- } --done: -- insn->immediate.got = 1; -- --err_out: -- return; --} -- --/** -- * insn_get_length() - Get the length of instruction -- * @insn: &struct insn containing instruction -- * -- * If necessary, first collects the instruction up to and including the -- * immediates bytes. -- */ --void insn_get_length(struct insn *insn) --{ -- if (insn->length) -- return; -- if (!insn->immediate.got) -- insn_get_immediate(insn); -- insn->length = (unsigned char)((unsigned long)insn->next_byte -- - (unsigned long)insn->kaddr); --} -diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h -deleted file mode 100644 -index e23578c..0000000 ---- a/tools/objtool/arch/x86/insn/insn.h -+++ /dev/null -@@ -1,211 +0,0 @@ --#ifndef _ASM_X86_INSN_H --#define _ASM_X86_INSN_H --/* -- * x86 instruction analysis -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- * This program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-- * -- * Copyright (C) IBM Corporation, 2009 -- */ -- --/* insn_attr_t is defined in inat.h */ --#include "inat.h" -- --struct insn_field { -- union { -- insn_value_t value; -- insn_byte_t bytes[4]; -- }; -- /* !0 if we've run insn_get_xxx() for this field */ -- unsigned char got; -- unsigned char nbytes; --}; -- --struct insn { -- struct insn_field prefixes; /* -- * Prefixes -- * prefixes.bytes[3]: last prefix -- */ -- struct insn_field rex_prefix; /* REX prefix */ -- struct insn_field vex_prefix; /* VEX prefix */ -- struct insn_field opcode; /* -- * opcode.bytes[0]: opcode1 -- * opcode.bytes[1]: opcode2 -- * opcode.bytes[2]: opcode3 -- */ -- struct insn_field modrm; -- struct insn_field sib; -- struct insn_field displacement; -- union { -- struct insn_field immediate; -- struct insn_field moffset1; /* for 64bit MOV */ -- struct insn_field immediate1; /* for 64bit imm or off16/32 */ -- }; -- union { -- struct insn_field moffset2; /* for 64bit MOV */ -- struct insn_field immediate2; /* for 64bit imm or seg16 */ -- }; -- -- insn_attr_t attr; -- unsigned char opnd_bytes; -- unsigned char addr_bytes; -- unsigned char length; -- unsigned char x86_64; -- -- const insn_byte_t *kaddr; /* kernel address of insn to analyze */ -- const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ -- const insn_byte_t *next_byte; --}; -- --#define MAX_INSN_SIZE 15 -- --#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) --#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) --#define X86_MODRM_RM(modrm) ((modrm) & 0x07) -- --#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) --#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) --#define X86_SIB_BASE(sib) ((sib) & 0x07) -- --#define X86_REX_W(rex) ((rex) & 8) --#define X86_REX_R(rex) ((rex) & 4) --#define X86_REX_X(rex) ((rex) & 2) --#define X86_REX_B(rex) ((rex) & 1) -- --/* VEX bit flags */ --#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ --#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ --#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ --#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ --#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ --/* VEX bit fields */ --#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ --#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ --#define X86_VEX2_M 1 /* VEX2.M always 1 */ --#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ --#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ --#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ -- --extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); --extern void insn_get_prefixes(struct insn *insn); --extern void insn_get_opcode(struct insn *insn); --extern void insn_get_modrm(struct insn *insn); --extern void insn_get_sib(struct insn *insn); --extern void insn_get_displacement(struct insn *insn); --extern void insn_get_immediate(struct insn *insn); --extern void insn_get_length(struct insn *insn); -- --/* Attribute will be determined after getting ModRM (for opcode groups) */ --static inline void insn_get_attribute(struct insn *insn) --{ -- insn_get_modrm(insn); --} -- --/* Instruction uses RIP-relative addressing */ --extern int insn_rip_relative(struct insn *insn); -- --/* Init insn for kernel text */ --static inline void kernel_insn_init(struct insn *insn, -- const void *kaddr, int buf_len) --{ --#ifdef CONFIG_X86_64 -- insn_init(insn, kaddr, buf_len, 1); --#else /* CONFIG_X86_32 */ -- insn_init(insn, kaddr, buf_len, 0); --#endif --} -- 
--static inline int insn_is_avx(struct insn *insn) --{ -- if (!insn->prefixes.got) -- insn_get_prefixes(insn); -- return (insn->vex_prefix.value != 0); --} -- --static inline int insn_is_evex(struct insn *insn) --{ -- if (!insn->prefixes.got) -- insn_get_prefixes(insn); -- return (insn->vex_prefix.nbytes == 4); --} -- --/* Ensure this instruction is decoded completely */ --static inline int insn_complete(struct insn *insn) --{ -- return insn->opcode.got && insn->modrm.got && insn->sib.got && -- insn->displacement.got && insn->immediate.got; --} -- --static inline insn_byte_t insn_vex_m_bits(struct insn *insn) --{ -- if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ -- return X86_VEX2_M; -- else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ -- return X86_VEX3_M(insn->vex_prefix.bytes[1]); -- else /* EVEX */ -- return X86_EVEX_M(insn->vex_prefix.bytes[1]); --} -- --static inline insn_byte_t insn_vex_p_bits(struct insn *insn) --{ -- if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ -- return X86_VEX_P(insn->vex_prefix.bytes[1]); -- else -- return X86_VEX_P(insn->vex_prefix.bytes[2]); --} -- --/* Get the last prefix id from last prefix or VEX prefix */ --static inline int insn_last_prefix_id(struct insn *insn) --{ -- if (insn_is_avx(insn)) -- return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ -- -- if (insn->prefixes.bytes[3]) -- return inat_get_last_prefix_id(insn->prefixes.bytes[3]); -- -- return 0; --} -- --/* Offset of each field from kaddr */ --static inline int insn_offset_rex_prefix(struct insn *insn) --{ -- return insn->prefixes.nbytes; --} --static inline int insn_offset_vex_prefix(struct insn *insn) --{ -- return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; --} --static inline int insn_offset_opcode(struct insn *insn) --{ -- return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; --} --static inline int insn_offset_modrm(struct insn *insn) --{ -- return insn_offset_opcode(insn) + insn->opcode.nbytes; --} --static inline int insn_offset_sib(struct insn *insn) --{ -- return insn_offset_modrm(insn) + insn->modrm.nbytes; --} --static inline int insn_offset_displacement(struct insn *insn) --{ -- return insn_offset_sib(insn) + insn->sib.nbytes; --} --static inline int insn_offset_immediate(struct insn *insn) --{ -- return insn_offset_displacement(insn) + insn->displacement.nbytes; --} -- --#endif /* _ASM_X86_INSN_H */ -diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt -deleted file mode 100644 -index 767be7c..0000000 ---- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt -+++ /dev/null -@@ -1,1063 +0,0 @@ --# x86 Opcode Maps --# --# This is (mostly) based on following documentations. --# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C --# (#326018-047US, June 2013) --# --#<Opcode maps> --# Table: table-name --# Referrer: escaped-name --# AVXcode: avx-code --# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] --# (or) --# opcode: escape # escaped-name --# EndTable --# --# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix --# mnemonics that begin with lowercase 'k' accept a VEX prefix --# --#<group maps> --# GrpTable: GrpXXX --# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] --# EndTable --# --# AVX Superscripts --# (ev): this opcode requires EVEX prefix. --# (evo): this opcode is changed by EVEX prefix (EVEX opcode) --# (v): this opcode requires VEX prefix. 
--# (v1): this opcode only supports 128bit VEX. --# --# Last Prefix Superscripts --# - (66): the last prefix is 0x66 --# - (F3): the last prefix is 0xF3 --# - (F2): the last prefix is 0xF2 --# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) --# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. -- --Table: one byte opcode --Referrer: --AVXcode: --# 0x00 - 0x0f --00: ADD Eb,Gb --01: ADD Ev,Gv --02: ADD Gb,Eb --03: ADD Gv,Ev --04: ADD AL,Ib --05: ADD rAX,Iz --06: PUSH ES (i64) --07: POP ES (i64) --08: OR Eb,Gb --09: OR Ev,Gv --0a: OR Gb,Eb --0b: OR Gv,Ev --0c: OR AL,Ib --0d: OR rAX,Iz --0e: PUSH CS (i64) --0f: escape # 2-byte escape --# 0x10 - 0x1f --10: ADC Eb,Gb --11: ADC Ev,Gv --12: ADC Gb,Eb --13: ADC Gv,Ev --14: ADC AL,Ib --15: ADC rAX,Iz --16: PUSH SS (i64) --17: POP SS (i64) --18: SBB Eb,Gb --19: SBB Ev,Gv --1a: SBB Gb,Eb --1b: SBB Gv,Ev --1c: SBB AL,Ib --1d: SBB rAX,Iz --1e: PUSH DS (i64) --1f: POP DS (i64) --# 0x20 - 0x2f --20: AND Eb,Gb --21: AND Ev,Gv --22: AND Gb,Eb --23: AND Gv,Ev --24: AND AL,Ib --25: AND rAx,Iz --26: SEG=ES (Prefix) --27: DAA (i64) --28: SUB Eb,Gb --29: SUB Ev,Gv --2a: SUB Gb,Eb --2b: SUB Gv,Ev --2c: SUB AL,Ib --2d: SUB rAX,Iz --2e: SEG=CS (Prefix) --2f: DAS (i64) --# 0x30 - 0x3f --30: XOR Eb,Gb --31: XOR Ev,Gv --32: XOR Gb,Eb --33: XOR Gv,Ev --34: XOR AL,Ib --35: XOR rAX,Iz --36: SEG=SS (Prefix) --37: AAA (i64) --38: CMP Eb,Gb --39: CMP Ev,Gv --3a: CMP Gb,Eb --3b: CMP Gv,Ev --3c: CMP AL,Ib --3d: CMP rAX,Iz --3e: SEG=DS (Prefix) --3f: AAS (i64) --# 0x40 - 0x4f --40: INC eAX (i64) | REX (o64) --41: INC eCX (i64) | REX.B (o64) --42: INC eDX (i64) | REX.X (o64) --43: INC eBX (i64) | REX.XB (o64) --44: INC eSP (i64) | REX.R (o64) --45: INC eBP (i64) | REX.RB (o64) --46: INC eSI (i64) | REX.RX (o64) --47: INC eDI (i64) | REX.RXB (o64) --48: DEC eAX (i64) | REX.W (o64) --49: DEC eCX (i64) | REX.WB (o64) --4a: DEC eDX (i64) | REX.WX (o64) --4b: DEC eBX (i64) | REX.WXB (o64) --4c: DEC eSP (i64) | REX.WR (o64) --4d: DEC eBP (i64) | REX.WRB (o64) --4e: DEC eSI (i64) | REX.WRX (o64) --4f: DEC eDI (i64) | REX.WRXB (o64) --# 0x50 - 0x5f --50: PUSH rAX/r8 (d64) --51: PUSH rCX/r9 (d64) --52: PUSH rDX/r10 (d64) --53: PUSH rBX/r11 (d64) --54: PUSH rSP/r12 (d64) --55: PUSH rBP/r13 (d64) --56: PUSH rSI/r14 (d64) --57: PUSH rDI/r15 (d64) --58: POP rAX/r8 (d64) --59: POP rCX/r9 (d64) --5a: POP rDX/r10 (d64) --5b: POP rBX/r11 (d64) --5c: POP rSP/r12 (d64) --5d: POP rBP/r13 (d64) --5e: POP rSI/r14 (d64) --5f: POP rDI/r15 (d64) --# 0x60 - 0x6f --60: PUSHA/PUSHAD (i64) --61: POPA/POPAD (i64) --62: BOUND Gv,Ma (i64) | EVEX (Prefix) --63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) --64: SEG=FS (Prefix) --65: SEG=GS (Prefix) --66: Operand-Size (Prefix) --67: Address-Size (Prefix) --68: PUSH Iz (d64) --69: IMUL Gv,Ev,Iz --6a: PUSH Ib (d64) --6b: IMUL Gv,Ev,Ib --6c: INS/INSB Yb,DX --6d: INS/INSW/INSD Yz,DX --6e: OUTS/OUTSB DX,Xb --6f: OUTS/OUTSW/OUTSD DX,Xz --# 0x70 - 0x7f --70: JO Jb --71: JNO Jb --72: JB/JNAE/JC Jb --73: JNB/JAE/JNC Jb --74: JZ/JE Jb --75: JNZ/JNE Jb --76: JBE/JNA Jb --77: JNBE/JA Jb --78: JS Jb --79: JNS Jb --7a: JP/JPE Jb --7b: JNP/JPO Jb --7c: JL/JNGE Jb --7d: JNL/JGE Jb --7e: JLE/JNG Jb --7f: JNLE/JG Jb --# 0x80 - 0x8f --80: Grp1 Eb,Ib (1A) --81: Grp1 Ev,Iz (1A) --82: Grp1 Eb,Ib (1A),(i64) --83: Grp1 Ev,Ib (1A) --84: TEST Eb,Gb --85: TEST Ev,Gv --86: XCHG Eb,Gb --87: XCHG Ev,Gv --88: MOV Eb,Gb --89: MOV Ev,Gv --8a: MOV Gb,Eb --8b: MOV Gv,Ev --8c: MOV Ev,Sw --8d: LEA Gv,M --8e: MOV Sw,Ew --8f: Grp1A (1A) | POP Ev (d64) --# 0x90 - 0x9f --90: NOP | 
PAUSE (F3) | XCHG r8,rAX --91: XCHG rCX/r9,rAX --92: XCHG rDX/r10,rAX --93: XCHG rBX/r11,rAX --94: XCHG rSP/r12,rAX --95: XCHG rBP/r13,rAX --96: XCHG rSI/r14,rAX --97: XCHG rDI/r15,rAX --98: CBW/CWDE/CDQE --99: CWD/CDQ/CQO --9a: CALLF Ap (i64) --9b: FWAIT/WAIT --9c: PUSHF/D/Q Fv (d64) --9d: POPF/D/Q Fv (d64) --9e: SAHF --9f: LAHF --# 0xa0 - 0xaf --a0: MOV AL,Ob --a1: MOV rAX,Ov --a2: MOV Ob,AL --a3: MOV Ov,rAX --a4: MOVS/B Yb,Xb --a5: MOVS/W/D/Q Yv,Xv --a6: CMPS/B Xb,Yb --a7: CMPS/W/D Xv,Yv --a8: TEST AL,Ib --a9: TEST rAX,Iz --aa: STOS/B Yb,AL --ab: STOS/W/D/Q Yv,rAX --ac: LODS/B AL,Xb --ad: LODS/W/D/Q rAX,Xv --ae: SCAS/B AL,Yb --# Note: The May 2011 Intel manual shows Xv for the second parameter of the --# next instruction but Yv is correct --af: SCAS/W/D/Q rAX,Yv --# 0xb0 - 0xbf --b0: MOV AL/R8L,Ib --b1: MOV CL/R9L,Ib --b2: MOV DL/R10L,Ib --b3: MOV BL/R11L,Ib --b4: MOV AH/R12L,Ib --b5: MOV CH/R13L,Ib --b6: MOV DH/R14L,Ib --b7: MOV BH/R15L,Ib --b8: MOV rAX/r8,Iv --b9: MOV rCX/r9,Iv --ba: MOV rDX/r10,Iv --bb: MOV rBX/r11,Iv --bc: MOV rSP/r12,Iv --bd: MOV rBP/r13,Iv --be: MOV rSI/r14,Iv --bf: MOV rDI/r15,Iv --# 0xc0 - 0xcf --c0: Grp2 Eb,Ib (1A) --c1: Grp2 Ev,Ib (1A) --c2: RETN Iw (f64) --c3: RETN --c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) --c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) --c6: Grp11A Eb,Ib (1A) --c7: Grp11B Ev,Iz (1A) --c8: ENTER Iw,Ib --c9: LEAVE (d64) --ca: RETF Iw --cb: RETF --cc: INT3 --cd: INT Ib --ce: INTO (i64) --cf: IRET/D/Q --# 0xd0 - 0xdf --d0: Grp2 Eb,1 (1A) --d1: Grp2 Ev,1 (1A) --d2: Grp2 Eb,CL (1A) --d3: Grp2 Ev,CL (1A) --d4: AAM Ib (i64) --d5: AAD Ib (i64) --d6: --d7: XLAT/XLATB --d8: ESC --d9: ESC --da: ESC --db: ESC --dc: ESC --dd: ESC --de: ESC --df: ESC --# 0xe0 - 0xef --# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix --# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation --# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. --e0: LOOPNE/LOOPNZ Jb (f64) --e1: LOOPE/LOOPZ Jb (f64) --e2: LOOP Jb (f64) --e3: JrCXZ Jb (f64) --e4: IN AL,Ib --e5: IN eAX,Ib --e6: OUT Ib,AL --e7: OUT Ib,eAX --# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset --# in "near" jumps and calls is 16-bit. For CALL, --# push of return address is 16-bit wide, RSP is decremented by 2 --# but is not truncated to 16 bits, unlike RIP. --e8: CALL Jz (f64) --e9: JMP-near Jz (f64) --ea: JMP-far Ap (i64) --eb: JMP-short Jb (f64) --ec: IN AL,DX --ed: IN eAX,DX --ee: OUT DX,AL --ef: OUT DX,eAX --# 0xf0 - 0xff --f0: LOCK (Prefix) --f1: --f2: REPNE (Prefix) | XACQUIRE (Prefix) --f3: REP/REPE (Prefix) | XRELEASE (Prefix) --f4: HLT --f5: CMC --f6: Grp3_1 Eb (1A) --f7: Grp3_2 Ev (1A) --f8: CLC --f9: STC --fa: CLI --fb: STI --fc: CLD --fd: STD --fe: Grp4 (1A) --ff: Grp5 (1A) --EndTable -- --Table: 2-byte opcode (0x0f) --Referrer: 2-byte escape --AVXcode: 1 --# 0x0f 0x00-0x0f --00: Grp6 (1A) --01: Grp7 (1A) --02: LAR Gv,Ew --03: LSL Gv,Ew --04: --05: SYSCALL (o64) --06: CLTS --07: SYSRET (o64) --08: INVD --09: WBINVD --0a: --0b: UD2 (1B) --0c: --# AMD's prefetch group. Intel supports prefetchw(/1) only. --0d: GrpP --0e: FEMMS --# 3DNow! uses the last imm byte as opcode extension. --0f: 3DNow! Pq,Qq,Ib --# 0x0f 0x10-0x1f --# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands --# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. --# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
--# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming --# Reference A.1 --10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) --11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) --12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) --13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) --14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) --15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) --16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) --17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) --18: Grp16 (1A) --19: --# Intel SDM opcode map does not list MPX instructions. For now using Gv for --# bnd registers and Ev for everything else is OK because the instruction --# decoder does not use the information except as an indication that there is --# a ModR/M byte. --1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev --1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv --1c: --1d: --1e: --1f: NOP Ev --# 0x0f 0x20-0x2f --20: MOV Rd,Cd --21: MOV Rd,Dd --22: MOV Cd,Rd --23: MOV Dd,Rd --24: --25: --26: --27: --28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) --29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) --2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) --2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) --2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) --2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) --2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) --2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) --# 0x0f 0x30-0x3f --30: WRMSR --31: RDTSC --32: RDMSR --33: RDPMC --34: SYSENTER --35: SYSEXIT --36: --37: GETSEC --38: escape # 3-byte escape 1 --39: --3a: escape # 3-byte escape 2 --3b: --3c: --3d: --3e: --3f: --# 0x0f 0x40-0x4f --40: CMOVO Gv,Ev --41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) --42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) --43: CMOVAE/NB/NC Gv,Ev --44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) --45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) --46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) --47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) --48: CMOVS Gv,Ev --49: CMOVNS Gv,Ev --4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) --4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk --4c: CMOVL/NGE Gv,Ev --4d: CMOVNL/GE Gv,Ev --4e: CMOVLE/NG Gv,Ev --4f: CMOVNLE/G Gv,Ev --# 0x0f 0x50-0x5f --50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) --51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) --52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) --53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) --54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) --55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) --56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) --57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) --58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) --59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) --5a: vcvtps2pd Vpd,Wps | vcvtpd2ps 
Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) --5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) --5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) --5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) --5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) --5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) --# 0x0f 0x60-0x6f --60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) --61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) --62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) --63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) --64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) --65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) --66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) --67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) --68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) --69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) --6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) --6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) --6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) --6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) --6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) --6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) --# 0x0f 0x70-0x7f --70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) --71: Grp12 (1A) --72: Grp13 (1A) --73: Grp14 (1A) --74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) --75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) --76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) --# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. --77: emms | vzeroupper | vzeroall --78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) --79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) --7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) --7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) --7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) --7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) --7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) --7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) --# 0x0f 0x80-0x8f --# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
--80: JO Jz (f64) --81: JNO Jz (f64) --82: JB/JC/JNAE Jz (f64) --83: JAE/JNB/JNC Jz (f64) --84: JE/JZ Jz (f64) --85: JNE/JNZ Jz (f64) --86: JBE/JNA Jz (f64) --87: JA/JNBE Jz (f64) --88: JS Jz (f64) --89: JNS Jz (f64) --8a: JP/JPE Jz (f64) --8b: JNP/JPO Jz (f64) --8c: JL/JNGE Jz (f64) --8d: JNL/JGE Jz (f64) --8e: JLE/JNG Jz (f64) --8f: JNLE/JG Jz (f64) --# 0x0f 0x90-0x9f --90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) --91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) --92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) --93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) --94: SETE/Z Eb --95: SETNE/NZ Eb --96: SETBE/NA Eb --97: SETA/NBE Eb --98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) --99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) --9a: SETP/PE Eb --9b: SETNP/PO Eb --9c: SETL/NGE Eb --9d: SETNL/GE Eb --9e: SETLE/NG Eb --9f: SETNLE/G Eb --# 0x0f 0xa0-0xaf --a0: PUSH FS (d64) --a1: POP FS (d64) --a2: CPUID --a3: BT Ev,Gv --a4: SHLD Ev,Gv,Ib --a5: SHLD Ev,Gv,CL --a6: GrpPDLK --a7: GrpRNG --a8: PUSH GS (d64) --a9: POP GS (d64) --aa: RSM --ab: BTS Ev,Gv --ac: SHRD Ev,Gv,Ib --ad: SHRD Ev,Gv,CL --ae: Grp15 (1A),(1C) --af: IMUL Gv,Ev --# 0x0f 0xb0-0xbf --b0: CMPXCHG Eb,Gb --b1: CMPXCHG Ev,Gv --b2: LSS Gv,Mp --b3: BTR Ev,Gv --b4: LFS Gv,Mp --b5: LGS Gv,Mp --b6: MOVZX Gv,Eb --b7: MOVZX Gv,Ew --b8: JMPE (!F3) | POPCNT Gv,Ev (F3) --b9: Grp10 (1A) --ba: Grp8 Ev,Ib (1A) --bb: BTC Ev,Gv --bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) --bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) --be: MOVSX Gv,Eb --bf: MOVSX Gv,Ew --# 0x0f 0xc0-0xcf --c0: XADD Eb,Gb --c1: XADD Ev,Gv --c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) --c3: movnti My,Gy --c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) --c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) --c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) --c7: Grp9 (1A) --c8: BSWAP RAX/EAX/R8/R8D --c9: BSWAP RCX/ECX/R9/R9D --ca: BSWAP RDX/EDX/R10/R10D --cb: BSWAP RBX/EBX/R11/R11D --cc: BSWAP RSP/ESP/R12/R12D --cd: BSWAP RBP/EBP/R13/R13D --ce: BSWAP RSI/ESI/R14/R14D --cf: BSWAP RDI/EDI/R15/R15D --# 0x0f 0xd0-0xdf --d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) --d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) --d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) --d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) --d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) --d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) --d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) --d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) --d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) --d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) --da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) --db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) --dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) --dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) --de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) --df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) --# 0x0f 0xe0-0xef --e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) --e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) --e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) --e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) --e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) --e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) --e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) --e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) --e8: psubsb Pq,Qq | 
vpsubsb Vx,Hx,Wx (66),(v1) --e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) --ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) --eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) --ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) --ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) --ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) --ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) --# 0x0f 0xf0-0xff --f0: vlddqu Vx,Mx (F2) --f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) --f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) --f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) --f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) --f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) --f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) --f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) --f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) --f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) --fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) --fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) --fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) --fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) --fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) --ff: --EndTable -- --Table: 3-byte opcode 1 (0x0f 0x38) --Referrer: 3-byte escape 1 --AVXcode: 2 --# 0x0f 0x38 0x00-0x0f --00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) --01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) --02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) --03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) --04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) --05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) --06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) --07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) --08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) --09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) --0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) --0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) --0c: vpermilps Vx,Hx,Wx (66),(v) --0d: vpermilpd Vx,Hx,Wx (66),(v) --0e: vtestps Vx,Wx (66),(v) --0f: vtestpd Vx,Wx (66),(v) --# 0x0f 0x38 0x10-0x1f --10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) --11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) --12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) --13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) --14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) --15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) --16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) --17: vptest Vx,Wx (66) --18: vbroadcastss Vx,Wd (66),(v) --19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) --1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) --1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) --1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) --1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) --1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) --1f: vpabsq Vx,Wx (66),(ev) --# 0x0f 0x38 0x20-0x2f --20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) --21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) --22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) --23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) --24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) --25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) --26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) --27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) --28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) --29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux 
(F3),(ev) --2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) --2b: vpackusdw Vx,Hx,Wx (66),(v1) --2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) --2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) --2e: vmaskmovps Mx,Hx,Vx (66),(v) --2f: vmaskmovpd Mx,Hx,Vx (66),(v) --# 0x0f 0x38 0x30-0x3f --30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) --31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) --32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) --33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) --34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) --35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) --36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) --37: vpcmpgtq Vx,Hx,Wx (66),(v1) --38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) --39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) --3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) --3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) --3c: vpmaxsb Vx,Hx,Wx (66),(v1) --3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) --3e: vpmaxuw Vx,Hx,Wx (66),(v1) --3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) --# 0x0f 0x38 0x40-0x8f --40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) --41: vphminposuw Vdq,Wdq (66),(v1) --42: vgetexpps/d Vx,Wx (66),(ev) --43: vgetexpss/d Vx,Hx,Wx (66),(ev) --44: vplzcntd/q Vx,Wx (66),(ev) --45: vpsrlvd/q Vx,Hx,Wx (66),(v) --46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) --47: vpsllvd/q Vx,Hx,Wx (66),(v) --# Skip 0x48-0x4b --4c: vrcp14ps/d Vpd,Wpd (66),(ev) --4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) --4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) --4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) --# Skip 0x50-0x57 --58: vpbroadcastd Vx,Wx (66),(v) --59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) --5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) --5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) --# Skip 0x5c-0x63 --64: vpblendmd/q Vx,Hx,Wx (66),(ev) --65: vblendmps/d Vx,Hx,Wx (66),(ev) --66: vpblendmb/w Vx,Hx,Wx (66),(ev) --# Skip 0x67-0x74 --75: vpermi2b/w Vx,Hx,Wx (66),(ev) --76: vpermi2d/q Vx,Hx,Wx (66),(ev) --77: vpermi2ps/d Vx,Hx,Wx (66),(ev) --78: vpbroadcastb Vx,Wx (66),(v) --79: vpbroadcastw Vx,Wx (66),(v) --7a: vpbroadcastb Vx,Rv (66),(ev) --7b: vpbroadcastw Vx,Rv (66),(ev) --7c: vpbroadcastd/q Vx,Rv (66),(ev) --7d: vpermt2b/w Vx,Hx,Wx (66),(ev) --7e: vpermt2d/q Vx,Hx,Wx (66),(ev) --7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) --80: INVEPT Gy,Mdq (66) --81: INVPID Gy,Mdq (66) --82: INVPCID Gy,Mdq (66) --83: vpmultishiftqb Vx,Hx,Wx (66),(ev) --88: vexpandps/d Vpd,Wpd (66),(ev) --89: vpexpandd/q Vx,Wx (66),(ev) --8a: vcompressps/d Wx,Vx (66),(ev) --8b: vpcompressd/q Wx,Vx (66),(ev) --8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) --8d: vpermb/w Vx,Hx,Wx (66),(ev) --8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) --# 0x0f 0x38 0x90-0xbf (FMA) --90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) --91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) --92: vgatherdps/d Vx,Hx,Wx (66),(v) --93: vgatherqps/d Vx,Hx,Wx (66),(v) --94: --95: --96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) --97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) --98: vfmadd132ps/d Vx,Hx,Wx (66),(v) --99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) --9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) --9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) --9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) --9d: vfnmadd132ss/d Vx,Hx,Wx 
(66),(v),(v1) --9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) --9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) --a0: vpscatterdd/q Wx,Vx (66),(ev) --a1: vpscatterqd/q Wx,Vx (66),(ev) --a2: vscatterdps/d Wx,Vx (66),(ev) --a3: vscatterqps/d Wx,Vx (66),(ev) --a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) --a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) --a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) --a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) --aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) --ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) --ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) --ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) --ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) --af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) --b4: vpmadd52luq Vx,Hx,Wx (66),(ev) --b5: vpmadd52huq Vx,Hx,Wx (66),(ev) --b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) --b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) --b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) --b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) --ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) --bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) --bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) --bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) --be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) --bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) --# 0x0f 0x38 0xc0-0xff --c4: vpconflictd/q Vx,Wx (66),(ev) --c6: Grp18 (1A) --c7: Grp19 (1A) --c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) --c9: sha1msg1 Vdq,Wdq --ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) --cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) --cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) --cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) --db: VAESIMC Vdq,Wdq (66),(v1) --dc: VAESENC Vdq,Hdq,Wdq (66),(v1) --dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) --de: VAESDEC Vdq,Hdq,Wdq (66),(v1) --df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) --f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) --f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) --f2: ANDN Gy,By,Ey (v) --f3: Grp17 (1A) --f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) --f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) --f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) --EndTable -- --Table: 3-byte opcode 2 (0x0f 0x3a) --Referrer: 3-byte escape 2 --AVXcode: 3 --# 0x0f 0x3a 0x00-0xff --00: vpermq Vqq,Wqq,Ib (66),(v) --01: vpermpd Vqq,Wqq,Ib (66),(v) --02: vpblendd Vx,Hx,Wx,Ib (66),(v) --03: valignd/q Vx,Hx,Wx,Ib (66),(ev) --04: vpermilps Vx,Wx,Ib (66),(v) --05: vpermilpd Vx,Wx,Ib (66),(v) --06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) --07: --08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) --09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) --0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) --0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) --0c: vblendps Vx,Hx,Wx,Ib (66) --0d: vblendpd Vx,Hx,Wx,Ib (66) --0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) --0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) --14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) --15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) --16: vpextrd/q Ey,Vdq,Ib (66),(v1) --17: vextractps Ed,Vdq,Ib (66),(v1) --18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) --19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) --1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) --1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) --1d: vcvtps2ph Wx,Vx,Ib (66),(v) --1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) --1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) --20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) --21: vinsertps Vdq,Hdq,Udq/Md,Ib 
(66),(v1) --22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) --23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) --25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) --26: vgetmantps/d Vx,Wx,Ib (66),(ev) --27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) --30: kshiftrb/w Vk,Uk,Ib (66),(v) --31: kshiftrd/q Vk,Uk,Ib (66),(v) --32: kshiftlb/w Vk,Uk,Ib (66),(v) --33: kshiftld/q Vk,Uk,Ib (66),(v) --38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) --39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) --3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) --3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) --3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) --3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) --40: vdpps Vx,Hx,Wx,Ib (66) --41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) --42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) --43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) --44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) --46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) --4a: vblendvps Vx,Hx,Wx,Lx (66),(v) --4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) --4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) --50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) --51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) --54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) --55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) --56: vreduceps/d Vx,Wx,Ib (66),(ev) --57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) --60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) --61: vpcmpestri Vdq,Wdq,Ib (66),(v1) --62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) --63: vpcmpistri Vdq,Wdq,Ib (66),(v1) --66: vfpclassps/d Vk,Wx,Ib (66),(ev) --67: vfpclassss/d Vk,Wx,Ib (66),(ev) --cc: sha1rnds4 Vdq,Wdq,Ib --df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) --f0: RORX Gy,Ey,Ib (F2),(v) --EndTable -- --GrpTable: Grp1 --0: ADD --1: OR --2: ADC --3: SBB --4: AND --5: SUB --6: XOR --7: CMP --EndTable -- --GrpTable: Grp1A --0: POP --EndTable -- --GrpTable: Grp2 --0: ROL --1: ROR --2: RCL --3: RCR --4: SHL/SAL --5: SHR --6: --7: SAR --EndTable -- --GrpTable: Grp3_1 --0: TEST Eb,Ib --1: --2: NOT Eb --3: NEG Eb --4: MUL AL,Eb --5: IMUL AL,Eb --6: DIV AL,Eb --7: IDIV AL,Eb --EndTable -- --GrpTable: Grp3_2 --0: TEST Ev,Iz --1: --2: NOT Ev --3: NEG Ev --4: MUL rAX,Ev --5: IMUL rAX,Ev --6: DIV rAX,Ev --7: IDIV rAX,Ev --EndTable -- --GrpTable: Grp4 --0: INC Eb --1: DEC Eb --EndTable -- --GrpTable: Grp5 --0: INC Ev --1: DEC Ev --# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
--2: CALLN Ev (f64) --3: CALLF Ep --4: JMPN Ev (f64) --5: JMPF Mp --6: PUSH Ev (d64) --7: --EndTable -- --GrpTable: Grp6 --0: SLDT Rv/Mw --1: STR Rv/Mw --2: LLDT Ew --3: LTR Ew --4: VERR Ew --5: VERW Ew --EndTable -- --GrpTable: Grp7 --0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) --1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) --2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) --3: LIDT Ms --4: SMSW Mw/Rv --5: rdpkru (110),(11B) | wrpkru (111),(11B) --6: LMSW Ew --7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) --EndTable -- --GrpTable: Grp8 --4: BT --5: BTS --6: BTR --7: BTC --EndTable -- --GrpTable: Grp9 --1: CMPXCHG8B/16B Mq/Mdq --3: xrstors --4: xsavec --5: xsaves --6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) --7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) --EndTable -- --GrpTable: Grp10 --EndTable -- --# Grp11A and Grp11B are expressed as Grp11 in Intel SDM --GrpTable: Grp11A --0: MOV Eb,Ib --7: XABORT Ib (000),(11B) --EndTable -- --GrpTable: Grp11B --0: MOV Eb,Iz --7: XBEGIN Jz (000),(11B) --EndTable -- --GrpTable: Grp12 --2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) --4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) --6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) --EndTable -- --GrpTable: Grp13 --0: vprord/q Hx,Wx,Ib (66),(ev) --1: vprold/q Hx,Wx,Ib (66),(ev) --2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) --4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) --6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) --EndTable -- --GrpTable: Grp14 --2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) --3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) --6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) --7: vpslldq Hx,Ux,Ib (66),(11B),(v1) --EndTable -- --GrpTable: Grp15 --0: fxsave | RDFSBASE Ry (F3),(11B) --1: fxstor | RDGSBASE Ry (F3),(11B) --2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) --3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) --4: XSAVE --5: XRSTOR | lfence (11B) --6: XSAVEOPT | clwb (66) | mfence (11B) --7: clflush | clflushopt (66) | sfence (11B) --EndTable -- --GrpTable: Grp16 --0: prefetch NTA --1: prefetch T0 --2: prefetch T1 --3: prefetch T2 --EndTable -- --GrpTable: Grp17 --1: BLSR By,Ey (v) --2: BLSMSK By,Ey (v) --3: BLSI By,Ey (v) --EndTable -- --GrpTable: Grp18 --1: vgatherpf0dps/d Wx (66),(ev) --2: vgatherpf1dps/d Wx (66),(ev) --5: vscatterpf0dps/d Wx (66),(ev) --6: vscatterpf1dps/d Wx (66),(ev) --EndTable -- --GrpTable: Grp19 --1: vgatherpf0qps/d Wx (66),(ev) --2: vgatherpf1qps/d Wx (66),(ev) --5: vscatterpf0qps/d Wx (66),(ev) --6: vscatterpf1qps/d Wx (66),(ev) --EndTable -- --# AMD's Prefetch Group --GrpTable: GrpP --0: PREFETCH --1: PREFETCHW --EndTable -- --GrpTable: GrpPDLK --0: MONTMUL --1: XSHA1 --2: XSHA2 --EndTable -- --GrpTable: GrpRNG --0: xstore-rng --1: xcrypt-ecb --2: xcrypt-cbc --4: xcrypt-cfb --5: xcrypt-ofb --EndTable -diff --git a/tools/objtool/arch/x86/lib/inat.c b/tools/objtool/arch/x86/lib/inat.c -new file mode 100644 -index 0000000..c1f01a8 ---- /dev/null -+++ b/tools/objtool/arch/x86/lib/inat.c -@@ -0,0 +1,97 @@ -+/* -+ * x86 instruction attribute tables -+ * -+ * Written by Masami Hiramatsu <mhiramat@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free 
Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ * -+ */ -+#include <asm/insn.h> -+ -+/* Attribute tables are generated from opcode map */ -+#include "inat-tables.c" -+ -+/* Attribute search APIs */ -+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) -+{ -+ return inat_primary_table[opcode]; -+} -+ -+int inat_get_last_prefix_id(insn_byte_t last_pfx) -+{ -+ insn_attr_t lpfx_attr; -+ -+ lpfx_attr = inat_get_opcode_attribute(last_pfx); -+ return inat_last_prefix_id(lpfx_attr); -+} -+ -+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, -+ insn_attr_t esc_attr) -+{ -+ const insn_attr_t *table; -+ int n; -+ -+ n = inat_escape_id(esc_attr); -+ -+ table = inat_escape_tables[n][0]; -+ if (!table) -+ return 0; -+ if (inat_has_variant(table[opcode]) && lpfx_id) { -+ table = inat_escape_tables[n][lpfx_id]; -+ if (!table) -+ return 0; -+ } -+ return table[opcode]; -+} -+ -+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, -+ insn_attr_t grp_attr) -+{ -+ const insn_attr_t *table; -+ int n; -+ -+ n = inat_group_id(grp_attr); -+ -+ table = inat_group_tables[n][0]; -+ if (!table) -+ return inat_group_common_attribute(grp_attr); -+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { -+ table = inat_group_tables[n][lpfx_id]; -+ if (!table) -+ return inat_group_common_attribute(grp_attr); -+ } -+ return table[X86_MODRM_REG(modrm)] | -+ inat_group_common_attribute(grp_attr); -+} -+ -+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, -+ insn_byte_t vex_p) -+{ -+ const insn_attr_t *table; -+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) -+ return 0; -+ /* At first, this checks the master table */ -+ table = inat_avx_tables[vex_m][0]; -+ if (!table) -+ return 0; -+ if (!inat_is_group(table[opcode]) && vex_p) { -+ /* If this is not a group, get attribute directly */ -+ table = inat_avx_tables[vex_m][vex_p]; -+ if (!table) -+ return 0; -+ } -+ return table[opcode]; -+} -+ -diff --git a/tools/objtool/arch/x86/lib/insn.c b/tools/objtool/arch/x86/lib/insn.c -new file mode 100644 -index 0000000..1088eb8 ---- /dev/null -+++ b/tools/objtool/arch/x86/lib/insn.c -@@ -0,0 +1,606 @@ -+/* -+ * x86 instruction analysis -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-+ * -+ * Copyright (C) IBM Corporation, 2002, 2004, 2009 -+ */ -+ -+#ifdef __KERNEL__ -+#include <linux/string.h> -+#else -+#include <string.h> -+#endif -+#include <asm/inat.h> -+#include <asm/insn.h> -+ -+/* Verify next sizeof(t) bytes can be on the same instruction */ -+#define validate_next(t, insn, n) \ -+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) -+ -+#define __get_next(t, insn) \ -+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) -+ -+#define __peek_nbyte_next(t, insn, n) \ -+ ({ t r = *(t*)((insn)->next_byte + n); r; }) -+ -+#define get_next(t, insn) \ -+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) -+ -+#define peek_nbyte_next(t, insn, n) \ -+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) -+ -+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) -+ -+/** -+ * insn_init() - initialize struct insn -+ * @insn: &struct insn to be initialized -+ * @kaddr: address (in kernel memory) of instruction (or copy thereof) -+ * @x86_64: !0 for 64-bit kernel or 64-bit app -+ */ -+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) -+{ -+ /* -+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid -+ * even if the input buffer is long enough to hold them. -+ */ -+ if (buf_len > MAX_INSN_SIZE) -+ buf_len = MAX_INSN_SIZE; -+ -+ memset(insn, 0, sizeof(*insn)); -+ insn->kaddr = kaddr; -+ insn->end_kaddr = kaddr + buf_len; -+ insn->next_byte = kaddr; -+ insn->x86_64 = x86_64 ? 1 : 0; -+ insn->opnd_bytes = 4; -+ if (x86_64) -+ insn->addr_bytes = 8; -+ else -+ insn->addr_bytes = 4; -+} -+ -+/** -+ * insn_get_prefixes - scan x86 instruction prefix bytes -+ * @insn: &struct insn containing instruction -+ * -+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte -+ * to point to the (first) opcode. No effect if @insn->prefixes.got -+ * is already set. 
-+ */
-+void insn_get_prefixes(struct insn *insn)
-+{
-+	struct insn_field *prefixes = &insn->prefixes;
-+	insn_attr_t attr;
-+	insn_byte_t b, lb;
-+	int i, nb;
-+
-+	if (prefixes->got)
-+		return;
-+
-+	nb = 0;
-+	lb = 0;
-+	b = peek_next(insn_byte_t, insn);
-+	attr = inat_get_opcode_attribute(b);
-+	while (inat_is_legacy_prefix(attr)) {
-+		/* Skip if same prefix */
-+		for (i = 0; i < nb; i++)
-+			if (prefixes->bytes[i] == b)
-+				goto found;
-+		if (nb == 4)
-+			/* Invalid instruction */
-+			break;
-+		prefixes->bytes[nb++] = b;
-+		if (inat_is_address_size_prefix(attr)) {
-+			/* address size switches 2/4 or 4/8 */
-+			if (insn->x86_64)
-+				insn->addr_bytes ^= 12;
-+			else
-+				insn->addr_bytes ^= 6;
-+		} else if (inat_is_operand_size_prefix(attr)) {
-+			/* operand size switches 2/4 */
-+			insn->opnd_bytes ^= 6;
-+		}
-+found:
-+		prefixes->nbytes++;
-+		insn->next_byte++;
-+		lb = b;
-+		b = peek_next(insn_byte_t, insn);
-+		attr = inat_get_opcode_attribute(b);
-+	}
-+	/* Set the last prefix */
-+	if (lb && lb != insn->prefixes.bytes[3]) {
-+		if (unlikely(insn->prefixes.bytes[3])) {
-+			/* Swap the last prefix */
-+			b = insn->prefixes.bytes[3];
-+			for (i = 0; i < nb; i++)
-+				if (prefixes->bytes[i] == lb)
-+					prefixes->bytes[i] = b;
-+		}
-+		insn->prefixes.bytes[3] = lb;
-+	}
-+
-+	/* Decode REX prefix */
-+	if (insn->x86_64) {
-+		b = peek_next(insn_byte_t, insn);
-+		attr = inat_get_opcode_attribute(b);
-+		if (inat_is_rex_prefix(attr)) {
-+			insn->rex_prefix.value = b;
-+			insn->rex_prefix.nbytes = 1;
-+			insn->next_byte++;
-+			if (X86_REX_W(b))
-+				/* REX.W overrides opnd_size */
-+				insn->opnd_bytes = 8;
-+		}
-+	}
-+	insn->rex_prefix.got = 1;
-+
-+	/* Decode VEX prefix */
-+	b = peek_next(insn_byte_t, insn);
-+	attr = inat_get_opcode_attribute(b);
-+	if (inat_is_vex_prefix(attr)) {
-+		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
-+		if (!insn->x86_64) {
-+			/*
-+			 * In 32-bit mode, if the [7:6] bits (mod bits of
-+			 * ModRM) on the second byte are not 11b, it is
-+			 * LDS or LES or BOUND.
-+			 */
-+			if (X86_MODRM_MOD(b2) != 3)
-+				goto vex_end;
-+		}
-+		insn->vex_prefix.bytes[0] = b;
-+		insn->vex_prefix.bytes[1] = b2;
-+		if (inat_is_evex_prefix(attr)) {
-+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-+			insn->vex_prefix.bytes[2] = b2;
-+			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
-+			insn->vex_prefix.bytes[3] = b2;
-+			insn->vex_prefix.nbytes = 4;
-+			insn->next_byte += 4;
-+			if (insn->x86_64 && X86_VEX_W(b2))
-+				/* VEX.W overrides opnd_size */
-+				insn->opnd_bytes = 8;
-+		} else if (inat_is_vex3_prefix(attr)) {
-+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-+			insn->vex_prefix.bytes[2] = b2;
-+			insn->vex_prefix.nbytes = 3;
-+			insn->next_byte += 3;
-+			if (insn->x86_64 && X86_VEX_W(b2))
-+				/* VEX.W overrides opnd_size */
-+				insn->opnd_bytes = 8;
-+		} else {
-+			/*
-+			 * For VEX2, fake VEX3-like byte#2.
-+			 * Makes it easier to decode vex.W, vex.vvvv,
-+			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
-+			 */
-+			insn->vex_prefix.bytes[2] = b2 & 0x7f;
-+			insn->vex_prefix.nbytes = 2;
-+			insn->next_byte += 2;
-+		}
-+	}
-+vex_end:
-+	insn->vex_prefix.got = 1;
-+
-+	prefixes->got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+/**
-+ * insn_get_opcode - collect opcode(s)
-+ * @insn: &struct insn containing instruction
-+ *
-+ * Populates @insn->opcode, updates @insn->next_byte to point past the
-+ * opcode byte(s), and sets @insn->attr (except for groups).
-+ * If necessary, first collects any preceding (prefix) bytes.
-+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
-+ * is already 1.
-+ */
-+void insn_get_opcode(struct insn *insn)
-+{
-+	struct insn_field *opcode = &insn->opcode;
-+	insn_byte_t op;
-+	int pfx_id;
-+	if (opcode->got)
-+		return;
-+	if (!insn->prefixes.got)
-+		insn_get_prefixes(insn);
-+
-+	/* Get first opcode */
-+	op = get_next(insn_byte_t, insn);
-+	opcode->bytes[0] = op;
-+	opcode->nbytes = 1;
-+
-+	/* Check if there is VEX prefix or not */
-+	if (insn_is_avx(insn)) {
-+		insn_byte_t m, p;
-+		m = insn_vex_m_bits(insn);
-+		p = insn_vex_p_bits(insn);
-+		insn->attr = inat_get_avx_attribute(op, m, p);
-+		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
-+		    (!inat_accept_vex(insn->attr) &&
-+		     !inat_is_group(insn->attr)))
-+			insn->attr = 0;	/* This instruction is bad */
-+		goto end;	/* VEX has only 1 byte for opcode */
-+	}
-+
-+	insn->attr = inat_get_opcode_attribute(op);
-+	while (inat_is_escape(insn->attr)) {
-+		/* Get escaped opcode */
-+		op = get_next(insn_byte_t, insn);
-+		opcode->bytes[opcode->nbytes++] = op;
-+		pfx_id = insn_last_prefix_id(insn);
-+		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
-+	}
-+	if (inat_must_vex(insn->attr))
-+		insn->attr = 0;	/* This instruction is bad */
-+end:
-+	opcode->got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+/**
-+ * insn_get_modrm - collect ModRM byte, if any
-+ * @insn: &struct insn containing instruction
-+ *
-+ * Populates @insn->modrm and updates @insn->next_byte to point past the
-+ * ModRM byte, if any. If necessary, first collects the preceding bytes
-+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
-+ */
-+void insn_get_modrm(struct insn *insn)
-+{
-+	struct insn_field *modrm = &insn->modrm;
-+	insn_byte_t pfx_id, mod;
-+	if (modrm->got)
-+		return;
-+	if (!insn->opcode.got)
-+		insn_get_opcode(insn);
-+
-+	if (inat_has_modrm(insn->attr)) {
-+		mod = get_next(insn_byte_t, insn);
-+		modrm->value = mod;
-+		modrm->nbytes = 1;
-+		if (inat_is_group(insn->attr)) {
-+			pfx_id = insn_last_prefix_id(insn);
-+			insn->attr = inat_get_group_attribute(mod, pfx_id,
-+							      insn->attr);
-+			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
-+				insn->attr = 0;	/* This is bad */
-+		}
-+	}
-+
-+	if (insn->x86_64 && inat_is_force64(insn->attr))
-+		insn->opnd_bytes = 8;
-+	modrm->got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+
-+/**
-+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
-+ * @insn: &struct insn containing instruction
-+ *
-+ * If necessary, first collects the instruction up to and including the
-+ * ModRM byte.  No effect if @insn->x86_64 is 0.
-+ */
-+int insn_rip_relative(struct insn *insn)
-+{
-+	struct insn_field *modrm = &insn->modrm;
-+
-+	if (!insn->x86_64)
-+		return 0;
-+	if (!modrm->got)
-+		insn_get_modrm(insn);
-+	/*
-+	 * For rip-relative instructions, the mod field (top 2 bits)
-+	 * is zero and the r/m field (bottom 3 bits) is 0x5.
-+	 */
-+	return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
-+}
-+
-+/**
-+ * insn_get_sib() - Get the SIB byte of instruction
-+ * @insn: &struct insn containing instruction
-+ *
-+ * If necessary, first collects the instruction up to and including the
-+ * ModRM byte.
-+ */
-+void insn_get_sib(struct insn *insn)
-+{
-+	insn_byte_t modrm;
-+
-+	if (insn->sib.got)
-+		return;
-+	if (!insn->modrm.got)
-+		insn_get_modrm(insn);
-+	if (insn->modrm.nbytes) {
-+		modrm = (insn_byte_t)insn->modrm.value;
-+		if (insn->addr_bytes != 2 &&
-+		    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
-+			insn->sib.value = get_next(insn_byte_t, insn);
-+			insn->sib.nbytes = 1;
-+		}
-+	}
-+	insn->sib.got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+
-+/**
-+ * insn_get_displacement() - Get the displacement of instruction
-+ * @insn: &struct insn containing instruction
-+ *
-+ * If necessary, first collects the instruction up to and including the
-+ * SIB byte.
-+ * Displacement value is sign-extended.
-+ */
-+void insn_get_displacement(struct insn *insn)
-+{
-+	insn_byte_t mod, rm, base;
-+
-+	if (insn->displacement.got)
-+		return;
-+	if (!insn->sib.got)
-+		insn_get_sib(insn);
-+	if (insn->modrm.nbytes) {
-+		/*
-+		 * Interpreting the modrm byte:
-+		 * mod = 00 - no displacement fields (exceptions below)
-+		 * mod = 01 - 1-byte displacement field
-+		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
-+		 * 	address size = 2 (0x67 prefix in 32-bit mode)
-+		 * mod = 11 - no memory operand
-+		 *
-+		 * If address size = 2...
-+		 * mod = 00, r/m = 110 - displacement field is 2 bytes
-+		 *
-+		 * If address size != 2...
-+		 * mod != 11, r/m = 100 - SIB byte exists
-+		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
-+		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
-+		 * 	field is 4 bytes
-+		 */
-+		mod = X86_MODRM_MOD(insn->modrm.value);
-+		rm = X86_MODRM_RM(insn->modrm.value);
-+		base = X86_SIB_BASE(insn->sib.value);
-+		if (mod == 3)
-+			goto out;
-+		if (mod == 1) {
-+			insn->displacement.value = get_next(signed char, insn);
-+			insn->displacement.nbytes = 1;
-+		} else if (insn->addr_bytes == 2) {
-+			if ((mod == 0 && rm == 6) || mod == 2) {
-+				insn->displacement.value =
-+					 get_next(short, insn);
-+				insn->displacement.nbytes = 2;
-+			}
-+		} else {
-+			if ((mod == 0 && rm == 5) || mod == 2 ||
-+			    (mod == 0 && base == 5)) {
-+				insn->displacement.value = get_next(int, insn);
-+				insn->displacement.nbytes = 4;
-+			}
-+		}
-+	}
-+out:
-+	insn->displacement.got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+/* Decode moffset16/32/64. Return 0 if failed */
-+static int __get_moffset(struct insn *insn)
-+{
-+	switch (insn->addr_bytes) {
-+	case 2:
-+		insn->moffset1.value = get_next(short, insn);
-+		insn->moffset1.nbytes = 2;
-+		break;
-+	case 4:
-+		insn->moffset1.value = get_next(int, insn);
-+		insn->moffset1.nbytes = 4;
-+		break;
-+	case 8:
-+		insn->moffset1.value = get_next(int, insn);
-+		insn->moffset1.nbytes = 4;
-+		insn->moffset2.value = get_next(int, insn);
-+		insn->moffset2.nbytes = 4;
-+		break;
-+	default:	/* opnd_bytes must be modified manually */
-+		goto err_out;
-+	}
-+	insn->moffset1.got = insn->moffset2.got = 1;
-+
-+	return 1;
-+
-+err_out:
-+	return 0;
-+}
-+
-+/* Decode imm v32(Iz). Return 0 if failed */
-+static int __get_immv32(struct insn *insn)
-+{
-+	switch (insn->opnd_bytes) {
-+	case 2:
-+		insn->immediate.value = get_next(short, insn);
-+		insn->immediate.nbytes = 2;
-+		break;
-+	case 4:
-+	case 8:
-+		insn->immediate.value = get_next(int, insn);
-+		insn->immediate.nbytes = 4;
-+		break;
-+	default:	/* opnd_bytes must be modified manually */
-+		goto err_out;
-+	}
-+
-+	return 1;
-+
-+err_out:
-+	return 0;
-+}
-+
-+/* Decode imm v64(Iv/Ov). Return 0 if failed */
-+static int __get_immv(struct insn *insn)
-+{
-+	switch (insn->opnd_bytes) {
-+	case 2:
-+		insn->immediate1.value = get_next(short, insn);
-+		insn->immediate1.nbytes = 2;
-+		break;
-+	case 4:
-+		insn->immediate1.value = get_next(int, insn);
-+		insn->immediate1.nbytes = 4;
-+		break;
-+	case 8:
-+		insn->immediate1.value = get_next(int, insn);
-+		insn->immediate1.nbytes = 4;
-+		insn->immediate2.value = get_next(int, insn);
-+		insn->immediate2.nbytes = 4;
-+		break;
-+	default:	/* opnd_bytes must be modified manually */
-+		goto err_out;
-+	}
-+	insn->immediate1.got = insn->immediate2.got = 1;
-+
-+	return 1;
-+err_out:
-+	return 0;
-+}
-+
-+/* Decode ptr16:16/32(Ap) */
-+static int __get_immptr(struct insn *insn)
-+{
-+	switch (insn->opnd_bytes) {
-+	case 2:
-+		insn->immediate1.value = get_next(short, insn);
-+		insn->immediate1.nbytes = 2;
-+		break;
-+	case 4:
-+		insn->immediate1.value = get_next(int, insn);
-+		insn->immediate1.nbytes = 4;
-+		break;
-+	case 8:
-+		/* ptr16:64 does not exist (no segment) */
-+		return 0;
-+	default:	/* opnd_bytes must be modified manually */
-+		goto err_out;
-+	}
-+	insn->immediate2.value = get_next(unsigned short, insn);
-+	insn->immediate2.nbytes = 2;
-+	insn->immediate1.got = insn->immediate2.got = 1;
-+
-+	return 1;
-+err_out:
-+	return 0;
-+}
-+
-+/**
-+ * insn_get_immediate() - Get the immediates of instruction
-+ * @insn: &struct insn containing instruction
-+ *
-+ * If necessary, first collects the instruction up to and including the
-+ * displacement bytes.
-+ * Basically, most immediates are sign-extended.  The unsigned value can be
-+ * obtained by bit masking with ((1 << (nbytes * 8)) - 1).
-+ */
-+void insn_get_immediate(struct insn *insn)
-+{
-+	if (insn->immediate.got)
-+		return;
-+	if (!insn->displacement.got)
-+		insn_get_displacement(insn);
-+
-+	if (inat_has_moffset(insn->attr)) {
-+		if (!__get_moffset(insn))
-+			goto err_out;
-+		goto done;
-+	}
-+
-+	if (!inat_has_immediate(insn->attr))
-+		/* no immediates */
-+		goto done;
-+
-+	switch (inat_immediate_size(insn->attr)) {
-+	case INAT_IMM_BYTE:
-+		insn->immediate.value = get_next(signed char, insn);
-+		insn->immediate.nbytes = 1;
-+		break;
-+	case INAT_IMM_WORD:
-+		insn->immediate.value = get_next(short, insn);
-+		insn->immediate.nbytes = 2;
-+		break;
-+	case INAT_IMM_DWORD:
-+		insn->immediate.value = get_next(int, insn);
-+		insn->immediate.nbytes = 4;
-+		break;
-+	case INAT_IMM_QWORD:
-+		insn->immediate1.value = get_next(int, insn);
-+		insn->immediate1.nbytes = 4;
-+		insn->immediate2.value = get_next(int, insn);
-+		insn->immediate2.nbytes = 4;
-+		break;
-+	case INAT_IMM_PTR:
-+		if (!__get_immptr(insn))
-+			goto err_out;
-+		break;
-+	case INAT_IMM_VWORD32:
-+		if (!__get_immv32(insn))
-+			goto err_out;
-+		break;
-+	case INAT_IMM_VWORD:
-+		if (!__get_immv(insn))
-+			goto err_out;
-+		break;
-+	default:
-+		/* Here, insn must have an immediate, but failed */
-+		goto err_out;
-+	}
-+	if (inat_has_second_immediate(insn->attr)) {
-+		insn->immediate2.value = get_next(signed char, insn);
-+		insn->immediate2.nbytes = 1;
-+	}
-+done:
-+	insn->immediate.got = 1;
-+
-+err_out:
-+	return;
-+}
-+
-+/**
-+ * insn_get_length() - Get the length of instruction
-+ * @insn: &struct insn containing instruction
-+ *
-+ * If necessary, first collects the instruction up to and including the
-+ * immediate bytes.
-+ */
-+void insn_get_length(struct insn *insn)
-+{
-+	if (insn->length)
-+		return;
-+	if (!insn->immediate.got)
-+		insn_get_immediate(insn);
-+	insn->length = (unsigned char)((unsigned long)insn->next_byte
-+				     - (unsigned long)insn->kaddr);
-+}
-diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-new file mode 100644
-index 0000000..aa2270d
---- /dev/null
-+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-@@ -0,0 +1,1072 @@
-+# x86 Opcode Maps
-+#
-+# This is (mostly) based on the following documentation:
-+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
-+#   (#326018-047US, June 2013)
-+#
-+#<Opcode maps>
-+# Table: table-name
-+# Referrer: escaped-name
-+# AVXcode: avx-code
-+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
-+# (or)
-+# opcode: escape # escaped-name
-+# EndTable
-+#
-+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
-+# mnemonics that begin with lowercase 'k' accept a VEX prefix
-+#
-+#<group maps>
-+# GrpTable: GrpXXX
-+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
-+# EndTable
-+#
-+# AVX Superscripts
-+# (ev): this opcode requires EVEX prefix.
-+# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
-+# (v): this opcode requires VEX prefix.
-+# (v1): this opcode only supports 128bit VEX.
-+#
-+# Last Prefix Superscripts
-+# - (66): the last prefix is 0x66
-+# - (F3): the last prefix is 0xF3
-+# - (F2): the last prefix is 0xF2
-+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
-+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
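As a reading aid for the notation above (an editorial example, not part of the patch), take one row of the 2-byte table that appears later in this map:

    10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)

Opcode 0x0f 0x10 decodes as vmovups when no mandatory last prefix is in effect, as vmovupd when the last prefix is 0x66, as vmovss under 0xF3, and as vmovsd under 0xF2, with the (v1) superscript marking the two scalar forms as 128-bit VEX only. The operand letters are the Intel SDM addressing codes: V is an XMM/YMM register selected by ModRM.reg, W is an XMM/YMM register or memory operand selected by ModRM.rm, and H is the register encoded in VEX.vvvv.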
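This map is what the build turns into inat-tables.c, and the insn_get_*() routines added above walk it one stage at a time through the inat_get_*_attribute() lookups in inat.c. A minimal usage sketch of that decoder API, again not part of the patch; it assumes the objtool build environment where <asm/insn.h> resolves to the tools copy added alongside these files, and dump_one() is a hypothetical helper name:

    #include <stdio.h>
    #include <asm/insn.h>	/* tools copy of the x86 insn decoder API */

    /* Illustration only: decode one instruction from buf and report what
     * was found.  insn_get_length() transitively runs the whole pipeline
     * above (prefixes -> opcode -> ModRM -> SIB -> displacement ->
     * immediate); a caller worried about truncated input would also
     * check the per-field 'got' flags after decoding. */
    static void dump_one(const unsigned char *buf, int buf_len, int x86_64)
    {
    	struct insn insn;

    	insn_init(&insn, buf, buf_len, x86_64);
    	insn_get_length(&insn);
    	printf("len=%d opcode[0]=0x%02x modrm=%d disp=%d imm=%d\n",
    	       insn.length, insn.opcode.bytes[0], insn.modrm.nbytes,
    	       insn.displacement.nbytes, insn.immediate.nbytes);
    }

For example, feeding the 3-byte buffer {0x48, 0x89, 0xc7} (mov %rax,%rdi) with x86_64=1 should report len=3 with a one-byte ModRM and no displacement or immediate.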
-+ -+Table: one byte opcode -+Referrer: -+AVXcode: -+# 0x00 - 0x0f -+00: ADD Eb,Gb -+01: ADD Ev,Gv -+02: ADD Gb,Eb -+03: ADD Gv,Ev -+04: ADD AL,Ib -+05: ADD rAX,Iz -+06: PUSH ES (i64) -+07: POP ES (i64) -+08: OR Eb,Gb -+09: OR Ev,Gv -+0a: OR Gb,Eb -+0b: OR Gv,Ev -+0c: OR AL,Ib -+0d: OR rAX,Iz -+0e: PUSH CS (i64) -+0f: escape # 2-byte escape -+# 0x10 - 0x1f -+10: ADC Eb,Gb -+11: ADC Ev,Gv -+12: ADC Gb,Eb -+13: ADC Gv,Ev -+14: ADC AL,Ib -+15: ADC rAX,Iz -+16: PUSH SS (i64) -+17: POP SS (i64) -+18: SBB Eb,Gb -+19: SBB Ev,Gv -+1a: SBB Gb,Eb -+1b: SBB Gv,Ev -+1c: SBB AL,Ib -+1d: SBB rAX,Iz -+1e: PUSH DS (i64) -+1f: POP DS (i64) -+# 0x20 - 0x2f -+20: AND Eb,Gb -+21: AND Ev,Gv -+22: AND Gb,Eb -+23: AND Gv,Ev -+24: AND AL,Ib -+25: AND rAx,Iz -+26: SEG=ES (Prefix) -+27: DAA (i64) -+28: SUB Eb,Gb -+29: SUB Ev,Gv -+2a: SUB Gb,Eb -+2b: SUB Gv,Ev -+2c: SUB AL,Ib -+2d: SUB rAX,Iz -+2e: SEG=CS (Prefix) -+2f: DAS (i64) -+# 0x30 - 0x3f -+30: XOR Eb,Gb -+31: XOR Ev,Gv -+32: XOR Gb,Eb -+33: XOR Gv,Ev -+34: XOR AL,Ib -+35: XOR rAX,Iz -+36: SEG=SS (Prefix) -+37: AAA (i64) -+38: CMP Eb,Gb -+39: CMP Ev,Gv -+3a: CMP Gb,Eb -+3b: CMP Gv,Ev -+3c: CMP AL,Ib -+3d: CMP rAX,Iz -+3e: SEG=DS (Prefix) -+3f: AAS (i64) -+# 0x40 - 0x4f -+40: INC eAX (i64) | REX (o64) -+41: INC eCX (i64) | REX.B (o64) -+42: INC eDX (i64) | REX.X (o64) -+43: INC eBX (i64) | REX.XB (o64) -+44: INC eSP (i64) | REX.R (o64) -+45: INC eBP (i64) | REX.RB (o64) -+46: INC eSI (i64) | REX.RX (o64) -+47: INC eDI (i64) | REX.RXB (o64) -+48: DEC eAX (i64) | REX.W (o64) -+49: DEC eCX (i64) | REX.WB (o64) -+4a: DEC eDX (i64) | REX.WX (o64) -+4b: DEC eBX (i64) | REX.WXB (o64) -+4c: DEC eSP (i64) | REX.WR (o64) -+4d: DEC eBP (i64) | REX.WRB (o64) -+4e: DEC eSI (i64) | REX.WRX (o64) -+4f: DEC eDI (i64) | REX.WRXB (o64) -+# 0x50 - 0x5f -+50: PUSH rAX/r8 (d64) -+51: PUSH rCX/r9 (d64) -+52: PUSH rDX/r10 (d64) -+53: PUSH rBX/r11 (d64) -+54: PUSH rSP/r12 (d64) -+55: PUSH rBP/r13 (d64) -+56: PUSH rSI/r14 (d64) -+57: PUSH rDI/r15 (d64) -+58: POP rAX/r8 (d64) -+59: POP rCX/r9 (d64) -+5a: POP rDX/r10 (d64) -+5b: POP rBX/r11 (d64) -+5c: POP rSP/r12 (d64) -+5d: POP rBP/r13 (d64) -+5e: POP rSI/r14 (d64) -+5f: POP rDI/r15 (d64) -+# 0x60 - 0x6f -+60: PUSHA/PUSHAD (i64) -+61: POPA/POPAD (i64) -+62: BOUND Gv,Ma (i64) | EVEX (Prefix) -+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) -+64: SEG=FS (Prefix) -+65: SEG=GS (Prefix) -+66: Operand-Size (Prefix) -+67: Address-Size (Prefix) -+68: PUSH Iz (d64) -+69: IMUL Gv,Ev,Iz -+6a: PUSH Ib (d64) -+6b: IMUL Gv,Ev,Ib -+6c: INS/INSB Yb,DX -+6d: INS/INSW/INSD Yz,DX -+6e: OUTS/OUTSB DX,Xb -+6f: OUTS/OUTSW/OUTSD DX,Xz -+# 0x70 - 0x7f -+70: JO Jb -+71: JNO Jb -+72: JB/JNAE/JC Jb -+73: JNB/JAE/JNC Jb -+74: JZ/JE Jb -+75: JNZ/JNE Jb -+76: JBE/JNA Jb -+77: JNBE/JA Jb -+78: JS Jb -+79: JNS Jb -+7a: JP/JPE Jb -+7b: JNP/JPO Jb -+7c: JL/JNGE Jb -+7d: JNL/JGE Jb -+7e: JLE/JNG Jb -+7f: JNLE/JG Jb -+# 0x80 - 0x8f -+80: Grp1 Eb,Ib (1A) -+81: Grp1 Ev,Iz (1A) -+82: Grp1 Eb,Ib (1A),(i64) -+83: Grp1 Ev,Ib (1A) -+84: TEST Eb,Gb -+85: TEST Ev,Gv -+86: XCHG Eb,Gb -+87: XCHG Ev,Gv -+88: MOV Eb,Gb -+89: MOV Ev,Gv -+8a: MOV Gb,Eb -+8b: MOV Gv,Ev -+8c: MOV Ev,Sw -+8d: LEA Gv,M -+8e: MOV Sw,Ew -+8f: Grp1A (1A) | POP Ev (d64) -+# 0x90 - 0x9f -+90: NOP | PAUSE (F3) | XCHG r8,rAX -+91: XCHG rCX/r9,rAX -+92: XCHG rDX/r10,rAX -+93: XCHG rBX/r11,rAX -+94: XCHG rSP/r12,rAX -+95: XCHG rBP/r13,rAX -+96: XCHG rSI/r14,rAX -+97: XCHG rDI/r15,rAX -+98: CBW/CWDE/CDQE -+99: CWD/CDQ/CQO -+9a: CALLF Ap (i64) -+9b: FWAIT/WAIT -+9c: PUSHF/D/Q Fv (d64) -+9d: POPF/D/Q Fv (d64) -+9e: SAHF 
-+9f: LAHF -+# 0xa0 - 0xaf -+a0: MOV AL,Ob -+a1: MOV rAX,Ov -+a2: MOV Ob,AL -+a3: MOV Ov,rAX -+a4: MOVS/B Yb,Xb -+a5: MOVS/W/D/Q Yv,Xv -+a6: CMPS/B Xb,Yb -+a7: CMPS/W/D Xv,Yv -+a8: TEST AL,Ib -+a9: TEST rAX,Iz -+aa: STOS/B Yb,AL -+ab: STOS/W/D/Q Yv,rAX -+ac: LODS/B AL,Xb -+ad: LODS/W/D/Q rAX,Xv -+ae: SCAS/B AL,Yb -+# Note: The May 2011 Intel manual shows Xv for the second parameter of the -+# next instruction but Yv is correct -+af: SCAS/W/D/Q rAX,Yv -+# 0xb0 - 0xbf -+b0: MOV AL/R8L,Ib -+b1: MOV CL/R9L,Ib -+b2: MOV DL/R10L,Ib -+b3: MOV BL/R11L,Ib -+b4: MOV AH/R12L,Ib -+b5: MOV CH/R13L,Ib -+b6: MOV DH/R14L,Ib -+b7: MOV BH/R15L,Ib -+b8: MOV rAX/r8,Iv -+b9: MOV rCX/r9,Iv -+ba: MOV rDX/r10,Iv -+bb: MOV rBX/r11,Iv -+bc: MOV rSP/r12,Iv -+bd: MOV rBP/r13,Iv -+be: MOV rSI/r14,Iv -+bf: MOV rDI/r15,Iv -+# 0xc0 - 0xcf -+c0: Grp2 Eb,Ib (1A) -+c1: Grp2 Ev,Ib (1A) -+c2: RETN Iw (f64) -+c3: RETN -+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) -+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) -+c6: Grp11A Eb,Ib (1A) -+c7: Grp11B Ev,Iz (1A) -+c8: ENTER Iw,Ib -+c9: LEAVE (d64) -+ca: RETF Iw -+cb: RETF -+cc: INT3 -+cd: INT Ib -+ce: INTO (i64) -+cf: IRET/D/Q -+# 0xd0 - 0xdf -+d0: Grp2 Eb,1 (1A) -+d1: Grp2 Ev,1 (1A) -+d2: Grp2 Eb,CL (1A) -+d3: Grp2 Ev,CL (1A) -+d4: AAM Ib (i64) -+d5: AAD Ib (i64) -+d6: -+d7: XLAT/XLATB -+d8: ESC -+d9: ESC -+da: ESC -+db: ESC -+dc: ESC -+dd: ESC -+de: ESC -+df: ESC -+# 0xe0 - 0xef -+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix -+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation -+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -+e0: LOOPNE/LOOPNZ Jb (f64) -+e1: LOOPE/LOOPZ Jb (f64) -+e2: LOOP Jb (f64) -+e3: JrCXZ Jb (f64) -+e4: IN AL,Ib -+e5: IN eAX,Ib -+e6: OUT Ib,AL -+e7: OUT Ib,eAX -+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset -+# in "near" jumps and calls is 16-bit. For CALL, -+# push of return address is 16-bit wide, RSP is decremented by 2 -+# but is not truncated to 16 bits, unlike RIP. -+e8: CALL Jz (f64) -+e9: JMP-near Jz (f64) -+ea: JMP-far Ap (i64) -+eb: JMP-short Jb (f64) -+ec: IN AL,DX -+ed: IN eAX,DX -+ee: OUT DX,AL -+ef: OUT DX,eAX -+# 0xf0 - 0xff -+f0: LOCK (Prefix) -+f1: -+f2: REPNE (Prefix) | XACQUIRE (Prefix) -+f3: REP/REPE (Prefix) | XRELEASE (Prefix) -+f4: HLT -+f5: CMC -+f6: Grp3_1 Eb (1A) -+f7: Grp3_2 Ev (1A) -+f8: CLC -+f9: STC -+fa: CLI -+fb: STI -+fc: CLD -+fd: STD -+fe: Grp4 (1A) -+ff: Grp5 (1A) -+EndTable -+ -+Table: 2-byte opcode (0x0f) -+Referrer: 2-byte escape -+AVXcode: 1 -+# 0x0f 0x00-0x0f -+00: Grp6 (1A) -+01: Grp7 (1A) -+02: LAR Gv,Ew -+03: LSL Gv,Ew -+04: -+05: SYSCALL (o64) -+06: CLTS -+07: SYSRET (o64) -+08: INVD -+09: WBINVD -+0a: -+0b: UD2 (1B) -+0c: -+# AMD's prefetch group. Intel supports prefetchw(/1) only. -+0d: GrpP -+0e: FEMMS -+# 3DNow! uses the last imm byte as opcode extension. -+0f: 3DNow! Pq,Qq,Ib -+# 0x0f 0x10-0x1f -+# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands -+# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. -+# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
-+# Many AVX instructions lack the v1 superscript, according to the Intel
-+# AVX-Programming Reference A.1
-+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
-+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
-+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
-+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
-+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
-+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
-+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
-+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
-+18: Grp16 (1A)
-+19:
-+# The Intel SDM opcode map does not list MPX instructions. For now, using Gv
-+# for bnd registers and Ev for everything else is OK because the instruction
-+# decoder does not use the information except as an indication that there is
-+# a ModR/M byte.
-+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
-+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
-+1c:
-+1d:
-+1e:
-+1f: NOP Ev
-+# 0x0f 0x20-0x2f
-+20: MOV Rd,Cd
-+21: MOV Rd,Dd
-+22: MOV Cd,Rd
-+23: MOV Dd,Rd
-+24:
-+25:
-+26:
-+27:
-+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
-+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
-+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
-+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
-+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
-+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
-+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
-+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
-+# 0x0f 0x30-0x3f
-+30: WRMSR
-+31: RDTSC
-+32: RDMSR
-+33: RDPMC
-+34: SYSENTER
-+35: SYSEXIT
-+36:
-+37: GETSEC
-+38: escape # 3-byte escape 1
-+39:
-+3a: escape # 3-byte escape 2
-+3b:
-+3c:
-+3d:
-+3e:
-+3f:
-+# 0x0f 0x40-0x4f
-+40: CMOVO Gv,Ev
-+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
-+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
-+43: CMOVAE/NB/NC Gv,Ev
-+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
-+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
-+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
-+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
-+48: CMOVS Gv,Ev
-+49: CMOVNS Gv,Ev
-+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
-+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
-+4c: CMOVL/NGE Gv,Ev
-+4d: CMOVNL/GE Gv,Ev
-+4e: CMOVLE/NG Gv,Ev
-+4f: CMOVNLE/G Gv,Ev
-+# 0x0f 0x50-0x5f
-+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
-+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
-+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
-+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
-+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
-+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
-+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
-+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
-+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
-+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
-+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps
Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) -+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) -+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) -+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) -+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) -+# 0x0f 0x60-0x6f -+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) -+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) -+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) -+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) -+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) -+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) -+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) -+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) -+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) -+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) -+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) -+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) -+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) -+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) -+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) -+# 0x0f 0x70-0x7f -+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) -+71: Grp12 (1A) -+72: Grp13 (1A) -+73: Grp14 (1A) -+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) -+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) -+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) -+# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. -+77: emms | vzeroupper | vzeroall -+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) -+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) -+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) -+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) -+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) -+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) -+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) -+# 0x0f 0x80-0x8f -+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
-+80: JO Jz (f64) -+81: JNO Jz (f64) -+82: JB/JC/JNAE Jz (f64) -+83: JAE/JNB/JNC Jz (f64) -+84: JE/JZ Jz (f64) -+85: JNE/JNZ Jz (f64) -+86: JBE/JNA Jz (f64) -+87: JA/JNBE Jz (f64) -+88: JS Jz (f64) -+89: JNS Jz (f64) -+8a: JP/JPE Jz (f64) -+8b: JNP/JPO Jz (f64) -+8c: JL/JNGE Jz (f64) -+8d: JNL/JGE Jz (f64) -+8e: JLE/JNG Jz (f64) -+8f: JNLE/JG Jz (f64) -+# 0x0f 0x90-0x9f -+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) -+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) -+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) -+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) -+94: SETE/Z Eb -+95: SETNE/NZ Eb -+96: SETBE/NA Eb -+97: SETA/NBE Eb -+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) -+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) -+9a: SETP/PE Eb -+9b: SETNP/PO Eb -+9c: SETL/NGE Eb -+9d: SETNL/GE Eb -+9e: SETLE/NG Eb -+9f: SETNLE/G Eb -+# 0x0f 0xa0-0xaf -+a0: PUSH FS (d64) -+a1: POP FS (d64) -+a2: CPUID -+a3: BT Ev,Gv -+a4: SHLD Ev,Gv,Ib -+a5: SHLD Ev,Gv,CL -+a6: GrpPDLK -+a7: GrpRNG -+a8: PUSH GS (d64) -+a9: POP GS (d64) -+aa: RSM -+ab: BTS Ev,Gv -+ac: SHRD Ev,Gv,Ib -+ad: SHRD Ev,Gv,CL -+ae: Grp15 (1A),(1C) -+af: IMUL Gv,Ev -+# 0x0f 0xb0-0xbf -+b0: CMPXCHG Eb,Gb -+b1: CMPXCHG Ev,Gv -+b2: LSS Gv,Mp -+b3: BTR Ev,Gv -+b4: LFS Gv,Mp -+b5: LGS Gv,Mp -+b6: MOVZX Gv,Eb -+b7: MOVZX Gv,Ew -+b8: JMPE (!F3) | POPCNT Gv,Ev (F3) -+b9: Grp10 (1A) -+ba: Grp8 Ev,Ib (1A) -+bb: BTC Ev,Gv -+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) -+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) -+be: MOVSX Gv,Eb -+bf: MOVSX Gv,Ew -+# 0x0f 0xc0-0xcf -+c0: XADD Eb,Gb -+c1: XADD Ev,Gv -+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) -+c3: movnti My,Gy -+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) -+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) -+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) -+c7: Grp9 (1A) -+c8: BSWAP RAX/EAX/R8/R8D -+c9: BSWAP RCX/ECX/R9/R9D -+ca: BSWAP RDX/EDX/R10/R10D -+cb: BSWAP RBX/EBX/R11/R11D -+cc: BSWAP RSP/ESP/R12/R12D -+cd: BSWAP RBP/EBP/R13/R13D -+ce: BSWAP RSI/ESI/R14/R14D -+cf: BSWAP RDI/EDI/R15/R15D -+# 0x0f 0xd0-0xdf -+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) -+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) -+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) -+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) -+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) -+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) -+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) -+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) -+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) -+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) -+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) -+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) -+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) -+# 0x0f 0xe0-0xef -+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) -+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) -+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) -+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) -+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) -+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) -+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) -+e8: psubsb Pq,Qq | 
vpsubsb Vx,Hx,Wx (66),(v1) -+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) -+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) -+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) -+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) -+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) -+# 0x0f 0xf0-0xff -+f0: vlddqu Vx,Mx (F2) -+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) -+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) -+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) -+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) -+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) -+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) -+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) -+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) -+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) -+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) -+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) -+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) -+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) -+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -+ff: UD0 -+EndTable -+ -+Table: 3-byte opcode 1 (0x0f 0x38) -+Referrer: 3-byte escape 1 -+AVXcode: 2 -+# 0x0f 0x38 0x00-0x0f -+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) -+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) -+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) -+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) -+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) -+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) -+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) -+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) -+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) -+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) -+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) -+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) -+0c: vpermilps Vx,Hx,Wx (66),(v) -+0d: vpermilpd Vx,Hx,Wx (66),(v) -+0e: vtestps Vx,Wx (66),(v) -+0f: vtestpd Vx,Wx (66),(v) -+# 0x0f 0x38 0x10-0x1f -+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) -+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) -+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) -+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) -+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) -+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) -+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) -+17: vptest Vx,Wx (66) -+18: vbroadcastss Vx,Wd (66),(v) -+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) -+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) -+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) -+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) -+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) -+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -+1f: vpabsq Vx,Wx (66),(ev) -+# 0x0f 0x38 0x20-0x2f -+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) -+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) -+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) -+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) -+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) -+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) -+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) -+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) -+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) -+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m 
Vk,Ux (F3),(ev) -+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) -+2b: vpackusdw Vx,Hx,Wx (66),(v1) -+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) -+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) -+2e: vmaskmovps Mx,Hx,Vx (66),(v) -+2f: vmaskmovpd Mx,Hx,Vx (66),(v) -+# 0x0f 0x38 0x30-0x3f -+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) -+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) -+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) -+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) -+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) -+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) -+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) -+37: vpcmpgtq Vx,Hx,Wx (66),(v1) -+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) -+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) -+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) -+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) -+3c: vpmaxsb Vx,Hx,Wx (66),(v1) -+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) -+3e: vpmaxuw Vx,Hx,Wx (66),(v1) -+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) -+# 0x0f 0x38 0x40-0x8f -+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) -+41: vphminposuw Vdq,Wdq (66),(v1) -+42: vgetexpps/d Vx,Wx (66),(ev) -+43: vgetexpss/d Vx,Hx,Wx (66),(ev) -+44: vplzcntd/q Vx,Wx (66),(ev) -+45: vpsrlvd/q Vx,Hx,Wx (66),(v) -+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) -+47: vpsllvd/q Vx,Hx,Wx (66),(v) -+# Skip 0x48-0x4b -+4c: vrcp14ps/d Vpd,Wpd (66),(ev) -+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) -+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) -+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) -+# Skip 0x50-0x57 -+58: vpbroadcastd Vx,Wx (66),(v) -+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) -+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) -+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -+# Skip 0x5c-0x63 -+64: vpblendmd/q Vx,Hx,Wx (66),(ev) -+65: vblendmps/d Vx,Hx,Wx (66),(ev) -+66: vpblendmb/w Vx,Hx,Wx (66),(ev) -+# Skip 0x67-0x74 -+75: vpermi2b/w Vx,Hx,Wx (66),(ev) -+76: vpermi2d/q Vx,Hx,Wx (66),(ev) -+77: vpermi2ps/d Vx,Hx,Wx (66),(ev) -+78: vpbroadcastb Vx,Wx (66),(v) -+79: vpbroadcastw Vx,Wx (66),(v) -+7a: vpbroadcastb Vx,Rv (66),(ev) -+7b: vpbroadcastw Vx,Rv (66),(ev) -+7c: vpbroadcastd/q Vx,Rv (66),(ev) -+7d: vpermt2b/w Vx,Hx,Wx (66),(ev) -+7e: vpermt2d/q Vx,Hx,Wx (66),(ev) -+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) -+80: INVEPT Gy,Mdq (66) -+81: INVVPID Gy,Mdq (66) -+82: INVPCID Gy,Mdq (66) -+83: vpmultishiftqb Vx,Hx,Wx (66),(ev) -+88: vexpandps/d Vpd,Wpd (66),(ev) -+89: vpexpandd/q Vx,Wx (66),(ev) -+8a: vcompressps/d Wx,Vx (66),(ev) -+8b: vpcompressd/q Wx,Vx (66),(ev) -+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) -+8d: vpermb/w Vx,Hx,Wx (66),(ev) -+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) -+# 0x0f 0x38 0x90-0xbf (FMA) -+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) -+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) -+92: vgatherdps/d Vx,Hx,Wx (66),(v) -+93: vgatherqps/d Vx,Hx,Wx (66),(v) -+94: -+95: -+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) -+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) -+98: vfmadd132ps/d Vx,Hx,Wx (66),(v) -+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) -+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) -+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) -+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) -+9d: vfnmadd132ss/d Vx,Hx,Wx 
(66),(v),(v1) -+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) -+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) -+a0: vpscatterdd/q Wx,Vx (66),(ev) -+a1: vpscatterqd/q Wx,Vx (66),(ev) -+a2: vscatterdps/d Wx,Vx (66),(ev) -+a3: vscatterqps/d Wx,Vx (66),(ev) -+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) -+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) -+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) -+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) -+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) -+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) -+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) -+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) -+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) -+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) -+b4: vpmadd52luq Vx,Hx,Wx (66),(ev) -+b5: vpmadd52huq Vx,Hx,Wx (66),(ev) -+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) -+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) -+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) -+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) -+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) -+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) -+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) -+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) -+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) -+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) -+# 0x0f 0x38 0xc0-0xff -+c4: vpconflictd/q Vx,Wx (66),(ev) -+c6: Grp18 (1A) -+c7: Grp19 (1A) -+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) -+c9: sha1msg1 Vdq,Wdq -+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) -+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) -+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) -+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) -+db: VAESIMC Vdq,Wdq (66),(v1) -+dc: VAESENC Vdq,Hdq,Wdq (66),(v1) -+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) -+de: VAESDEC Vdq,Hdq,Wdq (66),(v1) -+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) -+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) -+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) -+f2: ANDN Gy,By,Ey (v) -+f3: Grp17 (1A) -+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) -+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) -+EndTable -+ -+Table: 3-byte opcode 2 (0x0f 0x3a) -+Referrer: 3-byte escape 2 -+AVXcode: 3 -+# 0x0f 0x3a 0x00-0xff -+00: vpermq Vqq,Wqq,Ib (66),(v) -+01: vpermpd Vqq,Wqq,Ib (66),(v) -+02: vpblendd Vx,Hx,Wx,Ib (66),(v) -+03: valignd/q Vx,Hx,Wx,Ib (66),(ev) -+04: vpermilps Vx,Wx,Ib (66),(v) -+05: vpermilpd Vx,Wx,Ib (66),(v) -+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) -+07: -+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) -+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) -+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) -+0c: vblendps Vx,Hx,Wx,Ib (66) -+0d: vblendpd Vx,Hx,Wx,Ib (66) -+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) -+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) -+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) -+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) -+16: vpextrd/q Ey,Vdq,Ib (66),(v1) -+17: vextractps Ed,Vdq,Ib (66),(v1) -+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) -+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) -+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) -+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) -+1d: vcvtps2ph Wx,Vx,Ib (66),(v) -+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) -+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) -+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) -+21: vinsertps Vdq,Hdq,Udq/Md,Ib 
(66),(v1) -+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) -+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -+26: vgetmantps/d Vx,Wx,Ib (66),(ev) -+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) -+30: kshiftrb/w Vk,Uk,Ib (66),(v) -+31: kshiftrd/q Vk,Uk,Ib (66),(v) -+32: kshiftlb/w Vk,Uk,Ib (66),(v) -+33: kshiftld/q Vk,Uk,Ib (66),(v) -+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) -+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) -+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) -+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) -+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) -+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) -+40: vdpps Vx,Hx,Wx,Ib (66) -+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) -+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) -+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) -+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) -+4a: vblendvps Vx,Hx,Wx,Lx (66),(v) -+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) -+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) -+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) -+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) -+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) -+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -+56: vreduceps/d Vx,Wx,Ib (66),(ev) -+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) -+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) -+61: vpcmpestri Vdq,Wdq,Ib (66),(v1) -+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) -+63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -+66: vfpclassps/d Vk,Wx,Ib (66),(ev) -+67: vfpclassss/d Vk,Wx,Ib (66),(ev) -+cc: sha1rnds4 Vdq,Wdq,Ib -+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -+f0: RORX Gy,Ey,Ib (F2),(v) -+EndTable -+ -+GrpTable: Grp1 -+0: ADD -+1: OR -+2: ADC -+3: SBB -+4: AND -+5: SUB -+6: XOR -+7: CMP -+EndTable -+ -+GrpTable: Grp1A -+0: POP -+EndTable -+ -+GrpTable: Grp2 -+0: ROL -+1: ROR -+2: RCL -+3: RCR -+4: SHL/SAL -+5: SHR -+6: -+7: SAR -+EndTable -+ -+GrpTable: Grp3_1 -+0: TEST Eb,Ib -+1: -+2: NOT Eb -+3: NEG Eb -+4: MUL AL,Eb -+5: IMUL AL,Eb -+6: DIV AL,Eb -+7: IDIV AL,Eb -+EndTable -+ -+GrpTable: Grp3_2 -+0: TEST Ev,Iz -+1: -+2: NOT Ev -+3: NEG Ev -+4: MUL rAX,Ev -+5: IMUL rAX,Ev -+6: DIV rAX,Ev -+7: IDIV rAX,Ev -+EndTable -+ -+GrpTable: Grp4 -+0: INC Eb -+1: DEC Eb -+EndTable -+ -+GrpTable: Grp5 -+0: INC Ev -+1: DEC Ev -+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
-+2: CALLN Ev (f64) -+3: CALLF Ep -+4: JMPN Ev (f64) -+5: JMPF Mp -+6: PUSH Ev (d64) -+7: -+EndTable -+ -+GrpTable: Grp6 -+0: SLDT Rv/Mw -+1: STR Rv/Mw -+2: LLDT Ew -+3: LTR Ew -+4: VERR Ew -+5: VERW Ew -+EndTable -+ -+GrpTable: Grp7 -+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) -+3: LIDT Ms -+4: SMSW Mw/Rv -+5: rdpkru (110),(11B) | wrpkru (111),(11B) -+6: LMSW Ew -+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) -+EndTable -+ -+GrpTable: Grp8 -+4: BT -+5: BTS -+6: BTR -+7: BTC -+EndTable -+ -+GrpTable: Grp9 -+1: CMPXCHG8B/16B Mq/Mdq -+3: xrstors -+4: xsavec -+5: xsaves -+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) -+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) -+EndTable -+ -+GrpTable: Grp10 -+# all are UD1 -+0: UD1 -+1: UD1 -+2: UD1 -+3: UD1 -+4: UD1 -+5: UD1 -+6: UD1 -+7: UD1 -+EndTable -+ -+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM -+GrpTable: Grp11A -+0: MOV Eb,Ib -+7: XABORT Ib (000),(11B) -+EndTable -+ -+GrpTable: Grp11B -+0: MOV Eb,Iz -+7: XBEGIN Jz (000),(11B) -+EndTable -+ -+GrpTable: Grp12 -+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) -+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) -+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) -+EndTable -+ -+GrpTable: Grp13 -+0: vprord/q Hx,Wx,Ib (66),(ev) -+1: vprold/q Hx,Wx,Ib (66),(ev) -+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) -+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) -+EndTable -+ -+GrpTable: Grp14 -+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) -+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) -+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) -+7: vpslldq Hx,Ux,Ib (66),(11B),(v1) -+EndTable -+ -+GrpTable: Grp15 -+0: fxsave | RDFSBASE Ry (F3),(11B) -+1: fxstor | RDGSBASE Ry (F3),(11B) -+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) -+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -+4: XSAVE | ptwrite Ey (F3),(11B) -+5: XRSTOR | lfence (11B) -+6: XSAVEOPT | clwb (66) | mfence (11B) -+7: clflush | clflushopt (66) | sfence (11B) -+EndTable -+ -+GrpTable: Grp16 -+0: prefetch NTA -+1: prefetch T0 -+2: prefetch T1 -+3: prefetch T2 -+EndTable -+ -+GrpTable: Grp17 -+1: BLSR By,Ey (v) -+2: BLSMSK By,Ey (v) -+3: BLSI By,Ey (v) -+EndTable -+ -+GrpTable: Grp18 -+1: vgatherpf0dps/d Wx (66),(ev) -+2: vgatherpf1dps/d Wx (66),(ev) -+5: vscatterpf0dps/d Wx (66),(ev) -+6: vscatterpf1dps/d Wx (66),(ev) -+EndTable -+ -+GrpTable: Grp19 -+1: vgatherpf0qps/d Wx (66),(ev) -+2: vgatherpf1qps/d Wx (66),(ev) -+5: vscatterpf0qps/d Wx (66),(ev) -+6: vscatterpf1qps/d Wx (66),(ev) -+EndTable -+ -+# AMD's Prefetch Group -+GrpTable: GrpP -+0: PREFETCH -+1: PREFETCHW -+EndTable -+ -+GrpTable: GrpPDLK -+0: MONTMUL -+1: XSHA1 -+2: XSHA2 -+EndTable -+ -+GrpTable: GrpRNG -+0: xstore-rng -+1: xcrypt-ecb -+2: xcrypt-cbc -+4: xcrypt-cfb -+5: xcrypt-ofb -+EndTable -diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk -new file mode 100644 -index 0000000..b02a36b ---- /dev/null -+++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk -@@ -0,0 +1,393 @@ -+#!/bin/awk -f -+# SPDX-License-Identifier: GPL-2.0 -+# gen-insn-attr-x86.awk: Instruction attribute table 
generator
-+# Written by Masami Hiramatsu <mhiramat@redhat.com>
-+#
-+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
-+
-+# Awk implementation sanity check
-+function check_awk_implement() {
-+	if (sprintf("%x", 0) != "0")
-+		return "Your awk has a printf-format problem."
-+	return ""
-+}
-+
-+# Clear working vars
-+function clear_vars() {
-+	delete table
-+	delete lptable2
-+	delete lptable1
-+	delete lptable3
-+	eid = -1 # escape id
-+	gid = -1 # group id
-+	aid = -1 # AVX id
-+	tname = ""
-+}
-+
-+BEGIN {
-+	# Implementation error checking
-+	awkchecked = check_awk_implement()
-+	if (awkchecked != "") {
-+		print "Error: " awkchecked > "/dev/stderr"
-+		print "Please try to use gawk." > "/dev/stderr"
-+		exit 1
-+	}
-+
-+	# Setup generating tables
-+	print "/* x86 opcode map generated from x86-opcode-map.txt */"
-+	print "/* Do not change this code. */\n"
-+	ggid = 1
-+	geid = 1
-+	gaid = 0
-+	delete etable
-+	delete gtable
-+	delete atable
-+
-+	opnd_expr = "^[A-Za-z/]"
-+	ext_expr = "^\\("
-+	sep_expr = "^\\|$"
-+	group_expr = "^Grp[0-9A-Za-z]+"
-+
-+	imm_expr = "^[IJAOL][a-z]"
-+	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
-+	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
-+	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
-+	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
-+	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
-+	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
-+	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
-+	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
-+	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
-+	imm_flag["Ob"] = "INAT_MOFFSET"
-+	imm_flag["Ov"] = "INAT_MOFFSET"
-+	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
-+
-+	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
-+	force64_expr = "\\([df]64\\)"
-+	rex_expr = "^REX(\\.[XRWB]+)*"
-+	fpu_expr = "^ESC" # TODO
-+
-+	lprefix1_expr = "\\((66|!F3)\\)"
-+	lprefix2_expr = "\\(F3\\)"
-+	lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
-+	lprefix_expr = "\\((66|F2|F3)\\)"
-+	max_lprefix = 4
-+
-+	# All opcodes starting with lower-case 'v', 'k' or with the (v1)
-+	# superscript accept the VEX prefix
-+	vexok_opcode_expr = "^[vk].*"
-+	vexok_expr = "\\(v1\\)"
-+	# All opcodes with the (v) superscript support *only* the VEX prefix
-+	vexonly_expr = "\\(v\\)"
-+	# All opcodes with the (ev) superscript support *only* the EVEX prefix
-+	evexonly_expr = "\\(ev\\)"
-+
-+	prefix_expr = "\\(Prefix\\)"
-+	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
-+	prefix_num["REPNE"] = "INAT_PFX_REPNE"
-+	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
-+	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
-+	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
-+	prefix_num["LOCK"] = "INAT_PFX_LOCK"
-+	prefix_num["SEG=CS"] = "INAT_PFX_CS"
-+	prefix_num["SEG=DS"] = "INAT_PFX_DS"
-+	prefix_num["SEG=ES"] = "INAT_PFX_ES"
-+	prefix_num["SEG=FS"] = "INAT_PFX_FS"
-+	prefix_num["SEG=GS"] = "INAT_PFX_GS"
-+	prefix_num["SEG=SS"] = "INAT_PFX_SS"
-+	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
-+	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
-+	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
-+	prefix_num["EVEX"] = "INAT_PFX_EVEX"
-+
-+	clear_vars()
-+}
-+
-+function semantic_error(msg) {
-+	print "Semantic error at " NR ": " msg > "/dev/stderr"
-+	exit 1
-+}
-+
-+function debug(msg) {
-+	print "DEBUG: " msg
-+}
-+
-+function array_size(arr, i,c) {
-+	c = 0
-+	for (i in arr)
-+		c++
-+	return c
-+}
-+
-+/^Table:/ {
-+	print "/* " $0 " */"
-+	if (tname != "")
-+		semantic_error("Hit Table: before EndTable:.");
-+}
-+
-+/^Referrer:/ {
-+	if (NF != 1) {
-+		# escape opcode
table -+ ref = "" -+ for (i = 2; i <= NF; i++) -+ ref = ref $i -+ eid = escape[ref] -+ tname = sprintf("inat_escape_table_%d", eid) -+ } -+} -+ -+/^AVXcode:/ { -+ if (NF != 1) { -+ # AVX/escape opcode table -+ aid = $2 -+ if (gaid <= aid) -+ gaid = aid + 1 -+ if (tname == "") # AVX only opcode table -+ tname = sprintf("inat_avx_table_%d", $2) -+ } -+ if (aid == -1 && eid == -1) # primary opcode table -+ tname = "inat_primary_table" -+} -+ -+/^GrpTable:/ { -+ print "/* " $0 " */" -+ if (!($2 in group)) -+ semantic_error("No group: " $2 ) -+ gid = group[$2] -+ tname = "inat_group_table_" gid -+} -+ -+function print_table(tbl,name,fmt,n) -+{ -+ print "const insn_attr_t " name " = {" -+ for (i = 0; i < n; i++) { -+ id = sprintf(fmt, i) -+ if (tbl[id]) -+ print " [" id "] = " tbl[id] "," -+ } -+ print "};" -+} -+ -+/^EndTable/ { -+ if (gid != -1) { -+ # print group tables -+ if (array_size(table) != 0) { -+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", -+ "0x%x", 8) -+ gtable[gid,0] = tname -+ } -+ if (array_size(lptable1) != 0) { -+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", -+ "0x%x", 8) -+ gtable[gid,1] = tname "_1" -+ } -+ if (array_size(lptable2) != 0) { -+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", -+ "0x%x", 8) -+ gtable[gid,2] = tname "_2" -+ } -+ if (array_size(lptable3) != 0) { -+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", -+ "0x%x", 8) -+ gtable[gid,3] = tname "_3" -+ } -+ } else { -+ # print primary/escaped tables -+ if (array_size(table) != 0) { -+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", -+ "0x%02x", 256) -+ etable[eid,0] = tname -+ if (aid >= 0) -+ atable[aid,0] = tname -+ } -+ if (array_size(lptable1) != 0) { -+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", -+ "0x%02x", 256) -+ etable[eid,1] = tname "_1" -+ if (aid >= 0) -+ atable[aid,1] = tname "_1" -+ } -+ if (array_size(lptable2) != 0) { -+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", -+ "0x%02x", 256) -+ etable[eid,2] = tname "_2" -+ if (aid >= 0) -+ atable[aid,2] = tname "_2" -+ } -+ if (array_size(lptable3) != 0) { -+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", -+ "0x%02x", 256) -+ etable[eid,3] = tname "_3" -+ if (aid >= 0) -+ atable[aid,3] = tname "_3" -+ } -+ } -+ print "" -+ clear_vars() -+} -+ -+function add_flags(old,new) { -+ if (old && new) -+ return old " | " new -+ else if (old) -+ return old -+ else -+ return new -+} -+ -+# convert operands to flags. 
-+function convert_operands(count,opnd, i,j,imm,mod) -+{ -+ imm = null -+ mod = null -+ for (j = 1; j <= count; j++) { -+ i = opnd[j] -+ if (match(i, imm_expr) == 1) { -+ if (!imm_flag[i]) -+ semantic_error("Unknown imm opnd: " i) -+ if (imm) { -+ if (i != "Ib") -+ semantic_error("Second IMM error") -+ imm = add_flags(imm, "INAT_SCNDIMM") -+ } else -+ imm = imm_flag[i] -+ } else if (match(i, modrm_expr)) -+ mod = "INAT_MODRM" -+ } -+ return add_flags(imm, mod) -+} -+ -+/^[0-9a-f]+\:/ { -+ if (NR == 1) -+ next -+ # get index -+ idx = "0x" substr($1, 1, index($1,":") - 1) -+ if (idx in table) -+ semantic_error("Redefine " idx " in " tname) -+ -+ # check if escaped opcode -+ if ("escape" == $2) { -+ if ($3 != "#") -+ semantic_error("No escaped name") -+ ref = "" -+ for (i = 4; i <= NF; i++) -+ ref = ref $i -+ if (ref in escape) -+ semantic_error("Redefine escape (" ref ")") -+ escape[ref] = geid -+ geid++ -+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" -+ next -+ } -+ -+ variant = null -+ # converts -+ i = 2 -+ while (i <= NF) { -+ opcode = $(i++) -+ delete opnds -+ ext = null -+ flags = null -+ opnd = null -+ # parse one opcode -+ if (match($i, opnd_expr)) { -+ opnd = $i -+ count = split($(i++), opnds, ",") -+ flags = convert_operands(count, opnds) -+ } -+ if (match($i, ext_expr)) -+ ext = $(i++) -+ if (match($i, sep_expr)) -+ i++ -+ else if (i < NF) -+ semantic_error($i " is not a separator") -+ -+ # check if group opcode -+ if (match(opcode, group_expr)) { -+ if (!(opcode in group)) { -+ group[opcode] = ggid -+ ggid++ -+ } -+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") -+ } -+ # check force(or default) 64bit -+ if (match(ext, force64_expr)) -+ flags = add_flags(flags, "INAT_FORCE64") -+ -+ # check REX prefix -+ if (match(opcode, rex_expr)) -+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") -+ -+ # check coprocessor escape : TODO -+ if (match(opcode, fpu_expr)) -+ flags = add_flags(flags, "INAT_MODRM") -+ -+ # check VEX codes -+ if (match(ext, evexonly_expr)) -+ flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") -+ else if (match(ext, vexonly_expr)) -+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") -+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) -+ flags = add_flags(flags, "INAT_VEXOK") -+ -+ # check prefixes -+ if (match(ext, prefix_expr)) { -+ if (!prefix_num[opcode]) -+ semantic_error("Unknown prefix: " opcode) -+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") -+ } -+ if (length(flags) == 0) -+ continue -+ # check if last prefix -+ if (match(ext, lprefix1_expr)) { -+ lptable1[idx] = add_flags(lptable1[idx],flags) -+ variant = "INAT_VARIANT" -+ } -+ if (match(ext, lprefix2_expr)) { -+ lptable2[idx] = add_flags(lptable2[idx],flags) -+ variant = "INAT_VARIANT" -+ } -+ if (match(ext, lprefix3_expr)) { -+ lptable3[idx] = add_flags(lptable3[idx],flags) -+ variant = "INAT_VARIANT" -+ } -+ if (!match(ext, lprefix_expr)){ -+ table[idx] = add_flags(table[idx],flags) -+ } -+ } -+ if (variant) -+ table[idx] = add_flags(table[idx],variant) -+} -+ -+END { -+ if (awkchecked != "") -+ exit 1 -+ # print escape opcode map's array -+ print "/* Escape opcode map array */" -+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ -+ "[INAT_LSTPFX_MAX + 1] = {" -+ for (i = 0; i < geid; i++) -+ for (j = 0; j < max_lprefix; j++) -+ if (etable[i,j]) -+ print " ["i"]["j"] = "etable[i,j]"," -+ print "};\n" -+ # print group opcode map's array -+ print "/* Group opcode map array */" -+ print "const 
insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ -+ "[INAT_LSTPFX_MAX + 1] = {" -+ for (i = 0; i < ggid; i++) -+ for (j = 0; j < max_lprefix; j++) -+ if (gtable[i,j]) -+ print " ["i"]["j"] = "gtable[i,j]"," -+ print "};\n" -+ # print AVX opcode map's array -+ print "/* AVX opcode map array */" -+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ -+ "[INAT_LSTPFX_MAX + 1] = {" -+ for (i = 0; i < gaid; i++) -+ for (j = 0; j < max_lprefix; j++) -+ if (atable[i,j]) -+ print " ["i"]["j"] = "atable[i,j]"," -+ print "};" -+} -+ -diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c -index 365c34e..694abc6 100644 ---- a/tools/objtool/builtin-check.c -+++ b/tools/objtool/builtin-check.c -@@ -29,7 +29,7 @@ - #include "builtin.h" - #include "check.h" - --bool nofp; -+bool no_fp, no_unreachable, retpoline, module; - - static const char * const check_usage[] = { - "objtool check [<options>] file.o", -@@ -37,7 +37,10 @@ static const char * const check_usage[] = { - }; - - const struct option check_options[] = { -- OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), -+ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), -+ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), -+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), -+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), - OPT_END(), - }; - -@@ -52,5 +55,5 @@ int cmd_check(int argc, const char **argv) - - objname = argv[0]; - -- return check(objname, nofp); -+ return check(objname, false); - } -diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c -new file mode 100644 -index 0000000..77ea2b9 ---- /dev/null -+++ b/tools/objtool/builtin-orc.c -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. -+ */ -+ -+/* -+ * objtool orc: -+ * -+ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip -+ * sections to it, which is used by the in-kernel ORC unwinder. -+ * -+ * This command is a superset of "objtool check". 
-+ */ -+ -+#include <string.h> -+#include "builtin.h" -+#include "check.h" -+ -+ -+static const char *orc_usage[] = { -+ "objtool orc generate [<options>] file.o", -+ "objtool orc dump file.o", -+ NULL, -+}; -+ -+int cmd_orc(int argc, const char **argv) -+{ -+ const char *objname; -+ -+ argc--; argv++; -+ if (argc <= 0) -+ usage_with_options(orc_usage, check_options); -+ -+ if (!strncmp(argv[0], "gen", 3)) { -+ argc = parse_options(argc, argv, check_options, orc_usage, 0); -+ if (argc != 1) -+ usage_with_options(orc_usage, check_options); -+ -+ objname = argv[0]; -+ -+ return check(objname, true); -+ } -+ -+ if (!strcmp(argv[0], "dump")) { -+ if (argc != 2) -+ usage_with_options(orc_usage, check_options); -+ -+ objname = argv[1]; -+ -+ return orc_dump(objname); -+ } -+ -+ usage_with_options(orc_usage, check_options); -+ -+ return 0; -+} -diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h -index 34d2ba7..28ff40e 100644 ---- a/tools/objtool/builtin.h -+++ b/tools/objtool/builtin.h -@@ -17,6 +17,12 @@ - #ifndef _BUILTIN_H - #define _BUILTIN_H - -+#include <subcmd/parse-options.h> -+ -+extern const struct option check_options[]; -+extern bool no_fp, no_unreachable, retpoline, module; -+ - extern int cmd_check(int argc, const char **argv); -+extern int cmd_orc(int argc, const char **argv); - - #endif /* _BUILTIN_H */ -diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h -new file mode 100644 -index 0000000..2fe883c ---- /dev/null -+++ b/tools/objtool/cfi.h -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
-+ */ -+ -+#ifndef _OBJTOOL_CFI_H -+#define _OBJTOOL_CFI_H -+ -+#define CFI_UNDEFINED -1 -+#define CFI_CFA -2 -+#define CFI_SP_INDIRECT -3 -+#define CFI_BP_INDIRECT -4 -+ -+#define CFI_AX 0 -+#define CFI_DX 1 -+#define CFI_CX 2 -+#define CFI_BX 3 -+#define CFI_SI 4 -+#define CFI_DI 5 -+#define CFI_BP 6 -+#define CFI_SP 7 -+#define CFI_R8 8 -+#define CFI_R9 9 -+#define CFI_R10 10 -+#define CFI_R11 11 -+#define CFI_R12 12 -+#define CFI_R13 13 -+#define CFI_R14 14 -+#define CFI_R15 15 -+#define CFI_RA 16 -+#define CFI_NUM_REGS 17 -+ -+struct cfi_reg { -+ int base; -+ int offset; -+}; -+ -+struct cfi_state { -+ struct cfi_reg cfa; -+ struct cfi_reg regs[CFI_NUM_REGS]; -+}; -+ -+#endif /* _OBJTOOL_CFI_H */ -diff --git a/tools/objtool/check.c b/tools/objtool/check.c -index b7a0af5..c8b8b71 100644 ---- a/tools/objtool/check.c -+++ b/tools/objtool/check.c -@@ -18,6 +18,7 @@ - #include <string.h> - #include <stdlib.h> - -+#include "builtin.h" - #include "check.h" - #include "elf.h" - #include "special.h" -@@ -25,12 +26,7 @@ - #include "warn.h" - - #include <linux/hashtable.h> -- --#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -- --#define STATE_FP_SAVED 0x1 --#define STATE_FP_SETUP 0x2 --#define STATE_FENTRY 0x4 -+#include <linux/kernel.h> - - struct alternative { - struct list_head list; -@@ -38,10 +34,10 @@ struct alternative { - }; - - const char *objname; --static bool nofp; -+struct cfi_state initial_func_cfi; - --static struct instruction *find_insn(struct objtool_file *file, -- struct section *sec, unsigned long offset) -+struct instruction *find_insn(struct objtool_file *file, -+ struct section *sec, unsigned long offset) - { - struct instruction *insn; - -@@ -57,28 +53,12 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, - { - struct instruction *next = list_next_entry(insn, list); - -- if (&next->list == &file->insn_list || next->sec != insn->sec) -+ if (!next || &next->list == &file->insn_list || next->sec != insn->sec) - return NULL; - - return next; - } - --static bool gcov_enabled(struct objtool_file *file) --{ -- struct section *sec; -- struct symbol *sym; -- -- list_for_each_entry(sec, &file->elf->sections, list) -- list_for_each_entry(sym, &sec->symbol_list, list) -- if (!strncmp(sym->name, "__gcov_.", 8)) -- return true; -- -- return false; --} -- --#define for_each_insn(file, insn) \ -- list_for_each_entry(insn, &file->insn_list, list) -- - #define func_for_each_insn(file, func, insn) \ - for (insn = find_insn(file, func->sec, func->offset); \ - insn && &insn->list != &file->insn_list && \ -@@ -95,6 +75,9 @@ static bool gcov_enabled(struct objtool_file *file) - #define sec_for_each_insn_from(file, insn) \ - for (; insn; insn = next_insn_same_sec(file, insn)) - -+#define sec_for_each_insn_continue(file, insn) \ -+ for (insn = next_insn_same_sec(file, insn); insn; \ -+ insn = next_insn_same_sec(file, insn)) - - /* - * Check if the function has been manually whitelisted with the -@@ -104,7 +87,6 @@ static bool gcov_enabled(struct objtool_file *file) - static bool ignore_func(struct objtool_file *file, struct symbol *func) - { - struct rela *rela; -- struct instruction *insn; - - /* check for STACK_FRAME_NON_STANDARD */ - if (file->whitelist && file->whitelist->rela) -@@ -117,11 +99,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func) - return true; - } - -- /* check if it has a context switching instruction */ -- func_for_each_insn(file, func, insn) -- if (insn->type == INSN_CONTEXT_SWITCH) -- return true; -- - return false; - } 
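(Editor's note: for reference, the whitelist consulted by ignore_func() in the hunk above is populated from kernel C code with the STACK_FRAME_NON_STANDARD() macro from include/linux/frame.h, which records the function's address in the .discard.func_stack_frame_non_standard section that objtool loads as file->whitelist. A minimal sketch, with a hypothetical function name, not part of the patch:

#include <linux/frame.h>

/* Hypothetical function whose hand-written control flow objtool cannot validate. */
static void hypothetical_asm_trampoline(void)
{
	/* ... code with a non-standard stack frame ... */
}
/* Emit this function's address into .discard.func_stack_frame_non_standard. */
STACK_FRAME_NON_STANDARD(hypothetical_asm_trampoline);

objtool then skips stack validation for that function.)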
- -@@ -159,7 +136,8 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, - "complete_and_exit", - "kvm_spurious_fault", - "__reiserfs_panic", -- "lbug_with_loc" -+ "lbug_with_loc", -+ "fortify_panic", - }; - - if (func->bind == STB_WEAK) -@@ -234,6 +212,20 @@ static int dead_end_function(struct objtool_file *file, struct symbol *func) - return __dead_end_function(file, func, 0); - } - -+static void clear_insn_state(struct insn_state *state) -+{ -+ int i; -+ -+ memset(state, 0, sizeof(*state)); -+ state->cfa.base = CFI_UNDEFINED; -+ for (i = 0; i < CFI_NUM_REGS; i++) { -+ state->regs[i].base = CFI_UNDEFINED; -+ state->vals[i].base = CFI_UNDEFINED; -+ } -+ state->drap_reg = CFI_UNDEFINED; -+ state->drap_offset = -1; -+} -+ - /* - * Call the arch-specific instruction decoder for all the instructions and add - * them to the global instruction list. -@@ -246,30 +238,42 @@ static int decode_instructions(struct objtool_file *file) - struct instruction *insn; - int ret; - -- list_for_each_entry(sec, &file->elf->sections, list) { -+ for_each_sec(file, sec) { - - if (!(sec->sh.sh_flags & SHF_EXECINSTR)) - continue; - -+ if (strcmp(sec->name, ".altinstr_replacement") && -+ strcmp(sec->name, ".altinstr_aux") && -+ strncmp(sec->name, ".discard.", 9)) -+ sec->text = true; -+ - for (offset = 0; offset < sec->len; offset += insn->len) { - insn = malloc(sizeof(*insn)); -+ if (!insn) { -+ WARN("malloc failed"); -+ return -1; -+ } - memset(insn, 0, sizeof(*insn)); -- - INIT_LIST_HEAD(&insn->alts); -+ clear_insn_state(&insn->state); -+ - insn->sec = sec; - insn->offset = offset; - - ret = arch_decode_instruction(file->elf, sec, offset, - sec->len - offset, - &insn->len, &insn->type, -- &insn->immediate); -+ &insn->immediate, -+ &insn->stack_op); - if (ret) -- return ret; -+ goto err; - - if (!insn->type || insn->type > INSN_LAST) { - WARN_FUNC("invalid instruction type %d", - insn->sec, insn->offset, insn->type); -- return -1; -+ ret = -1; -+ goto err; - } - - hash_add(file->insn_hash, &insn->hash, insn->offset); -@@ -293,10 +297,14 @@ static int decode_instructions(struct objtool_file *file) - } - - return 0; -+ -+err: -+ free(insn); -+ return ret; - } - - /* -- * Find all uses of the unreachable() macro, which are code path dead ends. -+ * Mark "ud2" instructions and manually annotated dead ends. - */ - static int add_dead_ends(struct objtool_file *file) - { -@@ -305,13 +313,24 @@ static int add_dead_ends(struct objtool_file *file) - struct instruction *insn; - bool found; - -- sec = find_section_by_name(file->elf, ".rela__unreachable"); -+ /* -+ * By default, "ud2" is a dead end unless otherwise annotated, because -+ * GCC 7 inserts it for certain divide-by-zero cases. -+ */ -+ for_each_insn(file, insn) -+ if (insn->type == INSN_BUG) -+ insn->dead_end = true; -+ -+ /* -+ * Check for manually annotated dead ends. -+ */ -+ sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!sec) -- return 0; -+ goto reachable; - - list_for_each_entry(rela, &sec->rela_list, list) { - if (rela->sym->type != STT_SECTION) { -- WARN("unexpected relocation symbol type in .rela__unreachable"); -+ WARN("unexpected relocation symbol type in %s", sec->name); - return -1; - } - insn = find_insn(file, rela->sym->sec, rela->addend); -@@ -340,6 +359,48 @@ static int add_dead_ends(struct objtool_file *file) - insn->dead_end = true; - } - -+reachable: -+ /* -+ * These manually annotated reachable checks are needed for GCC 4.4, -+ * where the Linux unreachable() macro isn't supported. 
In that case -+ * GCC doesn't know the "ud2" is fatal, so it generates code as if it's -+ * not a dead end. -+ */ -+ sec = find_section_by_name(file->elf, ".rela.discard.reachable"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in %s", sec->name); -+ return -1; -+ } -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (insn) -+ insn = list_prev_entry(insn, list); -+ else if (rela->addend == rela->sym->sec->len) { -+ found = false; -+ list_for_each_entry_reverse(insn, &file->insn_list, list) { -+ if (insn->sec == rela->sym->sec) { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) { -+ WARN("can't find reachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ } else { -+ WARN("can't find reachable insn at %s+0x%x", -+ rela->sym->sec->name, rela->addend); -+ return -1; -+ } -+ -+ insn->dead_end = false; -+ } -+ - return 0; - } - -@@ -352,7 +413,7 @@ static void add_ignores(struct objtool_file *file) - struct section *sec; - struct symbol *func; - -- list_for_each_entry(sec, &file->elf->sections, list) { -+ for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; -@@ -361,7 +422,7 @@ static void add_ignores(struct objtool_file *file) - continue; - - func_for_each_insn(file, func, insn) -- insn->visited = true; -+ insn->ignore = true; - } - } - } -@@ -415,8 +476,7 @@ static int add_jump_destinations(struct objtool_file *file) - insn->type != INSN_JUMP_UNCONDITIONAL) - continue; - -- /* skip ignores */ -- if (insn->visited) -+ if (insn->ignore) - continue; - - rela = find_rela_by_dest_range(insn->sec, insn->offset, -@@ -436,6 +496,7 @@ static int add_jump_destinations(struct objtool_file *file) - * disguise, so convert them accordingly. - */ - insn->type = INSN_JUMP_DYNAMIC; -+ insn->retpoline_safe = true; - continue; - } else { - /* sibling call */ -@@ -483,18 +544,15 @@ static int add_call_destinations(struct objtool_file *file) - dest_off = insn->offset + insn->len + insn->immediate; - insn->call_dest = find_symbol_by_offset(insn->sec, - dest_off); -- /* -- * FIXME: Thanks to retpolines, it's now considered -- * normal for a function to call within itself. So -- * disable this warning for now. 
-- */ --#if 0 -- if (!insn->call_dest) { -- WARN_FUNC("can't find call dest symbol at offset 0x%lx", -- insn->sec, insn->offset, dest_off); -+ -+ if (!insn->call_dest && !insn->ignore) { -+ WARN_FUNC("unsupported intra-function call", -+ insn->sec, insn->offset); -+ if (retpoline) -+ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); - return -1; - } --#endif -+ - } else if (rela->sym->type == STT_SECTION) { - insn->call_dest = find_symbol_by_offset(rela->sym->sec, - rela->addend+4); -@@ -538,7 +596,7 @@ static int handle_group_alt(struct objtool_file *file, - struct instruction *orig_insn, - struct instruction **new_insn) - { -- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; -+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; - unsigned long dest_off; - - last_orig_insn = NULL; -@@ -554,25 +612,30 @@ static int handle_group_alt(struct objtool_file *file, - last_orig_insn = insn; - } - -- if (!next_insn_same_sec(file, last_orig_insn)) { -- WARN("%s: don't know how to handle alternatives at end of section", -- special_alt->orig_sec->name); -- return -1; -- } -- -- fake_jump = malloc(sizeof(*fake_jump)); -- if (!fake_jump) { -- WARN("malloc failed"); -- return -1; -+ if (next_insn_same_sec(file, last_orig_insn)) { -+ fake_jump = malloc(sizeof(*fake_jump)); -+ if (!fake_jump) { -+ WARN("malloc failed"); -+ return -1; -+ } -+ memset(fake_jump, 0, sizeof(*fake_jump)); -+ INIT_LIST_HEAD(&fake_jump->alts); -+ clear_insn_state(&fake_jump->state); -+ -+ fake_jump->sec = special_alt->new_sec; -+ fake_jump->offset = -1; -+ fake_jump->type = INSN_JUMP_UNCONDITIONAL; -+ fake_jump->jump_dest = list_next_entry(last_orig_insn, list); -+ fake_jump->ignore = true; - } -- memset(fake_jump, 0, sizeof(*fake_jump)); -- INIT_LIST_HEAD(&fake_jump->alts); -- fake_jump->sec = special_alt->new_sec; -- fake_jump->offset = -1; -- fake_jump->type = INSN_JUMP_UNCONDITIONAL; -- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - - if (!special_alt->new_len) { -+ if (!fake_jump) { -+ WARN("%s: empty alternative at end of section", -+ special_alt->orig_sec->name); -+ return -1; -+ } -+ - *new_insn = fake_jump; - return 0; - } -@@ -585,6 +648,8 @@ static int handle_group_alt(struct objtool_file *file, - - last_new_insn = insn; - -+ insn->ignore = orig_insn->ignore_alts; -+ - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) - continue; -@@ -593,8 +658,14 @@ static int handle_group_alt(struct objtool_file *file, - continue; - - dest_off = insn->offset + insn->len + insn->immediate; -- if (dest_off == special_alt->new_off + special_alt->new_len) -+ if (dest_off == special_alt->new_off + special_alt->new_len) { -+ if (!fake_jump) { -+ WARN("%s: alternative jump to end of section", -+ special_alt->orig_sec->name); -+ return -1; -+ } - insn->jump_dest = fake_jump; -+ } - - if (!insn->jump_dest) { - WARN_FUNC("can't find alternative jump destination", -@@ -609,7 +680,8 @@ static int handle_group_alt(struct objtool_file *file, - return -1; - } - -- list_add(&fake_jump->list, &last_new_insn->list); -+ if (fake_jump) -+ list_add(&fake_jump->list, &last_new_insn->list); - - return 0; - } -@@ -656,6 +728,7 @@ static int add_special_section_alts(struct objtool_file *file) - return ret; - - list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { -+ - orig_insn = find_insn(file, special_alt->orig_sec, - special_alt->orig_off); - if (!orig_insn) { -@@ -665,10 
+738,6 @@ static int add_special_section_alts(struct objtool_file *file) - goto out; - } - -- /* Ignore retpoline alternatives. */ -- if (orig_insn->ignore_alts) -- continue; -- - new_insn = NULL; - if (!special_alt->group || special_alt->new_len) { - new_insn = find_insn(file, special_alt->new_sec, -@@ -784,8 +853,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, - * This is a fairly uncommon pattern which is new for GCC 6. As of this - * writing, there are 11 occurrences of it in the allmodconfig kernel. - * -+ * As of GCC 7 there are quite a few more of these and the 'in between' code -+ * is significant. Esp. with KASAN enabled some of the code between the mov -+ * and jmpq uses .rodata itself, which can confuse things. -+ * - * TODO: Once we have DWARF CFI and smarter instruction decoding logic, - * ensure the same register is used in the mov and jump instructions. -+ * -+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps. - */ - static struct rela *find_switch_table(struct objtool_file *file, - struct symbol *func, -@@ -807,12 +882,25 @@ static struct rela *find_switch_table(struct objtool_file *file, - text_rela->addend + 4); - if (!rodata_rela) - return NULL; -+ - file->ignore_unreachables = true; - return rodata_rela; - } - - /* case 3 */ -- func_for_each_insn_continue_reverse(file, func, insn) { -+ /* -+ * Backward search using the @first_jump_src links, these help avoid -+ * much of the 'in between' code. Which avoids us getting confused by -+ * it. -+ */ -+ for (insn = list_prev_entry(insn, list); -+ -+ &insn->list != &file->insn_list && -+ insn->sec == func->sec && -+ insn->offset >= func->offset; -+ -+ insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { -+ - if (insn->type == INSN_JUMP_DYNAMIC) - break; - -@@ -836,20 +924,42 @@ static struct rela *find_switch_table(struct objtool_file *file, - if (find_symbol_containing(file->rodata, text_rela->addend)) - continue; - -- return find_rela_by_dest(file->rodata, text_rela->addend); -+ rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); -+ if (!rodata_rela) -+ continue; -+ -+ return rodata_rela; - } - - return NULL; - } - -+ - static int add_func_switch_tables(struct objtool_file *file, - struct symbol *func) - { -- struct instruction *insn, *prev_jump = NULL; -+ struct instruction *insn, *last = NULL, *prev_jump = NULL; - struct rela *rela, *prev_rela = NULL; - int ret; - - func_for_each_insn(file, func, insn) { -+ if (!last) -+ last = insn; -+ -+ /* -+ * Store back-pointers for unconditional forward jumps such -+ * that find_switch_table() can back-track using those and -+ * avoid some potentially confusing code. 
-+ */ -+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && -+ insn->offset > last->offset && -+ insn->jump_dest->offset > insn->offset && -+ !insn->jump_dest->first_jump_src) { -+ -+ insn->jump_dest->first_jump_src = insn; -+ last = insn->jump_dest; -+ } -+ - if (insn->type != INSN_JUMP_DYNAMIC) - continue; - -@@ -896,7 +1006,7 @@ static int add_switch_table_alts(struct objtool_file *file) - if (!file->rodata || !file->rodata->rela) - return 0; - -- list_for_each_entry(sec, &file->elf->sections, list) { -+ for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; -@@ -910,6 +1020,134 @@ static int add_switch_table_alts(struct objtool_file *file) - return 0; - } - -+static int read_unwind_hints(struct objtool_file *file) -+{ -+ struct section *sec, *relasec; -+ struct rela *rela; -+ struct unwind_hint *hint; -+ struct instruction *insn; -+ struct cfi_reg *cfa; -+ int i; -+ -+ sec = find_section_by_name(file->elf, ".discard.unwind_hints"); -+ if (!sec) -+ return 0; -+ -+ relasec = sec->rela; -+ if (!relasec) { -+ WARN("missing .rela.discard.unwind_hints section"); -+ return -1; -+ } -+ -+ if (sec->len % sizeof(struct unwind_hint)) { -+ WARN("struct unwind_hint size mismatch"); -+ return -1; -+ } -+ -+ file->hints = true; -+ -+ for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { -+ hint = (struct unwind_hint *)sec->data->d_buf + i; -+ -+ rela = find_rela_by_dest(sec, i * sizeof(*hint)); -+ if (!rela) { -+ WARN("can't find rela for unwind_hints[%d]", i); -+ return -1; -+ } -+ -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (!insn) { -+ WARN("can't find insn for unwind_hints[%d]", i); -+ return -1; -+ } -+ -+ cfa = &insn->state.cfa; -+ -+ if (hint->type == UNWIND_HINT_TYPE_SAVE) { -+ insn->save = true; -+ continue; -+ -+ } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { -+ insn->restore = true; -+ insn->hint = true; -+ continue; -+ } -+ -+ insn->hint = true; -+ -+ switch (hint->sp_reg) { -+ case ORC_REG_UNDEFINED: -+ cfa->base = CFI_UNDEFINED; -+ break; -+ case ORC_REG_SP: -+ cfa->base = CFI_SP; -+ break; -+ case ORC_REG_BP: -+ cfa->base = CFI_BP; -+ break; -+ case ORC_REG_SP_INDIRECT: -+ cfa->base = CFI_SP_INDIRECT; -+ break; -+ case ORC_REG_R10: -+ cfa->base = CFI_R10; -+ break; -+ case ORC_REG_R13: -+ cfa->base = CFI_R13; -+ break; -+ case ORC_REG_DI: -+ cfa->base = CFI_DI; -+ break; -+ case ORC_REG_DX: -+ cfa->base = CFI_DX; -+ break; -+ default: -+ WARN_FUNC("unsupported unwind_hint sp base reg %d", -+ insn->sec, insn->offset, hint->sp_reg); -+ return -1; -+ } -+ -+ cfa->offset = hint->sp_offset; -+ insn->state.type = hint->type; -+ } -+ -+ return 0; -+} -+ -+static int read_retpoline_hints(struct objtool_file *file) -+{ -+ struct section *sec; -+ struct instruction *insn; -+ struct rela *rela; -+ -+ sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); -+ if (!sec) -+ return 0; -+ -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ if (rela->sym->type != STT_SECTION) { -+ WARN("unexpected relocation symbol type in %s", sec->name); -+ return -1; -+ } -+ -+ insn = find_insn(file, rela->sym->sec, rela->addend); -+ if (!insn) { -+ WARN("bad .discard.retpoline_safe entry"); -+ return -1; -+ } -+ -+ if (insn->type != INSN_JUMP_DYNAMIC && -+ insn->type != INSN_CALL_DYNAMIC) { -+ WARN_FUNC("retpoline_safe hint not an indirect jump/call", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ -+ insn->retpoline_safe = true; -+ } -+ -+ return 0; -+} -+ - static int 
decode_sections(struct objtool_file *file) - { - int ret; -@@ -932,11 +1170,11 @@ static int decode_sections(struct objtool_file *file) - if (ret) - return ret; - -- ret = add_call_destinations(file); -+ ret = add_special_section_alts(file); - if (ret) - return ret; - -- ret = add_special_section_alts(file); -+ ret = add_call_destinations(file); - if (ret) - return ret; - -@@ -944,6 +1182,14 @@ static int decode_sections(struct objtool_file *file) - if (ret) - return ret; - -+ ret = read_unwind_hints(file); -+ if (ret) -+ return ret; -+ -+ ret = read_retpoline_hints(file); -+ if (ret) -+ return ret; -+ - return 0; - } - -@@ -957,125 +1203,647 @@ static bool is_fentry_call(struct instruction *insn) - return false; - } - --static bool has_modified_stack_frame(struct instruction *insn) -+static bool has_modified_stack_frame(struct insn_state *state) -+{ -+ int i; -+ -+ if (state->cfa.base != initial_func_cfi.cfa.base || -+ state->cfa.offset != initial_func_cfi.cfa.offset || -+ state->stack_size != initial_func_cfi.cfa.offset || -+ state->drap) -+ return true; -+ -+ for (i = 0; i < CFI_NUM_REGS; i++) -+ if (state->regs[i].base != initial_func_cfi.regs[i].base || -+ state->regs[i].offset != initial_func_cfi.regs[i].offset) -+ return true; -+ -+ return false; -+} -+ -+static bool has_valid_stack_frame(struct insn_state *state) -+{ -+ if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA && -+ state->regs[CFI_BP].offset == -16) -+ return true; -+ -+ if (state->drap && state->regs[CFI_BP].base == CFI_BP) -+ return true; -+ -+ return false; -+} -+ -+static int update_insn_state_regs(struct instruction *insn, struct insn_state *state) - { -- return (insn->state & STATE_FP_SAVED) || -- (insn->state & STATE_FP_SETUP); -+ struct cfi_reg *cfa = &state->cfa; -+ struct stack_op *op = &insn->stack_op; -+ -+ if (cfa->base != CFI_SP) -+ return 0; -+ -+ /* push */ -+ if (op->dest.type == OP_DEST_PUSH) -+ cfa->offset += 8; -+ -+ /* pop */ -+ if (op->src.type == OP_SRC_POP) -+ cfa->offset -= 8; -+ -+ /* add immediate to sp */ -+ if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && -+ op->dest.reg == CFI_SP && op->src.reg == CFI_SP) -+ cfa->offset -= op->src.offset; -+ -+ return 0; - } - --static bool has_valid_stack_frame(struct instruction *insn) -+static void save_reg(struct insn_state *state, unsigned char reg, int base, -+ int offset) - { -- return (insn->state & STATE_FP_SAVED) && -- (insn->state & STATE_FP_SETUP); -+ if (arch_callee_saved_reg(reg) && -+ state->regs[reg].base == CFI_UNDEFINED) { -+ state->regs[reg].base = base; -+ state->regs[reg].offset = offset; -+ } - } - --static unsigned int frame_state(unsigned long state) -+static void restore_reg(struct insn_state *state, unsigned char reg) - { -- return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); -+ state->regs[reg].base = CFI_UNDEFINED; -+ state->regs[reg].offset = 0; - } - - /* -- * Follow the branch starting at the given instruction, and recursively follow -- * any other branches (jumps). Meanwhile, track the frame pointer state at -- * each instruction and validate all the rules described in -- * tools/objtool/Documentation/stack-validation.txt. -+ * A note about DRAP stack alignment: -+ * -+ * GCC has the concept of a DRAP register, which is used to help keep track of -+ * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP -+ * register. 
The typical DRAP pattern is: -+ * -+ * 4c 8d 54 24 08 lea 0x8(%rsp),%r10 -+ * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp -+ * 41 ff 72 f8 pushq -0x8(%r10) -+ * 55 push %rbp -+ * 48 89 e5 mov %rsp,%rbp -+ * (more pushes) -+ * 41 52 push %r10 -+ * ... -+ * 41 5a pop %r10 -+ * (more pops) -+ * 5d pop %rbp -+ * 49 8d 62 f8 lea -0x8(%r10),%rsp -+ * c3 retq -+ * -+ * There are some variations in the epilogues, like: -+ * -+ * 5b pop %rbx -+ * 41 5a pop %r10 -+ * 41 5c pop %r12 -+ * 41 5d pop %r13 -+ * 41 5e pop %r14 -+ * c9 leaveq -+ * 49 8d 62 f8 lea -0x8(%r10),%rsp -+ * c3 retq -+ * -+ * and: -+ * -+ * 4c 8b 55 e8 mov -0x18(%rbp),%r10 -+ * 48 8b 5d e0 mov -0x20(%rbp),%rbx -+ * 4c 8b 65 f0 mov -0x10(%rbp),%r12 -+ * 4c 8b 6d f8 mov -0x8(%rbp),%r13 -+ * c9 leaveq -+ * 49 8d 62 f8 lea -0x8(%r10),%rsp -+ * c3 retq -+ * -+ * Sometimes r13 is used as the DRAP register, in which case it's saved and -+ * restored beforehand: -+ * -+ * 41 55 push %r13 -+ * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13 -+ * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp -+ * ... -+ * 49 8d 65 f0 lea -0x10(%r13),%rsp -+ * 41 5d pop %r13 -+ * c3 retq - */ --static int validate_branch(struct objtool_file *file, -- struct instruction *first, unsigned char first_state) -+static int update_insn_state(struct instruction *insn, struct insn_state *state) - { -- struct alternative *alt; -- struct instruction *insn; -- struct section *sec; -- struct symbol *func = NULL; -- unsigned char state; -- int ret; -+ struct stack_op *op = &insn->stack_op; -+ struct cfi_reg *cfa = &state->cfa; -+ struct cfi_reg *regs = state->regs; -+ -+ /* stack operations don't make sense with an undefined CFA */ -+ if (cfa->base == CFI_UNDEFINED) { -+ if (insn->func) { -+ WARN_FUNC("undefined stack state", insn->sec, insn->offset); -+ return -1; -+ } -+ return 0; -+ } - -- insn = first; -- sec = insn->sec; -- state = first_state; -+ if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET) -+ return update_insn_state_regs(insn, state); - -- if (insn->alt_group && list_empty(&insn->alts)) { -- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", -- sec, insn->offset); -- return 1; -- } -+ switch (op->dest.type) { - -- while (1) { -- if (file->c_file && insn->func) { -- if (func && func != insn->func) { -- WARN("%s() falls through to next function %s()", -- func->name, insn->func->name); -- return 1; -- } -+ case OP_DEST_REG: -+ switch (op->src.type) { - -- func = insn->func; -- } -+ case OP_SRC_REG: -+ if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && -+ cfa->base == CFI_SP && -+ regs[CFI_BP].base == CFI_CFA && -+ regs[CFI_BP].offset == -cfa->offset) { - -- if (insn->visited) { -- if (frame_state(insn->state) != frame_state(state)) { -- WARN_FUNC("frame pointer state mismatch", -- sec, insn->offset); -- return 1; -+ /* mov %rsp, %rbp */ -+ cfa->base = op->dest.reg; -+ state->bp_scratch = false; - } - -- return 0; -+ else if (op->src.reg == CFI_SP && -+ op->dest.reg == CFI_BP && state->drap) { -+ -+ /* drap: mov %rsp, %rbp */ -+ regs[CFI_BP].base = CFI_BP; -+ regs[CFI_BP].offset = -state->stack_size; -+ state->bp_scratch = false; -+ } -+ -+ else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { -+ -+ /* -+ * mov %rsp, %reg -+ * -+ * This is needed for the rare case where GCC -+ * does: -+ * -+ * mov %rsp, %rax -+ * ... 
-+ * mov %rax, %rsp -+ */ -+ state->vals[op->dest.reg].base = CFI_CFA; -+ state->vals[op->dest.reg].offset = -state->stack_size; -+ } -+ -+ else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && -+ cfa->base == CFI_BP) { -+ -+ /* -+ * mov %rbp, %rsp -+ * -+ * Restore the original stack pointer (Clang). -+ */ -+ state->stack_size = -state->regs[CFI_BP].offset; -+ } -+ -+ else if (op->dest.reg == cfa->base) { -+ -+ /* mov %reg, %rsp */ -+ if (cfa->base == CFI_SP && -+ state->vals[op->src.reg].base == CFI_CFA) { -+ -+ /* -+ * This is needed for the rare case -+ * where GCC does something dumb like: -+ * -+ * lea 0x8(%rsp), %rcx -+ * ... -+ * mov %rcx, %rsp -+ */ -+ cfa->offset = -state->vals[op->src.reg].offset; -+ state->stack_size = cfa->offset; -+ -+ } else { -+ cfa->base = CFI_UNDEFINED; -+ cfa->offset = 0; -+ } -+ } -+ -+ break; -+ -+ case OP_SRC_ADD: -+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { -+ -+ /* add imm, %rsp */ -+ state->stack_size -= op->src.offset; -+ if (cfa->base == CFI_SP) -+ cfa->offset -= op->src.offset; -+ break; -+ } -+ -+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { -+ -+ /* lea disp(%rbp), %rsp */ -+ state->stack_size = -(op->src.offset + regs[CFI_BP].offset); -+ break; -+ } -+ -+ if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { -+ -+ /* drap: lea disp(%rsp), %drap */ -+ state->drap_reg = op->dest.reg; -+ -+ /* -+ * lea disp(%rsp), %reg -+ * -+ * This is needed for the rare case where GCC -+ * does something dumb like: -+ * -+ * lea 0x8(%rsp), %rcx -+ * ... -+ * mov %rcx, %rsp -+ */ -+ state->vals[op->dest.reg].base = CFI_CFA; -+ state->vals[op->dest.reg].offset = \ -+ -state->stack_size + op->src.offset; -+ -+ break; -+ } -+ -+ if (state->drap && op->dest.reg == CFI_SP && -+ op->src.reg == state->drap_reg) { -+ -+ /* drap: lea disp(%drap), %rsp */ -+ cfa->base = CFI_SP; -+ cfa->offset = state->stack_size = -op->src.offset; -+ state->drap_reg = CFI_UNDEFINED; -+ state->drap = false; -+ break; -+ } -+ -+ if (op->dest.reg == state->cfa.base) { -+ WARN_FUNC("unsupported stack register modification", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ -+ break; -+ -+ case OP_SRC_AND: -+ if (op->dest.reg != CFI_SP || -+ (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || -+ (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { -+ WARN_FUNC("unsupported stack pointer realignment", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ -+ if (state->drap_reg != CFI_UNDEFINED) { -+ /* drap: and imm, %rsp */ -+ cfa->base = state->drap_reg; -+ cfa->offset = state->stack_size = 0; -+ state->drap = true; -+ } -+ -+ /* -+ * Older versions of GCC (4.8ish) realign the stack -+ * without DRAP, with a frame pointer. 
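As an illustrative aside (not from the deleted patch): a rough model of the CFA bookkeeping done by the OP_SRC_ADD and OP_SRC_AND cases above, using hypothetical stand-in types rather than objtool's insn_state.

enum cfa_base { BASE_SP, BASE_BP, BASE_DRAP, BASE_UNDEFINED };

struct cfa_state {
	enum cfa_base base;  /* register the CFA is derived from        */
	int offset;          /* distance from that register to the CFA  */
	int drap;            /* stack has been realigned via a DRAP reg */
};

/* add imm, %rsp: the frame shrinks by imm, so the SP-relative CFA offset
 * shrinks by the same amount. */
static void track_sp_add(struct cfa_state *s, int imm)
{
	if (s->base == BASE_SP)
		s->offset -= imm;
}

/* and $mask, %rsp after lea disp(%rsp), %drap: the pre-alignment stack
 * pointer can no longer be recovered from %rsp, so the CFA is re-anchored
 * on the DRAP register, as the OP_SRC_AND case above does. */
static void track_realign(struct cfa_state *s)
{
	s->base = BASE_DRAP;
	s->offset = 0;
	s->drap = 1;
}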
-+ */ -+ -+ break; -+ -+ case OP_SRC_POP: -+ if (!state->drap && op->dest.type == OP_DEST_REG && -+ op->dest.reg == cfa->base) { -+ -+ /* pop %rbp */ -+ cfa->base = CFI_SP; -+ } -+ -+ if (state->drap && cfa->base == CFI_BP_INDIRECT && -+ op->dest.type == OP_DEST_REG && -+ op->dest.reg == state->drap_reg && -+ state->drap_offset == -state->stack_size) { -+ -+ /* drap: pop %drap */ -+ cfa->base = state->drap_reg; -+ cfa->offset = 0; -+ state->drap_offset = -1; -+ -+ } else if (regs[op->dest.reg].offset == -state->stack_size) { -+ -+ /* pop %reg */ -+ restore_reg(state, op->dest.reg); -+ } -+ -+ state->stack_size -= 8; -+ if (cfa->base == CFI_SP) -+ cfa->offset -= 8; -+ -+ break; -+ -+ case OP_SRC_REG_INDIRECT: -+ if (state->drap && op->src.reg == CFI_BP && -+ op->src.offset == state->drap_offset) { -+ -+ /* drap: mov disp(%rbp), %drap */ -+ cfa->base = state->drap_reg; -+ cfa->offset = 0; -+ state->drap_offset = -1; -+ } -+ -+ if (state->drap && op->src.reg == CFI_BP && -+ op->src.offset == regs[op->dest.reg].offset) { -+ -+ /* drap: mov disp(%rbp), %reg */ -+ restore_reg(state, op->dest.reg); -+ -+ } else if (op->src.reg == cfa->base && -+ op->src.offset == regs[op->dest.reg].offset + cfa->offset) { -+ -+ /* mov disp(%rbp), %reg */ -+ /* mov disp(%rsp), %reg */ -+ restore_reg(state, op->dest.reg); -+ } -+ -+ break; -+ -+ default: -+ WARN_FUNC("unknown stack-related instruction", -+ insn->sec, insn->offset); -+ return -1; - } - -- insn->visited = true; -- insn->state = state; -+ break; - -- list_for_each_entry(alt, &insn->alts, list) { -- ret = validate_branch(file, alt->insn, state); -- if (ret) -+ case OP_DEST_PUSH: -+ state->stack_size += 8; -+ if (cfa->base == CFI_SP) -+ cfa->offset += 8; -+ -+ if (op->src.type != OP_SRC_REG) -+ break; -+ -+ if (state->drap) { -+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { -+ -+ /* drap: push %drap */ -+ cfa->base = CFI_BP_INDIRECT; -+ cfa->offset = -state->stack_size; -+ -+ /* save drap so we know when to restore it */ -+ state->drap_offset = -state->stack_size; -+ -+ } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) { -+ -+ /* drap: push %rbp */ -+ state->stack_size = 0; -+ -+ } else if (regs[op->src.reg].base == CFI_UNDEFINED) { -+ -+ /* drap: push %reg */ -+ save_reg(state, op->src.reg, CFI_BP, -state->stack_size); -+ } -+ -+ } else { -+ -+ /* push %reg */ -+ save_reg(state, op->src.reg, CFI_CFA, -state->stack_size); -+ } -+ -+ /* detect when asm code uses rbp as a scratch register */ -+ if (!no_fp && insn->func && op->src.reg == CFI_BP && -+ cfa->base != CFI_BP) -+ state->bp_scratch = true; -+ break; -+ -+ case OP_DEST_REG_INDIRECT: -+ -+ if (state->drap) { -+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { -+ -+ /* drap: mov %drap, disp(%rbp) */ -+ cfa->base = CFI_BP_INDIRECT; -+ cfa->offset = op->dest.offset; -+ -+ /* save drap offset so we know when to restore it */ -+ state->drap_offset = op->dest.offset; -+ } -+ -+ else if (regs[op->src.reg].base == CFI_UNDEFINED) { -+ -+ /* drap: mov reg, disp(%rbp) */ -+ save_reg(state, op->src.reg, CFI_BP, op->dest.offset); -+ } -+ -+ } else if (op->dest.reg == cfa->base) { -+ -+ /* mov reg, disp(%rbp) */ -+ /* mov reg, disp(%rsp) */ -+ save_reg(state, op->src.reg, CFI_CFA, -+ op->dest.offset - state->cfa.offset); -+ } -+ -+ break; -+ -+ case OP_DEST_LEAVE: -+ if ((!state->drap && cfa->base != CFI_BP) || -+ (state->drap && cfa->base != state->drap_reg)) { -+ WARN_FUNC("leave instruction with modified stack frame", -+ insn->sec, insn->offset); -+ return 
-1; -+ } -+ -+ /* leave (mov %rbp, %rsp; pop %rbp) */ -+ -+ state->stack_size = -state->regs[CFI_BP].offset - 8; -+ restore_reg(state, CFI_BP); -+ -+ if (!state->drap) { -+ cfa->base = CFI_SP; -+ cfa->offset -= 8; -+ } -+ -+ break; -+ -+ case OP_DEST_MEM: -+ if (op->src.type != OP_SRC_POP) { -+ WARN_FUNC("unknown stack-related memory operation", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ -+ /* pop mem */ -+ state->stack_size -= 8; -+ if (cfa->base == CFI_SP) -+ cfa->offset -= 8; -+ -+ break; -+ -+ default: -+ WARN_FUNC("unknown stack-related instruction", -+ insn->sec, insn->offset); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static bool insn_state_match(struct instruction *insn, struct insn_state *state) -+{ -+ struct insn_state *state1 = &insn->state, *state2 = state; -+ int i; -+ -+ if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) { -+ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", -+ insn->sec, insn->offset, -+ state1->cfa.base, state1->cfa.offset, -+ state2->cfa.base, state2->cfa.offset); -+ -+ } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) { -+ for (i = 0; i < CFI_NUM_REGS; i++) { -+ if (!memcmp(&state1->regs[i], &state2->regs[i], -+ sizeof(struct cfi_reg))) -+ continue; -+ -+ WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d", -+ insn->sec, insn->offset, -+ i, state1->regs[i].base, state1->regs[i].offset, -+ i, state2->regs[i].base, state2->regs[i].offset); -+ break; -+ } -+ -+ } else if (state1->type != state2->type) { -+ WARN_FUNC("stack state mismatch: type1=%d type2=%d", -+ insn->sec, insn->offset, state1->type, state2->type); -+ -+ } else if (state1->drap != state2->drap || -+ (state1->drap && state1->drap_reg != state2->drap_reg) || -+ (state1->drap && state1->drap_offset != state2->drap_offset)) { -+ WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)", -+ insn->sec, insn->offset, -+ state1->drap, state1->drap_reg, state1->drap_offset, -+ state2->drap, state2->drap_reg, state2->drap_offset); -+ -+ } else -+ return true; -+ -+ return false; -+} -+ -+/* -+ * Follow the branch starting at the given instruction, and recursively follow -+ * any other branches (jumps). Meanwhile, track the frame pointer state at -+ * each instruction and validate all the rules described in -+ * tools/objtool/Documentation/stack-validation.txt. 
-+ */ -+static int validate_branch(struct objtool_file *file, struct instruction *first, -+ struct insn_state state) -+{ -+ struct alternative *alt; -+ struct instruction *insn, *next_insn; -+ struct section *sec; -+ struct symbol *func = NULL; -+ int ret; -+ -+ insn = first; -+ sec = insn->sec; -+ -+ if (insn->alt_group && list_empty(&insn->alts)) { -+ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", -+ sec, insn->offset); -+ return 1; -+ } -+ -+ while (1) { -+ next_insn = next_insn_same_sec(file, insn); -+ -+ -+ if (file->c_file && func && insn->func && func != insn->func) { -+ WARN("%s() falls through to next function %s()", -+ func->name, insn->func->name); -+ return 1; -+ } -+ -+ if (insn->func) -+ func = insn->func; -+ -+ if (func && insn->ignore) { -+ WARN_FUNC("BUG: why am I validating an ignored function?", -+ sec, insn->offset); -+ return 1; -+ } -+ -+ if (insn->visited) { -+ if (!insn->hint && !insn_state_match(insn, &state)) - return 1; -+ -+ return 0; - } - -- switch (insn->type) { -+ if (insn->hint) { -+ if (insn->restore) { -+ struct instruction *save_insn, *i; -+ -+ i = insn; -+ save_insn = NULL; -+ func_for_each_insn_continue_reverse(file, func, i) { -+ if (i->save) { -+ save_insn = i; -+ break; -+ } -+ } - -- case INSN_FP_SAVE: -- if (!nofp) { -- if (state & STATE_FP_SAVED) { -- WARN_FUNC("duplicate frame pointer save", -+ if (!save_insn) { -+ WARN_FUNC("no corresponding CFI save for CFI restore", - sec, insn->offset); - return 1; - } -- state |= STATE_FP_SAVED; -- } -- break; - -- case INSN_FP_SETUP: -- if (!nofp) { -- if (state & STATE_FP_SETUP) { -- WARN_FUNC("duplicate frame pointer setup", -+ if (!save_insn->visited) { -+ /* -+ * Oops, no state to copy yet. -+ * Hopefully we can reach this -+ * instruction from another branch -+ * after the save insn has been -+ * visited. 
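As an illustrative aside (not from the deleted patch): the save/restore hint handling above amounts to copying previously recorded CFI state; a minimal sketch with hypothetical stand-in types.

struct cfi_snapshot { int cfa_base, cfa_offset; };

struct hint_insn {
	struct cfi_snapshot state;
	int visited;   /* branch validation already reached this point */
};

/*
 * An UNWIND_HINT restore point re-uses the CFI state captured at the
 * matching save point.  If the save point has not been visited yet there
 * is nothing to copy, which is the "no state to copy" case described in
 * the comment above.
 */
static int apply_restore(struct hint_insn *restore_pt,
			 const struct hint_insn *save_pt)
{
	if (!save_pt || !save_pt->visited)
		return -1;
	restore_pt->state = save_pt->state;
	return 0;
}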
-+ */ -+ if (insn == first) -+ return 0; -+ -+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", - sec, insn->offset); - return 1; - } -- state |= STATE_FP_SETUP; -+ -+ insn->state = save_insn->state; - } -- break; - -- case INSN_FP_RESTORE: -- if (!nofp) { -- if (has_valid_stack_frame(insn)) -- state &= ~STATE_FP_SETUP; -+ state = insn->state; -+ -+ } else -+ insn->state = state; -+ -+ insn->visited = true; - -- state &= ~STATE_FP_SAVED; -+ if (!insn->ignore_alts) { -+ list_for_each_entry(alt, &insn->alts, list) { -+ ret = validate_branch(file, alt->insn, state); -+ if (ret) -+ return 1; - } -- break; -+ } -+ -+ switch (insn->type) { - - case INSN_RETURN: -- if (!nofp && has_modified_stack_frame(insn)) { -- WARN_FUNC("return without frame pointer restore", -+ if (func && has_modified_stack_frame(&state)) { -+ WARN_FUNC("return with modified stack frame", - sec, insn->offset); - return 1; - } -+ -+ if (state.bp_scratch) { -+ WARN("%s uses BP as a scratch register", -+ insn->func->name); -+ return 1; -+ } -+ - return 0; - - case INSN_CALL: -- if (is_fentry_call(insn)) { -- state |= STATE_FENTRY; -+ if (is_fentry_call(insn)) - break; -- } - - ret = dead_end_function(file, insn->call_dest); - if (ret == 1) -@@ -1085,7 +1853,7 @@ static int validate_branch(struct objtool_file *file, - - /* fallthrough */ - case INSN_CALL_DYNAMIC: -- if (!nofp && !has_valid_stack_frame(insn)) { -+ if (!no_fp && func && !has_valid_stack_frame(&state)) { - WARN_FUNC("call without frame pointer save/setup", - sec, insn->offset); - return 1; -@@ -1094,16 +1862,19 @@ static int validate_branch(struct objtool_file *file, - - case INSN_JUMP_CONDITIONAL: - case INSN_JUMP_UNCONDITIONAL: -- if (insn->jump_dest) { -+ if (insn->jump_dest && -+ (!func || !insn->jump_dest->func || -+ func == insn->jump_dest->func)) { - ret = validate_branch(file, insn->jump_dest, - state); - if (ret) - return 1; -- } else if (has_modified_stack_frame(insn)) { -- WARN_FUNC("sibling call from callable instruction with changed frame pointer", -+ -+ } else if (func && has_modified_stack_frame(&state)) { -+ WARN_FUNC("sibling call from callable instruction with modified stack frame", - sec, insn->offset); - return 1; -- } /* else it's a sibling call */ -+ } - - if (insn->type == INSN_JUMP_UNCONDITIONAL) - return 0; -@@ -1111,15 +1882,29 @@ static int validate_branch(struct objtool_file *file, - break; - - case INSN_JUMP_DYNAMIC: -- if (list_empty(&insn->alts) && -- has_modified_stack_frame(insn)) { -- WARN_FUNC("sibling call from callable instruction with changed frame pointer", -+ if (func && list_empty(&insn->alts) && -+ has_modified_stack_frame(&state)) { -+ WARN_FUNC("sibling call from callable instruction with modified stack frame", - sec, insn->offset); - return 1; - } - - return 0; - -+ case INSN_CONTEXT_SWITCH: -+ if (func && (!next_insn || !next_insn->hint)) { -+ WARN_FUNC("unsupported instruction in callable function", -+ sec, insn->offset); -+ return 1; -+ } -+ return 0; -+ -+ case INSN_STACK: -+ if (update_insn_state(insn, &state)) -+ return 1; -+ -+ break; -+ - default: - break; - } -@@ -1127,16 +1912,72 @@ static int validate_branch(struct objtool_file *file, - if (insn->dead_end) - return 0; - -- insn = next_insn_same_sec(file, insn); -- if (!insn) { -+ if (!next_insn) { -+ if (state.cfa.base == CFI_UNDEFINED) -+ return 0; - WARN("%s: unexpected end of section", sec->name); - return 1; - } -+ -+ insn = next_insn; - } - - return 0; - } - -+static int validate_unwind_hints(struct objtool_file *file) -+{ -+ 
struct instruction *insn; -+ int ret, warnings = 0; -+ struct insn_state state; -+ -+ if (!file->hints) -+ return 0; -+ -+ clear_insn_state(&state); -+ -+ for_each_insn(file, insn) { -+ if (insn->hint && !insn->visited) { -+ ret = validate_branch(file, insn, state); -+ warnings += ret; -+ } -+ } -+ -+ return warnings; -+} -+ -+static int validate_retpoline(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ int warnings = 0; -+ -+ for_each_insn(file, insn) { -+ if (insn->type != INSN_JUMP_DYNAMIC && -+ insn->type != INSN_CALL_DYNAMIC) -+ continue; -+ -+ if (insn->retpoline_safe) -+ continue; -+ -+ /* -+ * .init.text code is ran before userspace and thus doesn't -+ * strictly need retpolines, except for modules which are -+ * loaded late, they very much do need retpoline in their -+ * .init.text -+ */ -+ if (!strcmp(insn->sec->name, ".init.text") && !module) -+ continue; -+ -+ WARN_FUNC("indirect %s found in RETPOLINE build", -+ insn->sec, insn->offset, -+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); -+ -+ warnings++; -+ } -+ -+ return warnings; -+} -+ - static bool is_kasan_insn(struct instruction *insn) - { - return (insn->type == INSN_CALL && -@@ -1150,12 +1991,23 @@ static bool is_ubsan_insn(struct instruction *insn) - "__ubsan_handle_builtin_unreachable")); - } - --static bool ignore_unreachable_insn(struct symbol *func, -- struct instruction *insn) -+static bool ignore_unreachable_insn(struct instruction *insn) - { - int i; - -- if (insn->type == INSN_NOP) -+ if (insn->ignore || insn->type == INSN_NOP) -+ return true; -+ -+ /* -+ * Ignore any unused exceptions. This can happen when a whitelisted -+ * function has an exception table entry. -+ * -+ * Also ignore alternative replacement instructions. This can happen -+ * when a whitelisted function uses one of the ALTERNATIVE macros. -+ */ -+ if (!strcmp(insn->sec->name, ".fixup") || -+ !strcmp(insn->sec->name, ".altinstr_replacement") || -+ !strcmp(insn->sec->name, ".altinstr_aux")) - return true; - - /* -@@ -1164,18 +2016,26 @@ static bool ignore_unreachable_insn(struct symbol *func, - * - * End the search at 5 instructions to avoid going into the weeds. 
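As an illustrative aside (not from the deleted patch): the policy applied by validate_retpoline() in this hunk can be restated as a small predicate; the function and parameter names below are hypothetical.

/*
 * Report an indirect jump/call in a retpoline build unless it is annotated
 * retpoline-safe, or it lives in built-in .init.text, which runs before
 * userspace; module init code is loaded late and is not exempt.
 */
static int should_warn_indirect_branch(int is_indirect, int retpoline_safe,
				       int in_init_text, int building_module)
{
	if (!is_indirect || retpoline_safe)
		return 0;
	if (in_init_text && !building_module)
		return 0;
	return 1;
}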
- */ -+ if (!insn->func) -+ return false; - for (i = 0; i < 5; i++) { - - if (is_kasan_insn(insn) || is_ubsan_insn(insn)) - return true; - -- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { -- insn = insn->jump_dest; -- continue; -+ if (insn->type == INSN_JUMP_UNCONDITIONAL) { -+ if (insn->jump_dest && -+ insn->jump_dest->func == insn->func) { -+ insn = insn->jump_dest; -+ continue; -+ } -+ -+ break; - } - -- if (insn->offset + insn->len >= func->offset + func->len) -+ if (insn->offset + insn->len >= insn->func->offset + insn->func->len) - break; -+ - insn = list_next_entry(insn, list); - } - -@@ -1187,81 +2047,49 @@ static int validate_functions(struct objtool_file *file) - struct section *sec; - struct symbol *func; - struct instruction *insn; -+ struct insn_state state; - int ret, warnings = 0; - -- list_for_each_entry(sec, &file->elf->sections, list) { -+ clear_insn_state(&state); -+ -+ state.cfa = initial_func_cfi.cfa; -+ memcpy(&state.regs, &initial_func_cfi.regs, -+ CFI_NUM_REGS * sizeof(struct cfi_reg)); -+ state.stack_size = initial_func_cfi.cfa.offset; -+ -+ for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - insn = find_insn(file, sec, func->offset); -- if (!insn) -+ if (!insn || insn->ignore) - continue; - -- ret = validate_branch(file, insn, 0); -+ ret = validate_branch(file, insn, state); - warnings += ret; - } - } - -- list_for_each_entry(sec, &file->elf->sections, list) { -- list_for_each_entry(func, &sec->symbol_list, list) { -- if (func->type != STT_FUNC) -- continue; -- -- func_for_each_insn(file, func, insn) { -- if (insn->visited) -- continue; -- -- insn->visited = true; -- -- if (file->ignore_unreachables || warnings || -- ignore_unreachable_insn(func, insn)) -- continue; -- -- /* -- * gcov produces a lot of unreachable -- * instructions. If we get an unreachable -- * warning and the file has gcov enabled, just -- * ignore it, and all other such warnings for -- * the file. -- */ -- if (!file->ignore_unreachables && -- gcov_enabled(file)) { -- file->ignore_unreachables = true; -- continue; -- } -- -- WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); -- warnings++; -- } -- } -- } -- - return warnings; - } - --static int validate_uncallable_instructions(struct objtool_file *file) -+static int validate_reachable_instructions(struct objtool_file *file) - { - struct instruction *insn; -- int warnings = 0; - -- for_each_insn(file, insn) { -- if (!insn->visited && insn->type == INSN_RETURN) { -+ if (file->ignore_unreachables) -+ return 0; - -- /* -- * Don't warn about call instructions in unvisited -- * retpoline alternatives. 
-- */ -- if (!strcmp(insn->sec->name, ".altinstr_replacement")) -- continue; -+ for_each_insn(file, insn) { -+ if (insn->visited || ignore_unreachable_insn(insn)) -+ continue; - -- WARN_FUNC("return instruction outside of a callable function", -- insn->sec, insn->offset); -- warnings++; -- } -+ WARN_FUNC("unreachable instruction", insn->sec, insn->offset); -+ return 1; - } - -- return warnings; -+ return 0; - } - - static void cleanup(struct objtool_file *file) -@@ -1281,42 +2109,73 @@ static void cleanup(struct objtool_file *file) - elf_close(file->elf); - } - --int check(const char *_objname, bool _nofp) -+int check(const char *_objname, bool orc) - { - struct objtool_file file; - int ret, warnings = 0; - - objname = _objname; -- nofp = _nofp; - -- file.elf = elf_open(objname); -- if (!file.elf) { -- fprintf(stderr, "error reading elf file %s\n", objname); -+ file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); -+ if (!file.elf) - return 1; -- } - - INIT_LIST_HEAD(&file.insn_list); - hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); - file.rodata = find_section_by_name(file.elf, ".rodata"); -- file.ignore_unreachables = false; - file.c_file = find_section_by_name(file.elf, ".comment"); -+ file.ignore_unreachables = no_unreachable; -+ file.hints = false; -+ -+ arch_initial_func_cfi_state(&initial_func_cfi); - - ret = decode_sections(&file); - if (ret < 0) - goto out; - warnings += ret; - -+ if (list_empty(&file.insn_list)) -+ goto out; -+ -+ if (retpoline) { -+ ret = validate_retpoline(&file); -+ if (ret < 0) -+ return ret; -+ warnings += ret; -+ } -+ - ret = validate_functions(&file); - if (ret < 0) - goto out; - warnings += ret; - -- ret = validate_uncallable_instructions(&file); -+ ret = validate_unwind_hints(&file); - if (ret < 0) - goto out; - warnings += ret; - -+ if (!warnings) { -+ ret = validate_reachable_instructions(&file); -+ if (ret < 0) -+ goto out; -+ warnings += ret; -+ } -+ -+ if (orc) { -+ ret = create_orc(&file); -+ if (ret < 0) -+ goto out; -+ -+ ret = create_orc_sections(&file); -+ if (ret < 0) -+ goto out; -+ -+ ret = elf_write(file.elf); -+ if (ret < 0) -+ goto out; -+ } -+ - out: - cleanup(&file); - -diff --git a/tools/objtool/check.h b/tools/objtool/check.h -index aca248a..c6b68fc 100644 ---- a/tools/objtool/check.h -+++ b/tools/objtool/check.h -@@ -20,22 +20,40 @@ - - #include <stdbool.h> - #include "elf.h" -+#include "cfi.h" - #include "arch.h" -+#include "orc.h" - #include <linux/hashtable.h> - -+struct insn_state { -+ struct cfi_reg cfa; -+ struct cfi_reg regs[CFI_NUM_REGS]; -+ int stack_size; -+ unsigned char type; -+ bool bp_scratch; -+ bool drap; -+ int drap_reg, drap_offset; -+ struct cfi_reg vals[CFI_NUM_REGS]; -+}; -+ - struct instruction { - struct list_head list; - struct hlist_node hash; - struct section *sec; - unsigned long offset; -- unsigned int len, state; -+ unsigned int len; - unsigned char type; - unsigned long immediate; -- bool alt_group, visited, dead_end, ignore_alts; -+ bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; -+ bool retpoline_safe; - struct symbol *call_dest; - struct instruction *jump_dest; -+ struct instruction *first_jump_src; - struct list_head alts; - struct symbol *func; -+ struct stack_op stack_op; -+ struct insn_state state; -+ struct orc_entry orc; - }; - - struct objtool_file { -@@ -43,9 +61,22 @@ struct objtool_file { - struct list_head insn_list; - DECLARE_HASHTABLE(insn_hash, 16); - struct section *rodata, *whitelist; 
-- bool ignore_unreachables, c_file; -+ bool ignore_unreachables, c_file, hints; - }; - --int check(const char *objname, bool nofp); -+int check(const char *objname, bool orc); -+ -+struct instruction *find_insn(struct objtool_file *file, -+ struct section *sec, unsigned long offset); -+ -+#define for_each_insn(file, insn) \ -+ list_for_each_entry(insn, &file->insn_list, list) -+ -+#define sec_for_each_insn(file, sec, insn) \ -+ for (insn = find_insn(file, sec, 0); \ -+ insn && &insn->list != &file->insn_list && \ -+ insn->sec == sec; \ -+ insn = list_next_entry(insn, list)) -+ - - #endif /* _CHECK_H */ -diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c -index 14a74d4..b31b7a6 100644 ---- a/tools/objtool/elf.c -+++ b/tools/objtool/elf.c -@@ -31,13 +31,6 @@ - #include "elf.h" - #include "warn.h" - --/* -- * Fallback for systems without this "read, mmaping if possible" cmd. -- */ --#ifndef ELF_C_READ_MMAP --#define ELF_C_READ_MMAP ELF_C_READ --#endif -- - struct section *find_section_by_name(struct elf *elf, const char *name) - { - struct section *sec; -@@ -128,12 +121,12 @@ static int read_sections(struct elf *elf) - int i; - - if (elf_getshdrnum(elf->elf, §ions_nr)) { -- perror("elf_getshdrnum"); -+ WARN_ELF("elf_getshdrnum"); - return -1; - } - - if (elf_getshdrstrndx(elf->elf, &shstrndx)) { -- perror("elf_getshdrstrndx"); -+ WARN_ELF("elf_getshdrstrndx"); - return -1; - } - -@@ -154,37 +147,37 @@ static int read_sections(struct elf *elf) - - s = elf_getscn(elf->elf, i); - if (!s) { -- perror("elf_getscn"); -+ WARN_ELF("elf_getscn"); - return -1; - } - - sec->idx = elf_ndxscn(s); - - if (!gelf_getshdr(s, &sec->sh)) { -- perror("gelf_getshdr"); -+ WARN_ELF("gelf_getshdr"); - return -1; - } - - sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); - if (!sec->name) { -- perror("elf_strptr"); -- return -1; -- } -- -- sec->elf_data = elf_getdata(s, NULL); -- if (!sec->elf_data) { -- perror("elf_getdata"); -+ WARN_ELF("elf_strptr"); - return -1; - } - -- if (sec->elf_data->d_off != 0 || -- sec->elf_data->d_size != sec->sh.sh_size) { -- WARN("unexpected data attributes for %s", sec->name); -- return -1; -+ if (sec->sh.sh_size != 0) { -+ sec->data = elf_getdata(s, NULL); -+ if (!sec->data) { -+ WARN_ELF("elf_getdata"); -+ return -1; -+ } -+ if (sec->data->d_off != 0 || -+ sec->data->d_size != sec->sh.sh_size) { -+ WARN("unexpected data attributes for %s", -+ sec->name); -+ return -1; -+ } - } -- -- sec->data = (unsigned long)sec->elf_data->d_buf; -- sec->len = sec->elf_data->d_size; -+ sec->len = sec->sh.sh_size; - } - - /* sanity check, one more call to elf_nextscn() should return NULL */ -@@ -221,15 +214,15 @@ static int read_symbols(struct elf *elf) - - sym->idx = i; - -- if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) { -- perror("gelf_getsym"); -+ if (!gelf_getsym(symtab->data, i, &sym->sym)) { -+ WARN_ELF("gelf_getsym"); - goto err; - } - - sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, - sym->sym.st_name); - if (!sym->name) { -- perror("elf_strptr"); -+ WARN_ELF("elf_strptr"); - goto err; - } - -@@ -311,8 +304,8 @@ static int read_relas(struct elf *elf) - } - memset(rela, 0, sizeof(*rela)); - -- if (!gelf_getrela(sec->elf_data, i, &rela->rela)) { -- perror("gelf_getrela"); -+ if (!gelf_getrela(sec->data, i, &rela->rela)) { -+ WARN_ELF("gelf_getrela"); - return -1; - } - -@@ -336,9 +329,10 @@ static int read_relas(struct elf *elf) - return 0; - } - --struct elf *elf_open(const char *name) -+struct elf *elf_open(const char *name, int flags) - { - struct elf *elf; -+ 
Elf_Cmd cmd; - - elf_version(EV_CURRENT); - -@@ -351,27 +345,28 @@ struct elf *elf_open(const char *name) - - INIT_LIST_HEAD(&elf->sections); - -- elf->name = strdup(name); -- if (!elf->name) { -- perror("strdup"); -- goto err; -- } -- -- elf->fd = open(name, O_RDONLY); -+ elf->fd = open(name, flags); - if (elf->fd == -1) { - fprintf(stderr, "objtool: Can't open '%s': %s\n", - name, strerror(errno)); - goto err; - } - -- elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); -+ if ((flags & O_ACCMODE) == O_RDONLY) -+ cmd = ELF_C_READ_MMAP; -+ else if ((flags & O_ACCMODE) == O_RDWR) -+ cmd = ELF_C_RDWR; -+ else /* O_WRONLY */ -+ cmd = ELF_C_WRITE; -+ -+ elf->elf = elf_begin(elf->fd, cmd, NULL); - if (!elf->elf) { -- perror("elf_begin"); -+ WARN_ELF("elf_begin"); - goto err; - } - - if (!gelf_getehdr(elf->elf, &elf->ehdr)) { -- perror("gelf_getehdr"); -+ WARN_ELF("gelf_getehdr"); - goto err; - } - -@@ -391,12 +386,212 @@ struct elf *elf_open(const char *name) - return NULL; - } - -+struct section *elf_create_section(struct elf *elf, const char *name, -+ size_t entsize, int nr) -+{ -+ struct section *sec, *shstrtab; -+ size_t size = entsize * nr; -+ struct Elf_Scn *s; -+ Elf_Data *data; -+ -+ sec = malloc(sizeof(*sec)); -+ if (!sec) { -+ perror("malloc"); -+ return NULL; -+ } -+ memset(sec, 0, sizeof(*sec)); -+ -+ INIT_LIST_HEAD(&sec->symbol_list); -+ INIT_LIST_HEAD(&sec->rela_list); -+ hash_init(sec->rela_hash); -+ hash_init(sec->symbol_hash); -+ -+ list_add_tail(&sec->list, &elf->sections); -+ -+ s = elf_newscn(elf->elf); -+ if (!s) { -+ WARN_ELF("elf_newscn"); -+ return NULL; -+ } -+ -+ sec->name = strdup(name); -+ if (!sec->name) { -+ perror("strdup"); -+ return NULL; -+ } -+ -+ sec->idx = elf_ndxscn(s); -+ sec->len = size; -+ sec->changed = true; -+ -+ sec->data = elf_newdata(s); -+ if (!sec->data) { -+ WARN_ELF("elf_newdata"); -+ return NULL; -+ } -+ -+ sec->data->d_size = size; -+ sec->data->d_align = 1; -+ -+ if (size) { -+ sec->data->d_buf = malloc(size); -+ if (!sec->data->d_buf) { -+ perror("malloc"); -+ return NULL; -+ } -+ memset(sec->data->d_buf, 0, size); -+ } -+ -+ if (!gelf_getshdr(s, &sec->sh)) { -+ WARN_ELF("gelf_getshdr"); -+ return NULL; -+ } -+ -+ sec->sh.sh_size = size; -+ sec->sh.sh_entsize = entsize; -+ sec->sh.sh_type = SHT_PROGBITS; -+ sec->sh.sh_addralign = 1; -+ sec->sh.sh_flags = SHF_ALLOC; -+ -+ -+ /* Add section name to .shstrtab */ -+ shstrtab = find_section_by_name(elf, ".shstrtab"); -+ if (!shstrtab) { -+ WARN("can't find .shstrtab section"); -+ return NULL; -+ } -+ -+ s = elf_getscn(elf->elf, shstrtab->idx); -+ if (!s) { -+ WARN_ELF("elf_getscn"); -+ return NULL; -+ } -+ -+ data = elf_newdata(s); -+ if (!data) { -+ WARN_ELF("elf_newdata"); -+ return NULL; -+ } -+ -+ data->d_buf = sec->name; -+ data->d_size = strlen(name) + 1; -+ data->d_align = 1; -+ -+ sec->sh.sh_name = shstrtab->len; -+ -+ shstrtab->len += strlen(name) + 1; -+ shstrtab->changed = true; -+ -+ return sec; -+} -+ -+struct section *elf_create_rela_section(struct elf *elf, struct section *base) -+{ -+ char *relaname; -+ struct section *sec; -+ -+ relaname = malloc(strlen(base->name) + strlen(".rela") + 1); -+ if (!relaname) { -+ perror("malloc"); -+ return NULL; -+ } -+ strcpy(relaname, ".rela"); -+ strcat(relaname, base->name); -+ -+ sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0); -+ free(relaname); -+ if (!sec) -+ return NULL; -+ -+ base->rela = sec; -+ sec->base = base; -+ -+ sec->sh.sh_type = SHT_RELA; -+ sec->sh.sh_addralign = 8; -+ sec->sh.sh_link = 
find_section_by_name(elf, ".symtab")->idx; -+ sec->sh.sh_info = base->idx; -+ sec->sh.sh_flags = SHF_INFO_LINK; -+ -+ return sec; -+} -+ -+int elf_rebuild_rela_section(struct section *sec) -+{ -+ struct rela *rela; -+ int nr, idx = 0, size; -+ GElf_Rela *relas; -+ -+ nr = 0; -+ list_for_each_entry(rela, &sec->rela_list, list) -+ nr++; -+ -+ size = nr * sizeof(*relas); -+ relas = malloc(size); -+ if (!relas) { -+ perror("malloc"); -+ return -1; -+ } -+ -+ sec->data->d_buf = relas; -+ sec->data->d_size = size; -+ -+ sec->sh.sh_size = size; -+ -+ idx = 0; -+ list_for_each_entry(rela, &sec->rela_list, list) { -+ relas[idx].r_offset = rela->offset; -+ relas[idx].r_addend = rela->addend; -+ relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type); -+ idx++; -+ } -+ -+ return 0; -+} -+ -+int elf_write(struct elf *elf) -+{ -+ struct section *sec; -+ Elf_Scn *s; -+ -+ /* Update section headers for changed sections: */ -+ list_for_each_entry(sec, &elf->sections, list) { -+ if (sec->changed) { -+ s = elf_getscn(elf->elf, sec->idx); -+ if (!s) { -+ WARN_ELF("elf_getscn"); -+ return -1; -+ } -+ if (!gelf_update_shdr(s, &sec->sh)) { -+ WARN_ELF("gelf_update_shdr"); -+ return -1; -+ } -+ } -+ } -+ -+ /* Make sure the new section header entries get updated properly. */ -+ elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY); -+ -+ /* Write all changes to the file. */ -+ if (elf_update(elf->elf, ELF_C_WRITE) < 0) { -+ WARN_ELF("elf_update"); -+ return -1; -+ } -+ -+ return 0; -+} -+ - void elf_close(struct elf *elf) - { - struct section *sec, *tmpsec; - struct symbol *sym, *tmpsym; - struct rela *rela, *tmprela; - -+ if (elf->elf) -+ elf_end(elf->elf); -+ -+ if (elf->fd > 0) -+ close(elf->fd); -+ - list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) { - list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) { - list_del(&sym->list); -@@ -411,11 +606,6 @@ void elf_close(struct elf *elf) - list_del(&sec->list); - free(sec); - } -- if (elf->name) -- free(elf->name); -- if (elf->fd > 0) -- close(elf->fd); -- if (elf->elf) -- elf_end(elf->elf); -+ - free(elf); - } -diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h -index aa1ff65..440b83b 100644 ---- a/tools/objtool/elf.h -+++ b/tools/objtool/elf.h -@@ -28,6 +28,13 @@ - # define elf_getshdrstrndx elf_getshstrndx - #endif - -+/* -+ * Fallback for systems without this "read, mmaping if possible" cmd. 
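As an illustrative aside (not from the deleted patch): the elf_open() change above derives the libelf begin command from the open(2) access mode; a standalone sketch of that mapping, with a hypothetical helper name.

#include <fcntl.h>
#include <libelf.h>

static Elf_Cmd elf_cmd_for_flags(int flags)
{
	switch (flags & O_ACCMODE) {
	case O_RDONLY:
		return ELF_C_READ;	/* ELF_C_READ_MMAP where libelf provides it */
	case O_RDWR:
		return ELF_C_RDWR;	/* read-modify-write, used for ORC generation */
	default:
		return ELF_C_WRITE;	/* O_WRONLY */
	}
}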
-+ */ -+#ifndef ELF_C_READ_MMAP -+#define ELF_C_READ_MMAP ELF_C_READ -+#endif -+ - struct section { - struct list_head list; - GElf_Shdr sh; -@@ -37,11 +44,11 @@ struct section { - DECLARE_HASHTABLE(rela_hash, 16); - struct section *base, *rela; - struct symbol *sym; -- Elf_Data *elf_data; -+ Elf_Data *data; - char *name; - int idx; -- unsigned long data; - unsigned int len; -+ bool changed, text; - }; - - struct symbol { -@@ -76,15 +83,21 @@ struct elf { - }; - - --struct elf *elf_open(const char *name); -+struct elf *elf_open(const char *name, int flags); - struct section *find_section_by_name(struct elf *elf, const char *name); - struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); - struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); - struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, - unsigned int len); - struct symbol *find_containing_func(struct section *sec, unsigned long offset); -+struct section *elf_create_section(struct elf *elf, const char *name, size_t -+ entsize, int nr); -+struct section *elf_create_rela_section(struct elf *elf, struct section *base); -+int elf_rebuild_rela_section(struct section *sec); -+int elf_write(struct elf *elf); - void elf_close(struct elf *elf); - -- -+#define for_each_sec(file, sec) \ -+ list_for_each_entry(sec, &file->elf->sections, list) - - #endif /* _OBJTOOL_ELF_H */ -diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c -index 46c326d..07f3299 100644 ---- a/tools/objtool/objtool.c -+++ b/tools/objtool/objtool.c -@@ -31,11 +31,10 @@ - #include <stdlib.h> - #include <subcmd/exec-cmd.h> - #include <subcmd/pager.h> -+#include <linux/kernel.h> - - #include "builtin.h" - --#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) -- - struct cmd_struct { - const char *name; - int (*fn)(int, const char **); -@@ -43,10 +42,11 @@ struct cmd_struct { - }; - - static const char objtool_usage_string[] = -- "objtool [OPTIONS] COMMAND [ARGS]"; -+ "objtool COMMAND [ARGS]"; - - static struct cmd_struct objtool_cmds[] = { - {"check", cmd_check, "Perform stack metadata validation on an object file" }, -+ {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" }, - }; - - bool help; -@@ -70,7 +70,7 @@ static void cmd_usage(void) - - printf("\n"); - -- exit(1); -+ exit(129); - } - - static void handle_options(int *argc, const char ***argv) -@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv) - break; - } else { - fprintf(stderr, "Unknown option: %s\n", cmd); -- fprintf(stderr, "\n Usage: %s\n", -- objtool_usage_string); -- exit(1); -+ cmd_usage(); - } - - (*argv)++; -diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h -new file mode 100644 -index 0000000..b0e92a6 ---- /dev/null -+++ b/tools/objtool/orc.h -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. -+ */ -+ -+#ifndef _ORC_H -+#define _ORC_H -+ -+#include <asm/orc_types.h> -+ -+struct objtool_file; -+ -+int create_orc(struct objtool_file *file); -+int create_orc_sections(struct objtool_file *file); -+ -+int orc_dump(const char *objname); -+ -+#endif /* _ORC_H */ -diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c -new file mode 100644 -index 0000000..c334382 ---- /dev/null -+++ b/tools/objtool/orc_dump.c -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. -+ */ -+ -+#include <unistd.h> -+#include "orc.h" -+#include "warn.h" -+ -+static const char *reg_name(unsigned int reg) -+{ -+ switch (reg) { -+ case ORC_REG_PREV_SP: -+ return "prevsp"; -+ case ORC_REG_DX: -+ return "dx"; -+ case ORC_REG_DI: -+ return "di"; -+ case ORC_REG_BP: -+ return "bp"; -+ case ORC_REG_SP: -+ return "sp"; -+ case ORC_REG_R10: -+ return "r10"; -+ case ORC_REG_R13: -+ return "r13"; -+ case ORC_REG_BP_INDIRECT: -+ return "bp(ind)"; -+ case ORC_REG_SP_INDIRECT: -+ return "sp(ind)"; -+ default: -+ return "?"; -+ } -+} -+ -+static const char *orc_type_name(unsigned int type) -+{ -+ switch (type) { -+ case ORC_TYPE_CALL: -+ return "call"; -+ case ORC_TYPE_REGS: -+ return "regs"; -+ case ORC_TYPE_REGS_IRET: -+ return "iret"; -+ default: -+ return "?"; -+ } -+} -+ -+static void print_reg(unsigned int reg, int offset) -+{ -+ if (reg == ORC_REG_BP_INDIRECT) -+ printf("(bp%+d)", offset); -+ else if (reg == ORC_REG_SP_INDIRECT) -+ printf("(sp%+d)", offset); -+ else if (reg == ORC_REG_UNDEFINED) -+ printf("(und)"); -+ else -+ printf("%s%+d", reg_name(reg), offset); -+} -+ -+int orc_dump(const char *_objname) -+{ -+ int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0; -+ struct orc_entry *orc = NULL; -+ char *name; -+ size_t nr_sections; -+ Elf64_Addr orc_ip_addr = 0; -+ size_t shstrtab_idx; -+ Elf *elf; -+ Elf_Scn *scn; -+ GElf_Shdr sh; -+ GElf_Rela rela; -+ GElf_Sym sym; -+ Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL; -+ -+ -+ objname = _objname; -+ -+ elf_version(EV_CURRENT); -+ -+ fd = open(objname, O_RDONLY); -+ if (fd == -1) { -+ perror("open"); -+ return -1; -+ } -+ -+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); -+ if (!elf) { -+ WARN_ELF("elf_begin"); -+ return -1; -+ } -+ -+ if (elf_getshdrnum(elf, &nr_sections)) { -+ WARN_ELF("elf_getshdrnum"); -+ return -1; -+ } -+ -+ if (elf_getshdrstrndx(elf, &shstrtab_idx)) { -+ WARN_ELF("elf_getshdrstrndx"); -+ return -1; -+ } -+ -+ for (i = 0; i < nr_sections; i++) { -+ scn = elf_getscn(elf, i); -+ if (!scn) { -+ WARN_ELF("elf_getscn"); -+ return -1; -+ } -+ -+ if (!gelf_getshdr(scn, &sh)) { -+ WARN_ELF("gelf_getshdr"); -+ return -1; -+ } -+ -+ name = elf_strptr(elf, shstrtab_idx, sh.sh_name); -+ if (!name) { -+ 
WARN_ELF("elf_strptr"); -+ return -1; -+ } -+ -+ data = elf_getdata(scn, NULL); -+ if (!data) { -+ WARN_ELF("elf_getdata"); -+ return -1; -+ } -+ -+ if (!strcmp(name, ".symtab")) { -+ symtab = data; -+ } else if (!strcmp(name, ".orc_unwind")) { -+ orc = data->d_buf; -+ orc_size = sh.sh_size; -+ } else if (!strcmp(name, ".orc_unwind_ip")) { -+ orc_ip = data->d_buf; -+ orc_ip_addr = sh.sh_addr; -+ } else if (!strcmp(name, ".rela.orc_unwind_ip")) { -+ rela_orc_ip = data; -+ } -+ } -+ -+ if (!symtab || !orc || !orc_ip) -+ return 0; -+ -+ if (orc_size % sizeof(*orc) != 0) { -+ WARN("bad .orc_unwind section size"); -+ return -1; -+ } -+ -+ nr_entries = orc_size / sizeof(*orc); -+ for (i = 0; i < nr_entries; i++) { -+ if (rela_orc_ip) { -+ if (!gelf_getrela(rela_orc_ip, i, &rela)) { -+ WARN_ELF("gelf_getrela"); -+ return -1; -+ } -+ -+ if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) { -+ WARN_ELF("gelf_getsym"); -+ return -1; -+ } -+ -+ scn = elf_getscn(elf, sym.st_shndx); -+ if (!scn) { -+ WARN_ELF("elf_getscn"); -+ return -1; -+ } -+ -+ if (!gelf_getshdr(scn, &sh)) { -+ WARN_ELF("gelf_getshdr"); -+ return -1; -+ } -+ -+ name = elf_strptr(elf, shstrtab_idx, sh.sh_name); -+ if (!name || !*name) { -+ WARN_ELF("elf_strptr"); -+ return -1; -+ } -+ -+ printf("%s+%llx:", name, (unsigned long long)rela.r_addend); -+ -+ } else { -+ printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i])); -+ } -+ -+ -+ printf(" sp:"); -+ -+ print_reg(orc[i].sp_reg, orc[i].sp_offset); -+ -+ printf(" bp:"); -+ -+ print_reg(orc[i].bp_reg, orc[i].bp_offset); -+ -+ printf(" type:%s\n", orc_type_name(orc[i].type)); -+ } -+ -+ elf_end(elf); -+ close(fd); -+ -+ return 0; -+} -diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c -new file mode 100644 -index 0000000..18384d9 ---- /dev/null -+++ b/tools/objtool/orc_gen.c -@@ -0,0 +1,221 @@ -+/* -+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
-+ */ -+ -+#include <stdlib.h> -+#include <string.h> -+ -+#include "orc.h" -+#include "check.h" -+#include "warn.h" -+ -+int create_orc(struct objtool_file *file) -+{ -+ struct instruction *insn; -+ -+ for_each_insn(file, insn) { -+ struct orc_entry *orc = &insn->orc; -+ struct cfi_reg *cfa = &insn->state.cfa; -+ struct cfi_reg *bp = &insn->state.regs[CFI_BP]; -+ -+ if (cfa->base == CFI_UNDEFINED) { -+ orc->sp_reg = ORC_REG_UNDEFINED; -+ continue; -+ } -+ -+ switch (cfa->base) { -+ case CFI_SP: -+ orc->sp_reg = ORC_REG_SP; -+ break; -+ case CFI_SP_INDIRECT: -+ orc->sp_reg = ORC_REG_SP_INDIRECT; -+ break; -+ case CFI_BP: -+ orc->sp_reg = ORC_REG_BP; -+ break; -+ case CFI_BP_INDIRECT: -+ orc->sp_reg = ORC_REG_BP_INDIRECT; -+ break; -+ case CFI_R10: -+ orc->sp_reg = ORC_REG_R10; -+ break; -+ case CFI_R13: -+ orc->sp_reg = ORC_REG_R13; -+ break; -+ case CFI_DI: -+ orc->sp_reg = ORC_REG_DI; -+ break; -+ case CFI_DX: -+ orc->sp_reg = ORC_REG_DX; -+ break; -+ default: -+ WARN_FUNC("unknown CFA base reg %d", -+ insn->sec, insn->offset, cfa->base); -+ return -1; -+ } -+ -+ switch(bp->base) { -+ case CFI_UNDEFINED: -+ orc->bp_reg = ORC_REG_UNDEFINED; -+ break; -+ case CFI_CFA: -+ orc->bp_reg = ORC_REG_PREV_SP; -+ break; -+ case CFI_BP: -+ orc->bp_reg = ORC_REG_BP; -+ break; -+ default: -+ WARN_FUNC("unknown BP base reg %d", -+ insn->sec, insn->offset, bp->base); -+ return -1; -+ } -+ -+ orc->sp_offset = cfa->offset; -+ orc->bp_offset = bp->offset; -+ orc->type = insn->state.type; -+ } -+ -+ return 0; -+} -+ -+static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, -+ unsigned int idx, struct section *insn_sec, -+ unsigned long insn_off, struct orc_entry *o) -+{ -+ struct orc_entry *orc; -+ struct rela *rela; -+ -+ if (!insn_sec->sym) { -+ WARN("missing symbol for section %s", insn_sec->name); -+ return -1; -+ } -+ -+ /* populate ORC data */ -+ orc = (struct orc_entry *)u_sec->data->d_buf + idx; -+ memcpy(orc, o, sizeof(*orc)); -+ -+ /* populate rela for ip */ -+ rela = malloc(sizeof(*rela)); -+ if (!rela) { -+ perror("malloc"); -+ return -1; -+ } -+ memset(rela, 0, sizeof(*rela)); -+ -+ rela->sym = insn_sec->sym; -+ rela->addend = insn_off; -+ rela->type = R_X86_64_PC32; -+ rela->offset = idx * sizeof(int); -+ -+ list_add_tail(&rela->list, &ip_relasec->rela_list); -+ hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset); -+ -+ return 0; -+} -+ -+int create_orc_sections(struct objtool_file *file) -+{ -+ struct instruction *insn, *prev_insn; -+ struct section *sec, *u_sec, *ip_relasec; -+ unsigned int idx; -+ -+ struct orc_entry empty = { -+ .sp_reg = ORC_REG_UNDEFINED, -+ .bp_reg = ORC_REG_UNDEFINED, -+ .type = ORC_TYPE_CALL, -+ }; -+ -+ sec = find_section_by_name(file->elf, ".orc_unwind"); -+ if (sec) { -+ WARN("file already has .orc_unwind section, skipping"); -+ return -1; -+ } -+ -+ /* count the number of needed orcs */ -+ idx = 0; -+ for_each_sec(file, sec) { -+ if (!sec->text) -+ continue; -+ -+ prev_insn = NULL; -+ sec_for_each_insn(file, sec, insn) { -+ if (!prev_insn || -+ memcmp(&insn->orc, &prev_insn->orc, -+ sizeof(struct orc_entry))) { -+ idx++; -+ } -+ prev_insn = insn; -+ } -+ -+ /* section terminator */ -+ if (prev_insn) -+ idx++; -+ } -+ if (!idx) -+ return -1; -+ -+ -+ /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ -+ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); -+ if (!sec) -+ return -1; -+ -+ ip_relasec = elf_create_rela_section(file->elf, sec); -+ if (!ip_relasec) -+ return -1; -+ -+ /* create .orc_unwind 
section */ -+ u_sec = elf_create_section(file->elf, ".orc_unwind", -+ sizeof(struct orc_entry), idx); -+ -+ /* populate sections */ -+ idx = 0; -+ for_each_sec(file, sec) { -+ if (!sec->text) -+ continue; -+ -+ prev_insn = NULL; -+ sec_for_each_insn(file, sec, insn) { -+ if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, -+ sizeof(struct orc_entry))) { -+ -+ if (create_orc_entry(u_sec, ip_relasec, idx, -+ insn->sec, insn->offset, -+ &insn->orc)) -+ return -1; -+ -+ idx++; -+ } -+ prev_insn = insn; -+ } -+ -+ /* section terminator */ -+ if (prev_insn) { -+ if (create_orc_entry(u_sec, ip_relasec, idx, -+ prev_insn->sec, -+ prev_insn->offset + prev_insn->len, -+ &empty)) -+ return -1; -+ -+ idx++; -+ } -+ } -+ -+ if (elf_rebuild_rela_section(ip_relasec)) -+ return -1; -+ -+ return 0; -+} -diff --git a/tools/objtool/special.c b/tools/objtool/special.c -index bff8abb..84f001d 100644 ---- a/tools/objtool/special.c -+++ b/tools/objtool/special.c -@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, - alt->jump_or_nop = entry->jump_or_nop; - - if (alt->group) { -- alt->orig_len = *(unsigned char *)(sec->data + offset + -+ alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset + - entry->orig_len); -- alt->new_len = *(unsigned char *)(sec->data + offset + -+ alt->new_len = *(unsigned char *)(sec->data->d_buf + offset + - entry->new_len); - } - - if (entry->feature) { - unsigned short feature; - -- feature = *(unsigned short *)(sec->data + offset + -+ feature = *(unsigned short *)(sec->data->d_buf + offset + - entry->feature); - - /* -diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh -new file mode 100755 -index 0000000..1470e74 ---- /dev/null -+++ b/tools/objtool/sync-check.sh -@@ -0,0 +1,29 @@ -+#!/bin/sh -+# SPDX-License-Identifier: GPL-2.0 -+ -+FILES=' -+arch/x86/lib/insn.c -+arch/x86/lib/inat.c -+arch/x86/lib/x86-opcode-map.txt -+arch/x86/tools/gen-insn-attr-x86.awk -+arch/x86/include/asm/insn.h -+arch/x86/include/asm/inat.h -+arch/x86/include/asm/inat_types.h -+arch/x86/include/asm/orc_types.h -+' -+ -+check() -+{ -+ local file=$1 -+ -+ diff $file ../../$file > /dev/null || -+ echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'" -+} -+ -+if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then -+ exit 0 -+fi -+ -+for i in $FILES; do -+ check $i -+done -diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h -index ac7e075..afd9f7a 100644 ---- a/tools/objtool/warn.h -+++ b/tools/objtool/warn.h -@@ -18,6 +18,13 @@ - #ifndef _WARN_H - #define _WARN_H - -+#include <stdlib.h> -+#include <string.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <fcntl.h> -+#include "elf.h" -+ - extern const char *objname; - - static inline char *offstr(struct section *sec, unsigned long offset) -@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) - free(_str); \ - }) - -+#define WARN_ELF(format, ...) 
\ -+ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) -+ - #endif /* _WARN_H */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch deleted file mode 100644 index 6bdeb9f8..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch +++ /dev/null @@ -1,316 +0,0 @@ -From 1a39c1b7d192d62e6d4203ea7acfc35eb3317c48 Mon Sep 17 00:00:00 2001 -From: Josh Poimboeuf <jpoimboe@redhat.com> -Date: Wed, 28 Jun 2017 10:11:06 -0500 -Subject: [PATCH 89/93] objtool, x86: Add several functions and files to the - objtool whitelist - -commit c207aee48037abca71c669cbec407b9891965c34 upstream. - -In preparation for an objtool rewrite which will have broader checks, -whitelist functions and files which cause problems because they do -unusual things with the stack. - -These whitelists serve as a TODO list for which functions and files -don't yet have undwarf unwinder coverage. Eventually most of the -whitelists can be removed in favor of manual CFI hint annotations or -objtool improvements. - -Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> -Cc: Andy Lutomirski <luto@kernel.org> -Cc: Jiri Slaby <jslaby@suse.cz> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: live-patching@vger.kernel.org -Link: http://lkml.kernel.org/r/7f934a5d707a574bda33ea282e9478e627fb1829.1498659915.git.jpoimboe@redhat.com -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/crypto/Makefile | 2 ++ - arch/x86/crypto/sha1-mb/Makefile | 2 ++ - arch/x86/crypto/sha256-mb/Makefile | 2 ++ - arch/x86/kernel/Makefile | 1 + - arch/x86/kernel/acpi/Makefile | 2 ++ - arch/x86/kernel/kprobes/opt.c | 9 ++++++++- - arch/x86/kernel/reboot.c | 2 ++ - arch/x86/kvm/svm.c | 2 ++ - arch/x86/kvm/vmx.c | 3 +++ - arch/x86/lib/msr-reg.S | 8 ++++---- - arch/x86/net/Makefile | 2 ++ - arch/x86/platform/efi/Makefile | 1 + - arch/x86/power/Makefile | 2 ++ - arch/x86/xen/Makefile | 3 +++ - kernel/kexec_core.c | 4 +++- - 15 files changed, 39 insertions(+), 6 deletions(-) - -diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile -index 34b3fa2..9e32d40 100644 ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -2,6 +2,8 @@ - # Arch-specific CryptoAPI modules. - # - -+OBJECT_FILES_NON_STANDARD := y -+ - avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) - avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile -index 2f87563..2e14acc 100644 ---- a/arch/x86/crypto/sha1-mb/Makefile -+++ b/arch/x86/crypto/sha1-mb/Makefile -@@ -2,6 +2,8 @@ - # Arch-specific CryptoAPI modules. - # - -+OBJECT_FILES_NON_STANDARD := y -+ - avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) - ifeq ($(avx2_supported),yes) -diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile -index 41089e7..45b4fca 100644 ---- a/arch/x86/crypto/sha256-mb/Makefile -+++ b/arch/x86/crypto/sha256-mb/Makefile -@@ -2,6 +2,8 @@ - # Arch-specific CryptoAPI modules. 
- # - -+OBJECT_FILES_NON_STANDARD := y -+ - avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) - ifeq ($(avx2_supported),yes) -diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile -index 79076d7..4c9c615 100644 ---- a/arch/x86/kernel/Makefile -+++ b/arch/x86/kernel/Makefile -@@ -29,6 +29,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y - OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y - OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y - OBJECT_FILES_NON_STANDARD_test_nx.o := y -+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y - - # If instrumentation of this dir is enabled, boot hangs during first second. - # Probably could be more selective here, but note that files related to irqs, -diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile -index 26b78d8..85a9e17 100644 ---- a/arch/x86/kernel/acpi/Makefile -+++ b/arch/x86/kernel/acpi/Makefile -@@ -1,3 +1,5 @@ -+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y -+ - obj-$(CONFIG_ACPI) += boot.o - obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o - obj-$(CONFIG_ACPI_APEI) += apei.o -diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c -index 90f8cd6..6a7b428 100644 ---- a/arch/x86/kernel/kprobes/opt.c -+++ b/arch/x86/kernel/kprobes/opt.c -@@ -28,6 +28,7 @@ - #include <linux/kdebug.h> - #include <linux/kallsyms.h> - #include <linux/ftrace.h> -+#include <linux/frame.h> - - #include <asm/text-patching.h> - #include <asm/cacheflush.h> -@@ -91,6 +92,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) - } - - asm ( -+ "optprobe_template_func:\n" - ".global optprobe_template_entry\n" - "optprobe_template_entry:\n" - #ifdef CONFIG_X86_64 -@@ -128,7 +130,12 @@ asm ( - " popf\n" - #endif - ".global optprobe_template_end\n" -- "optprobe_template_end:\n"); -+ "optprobe_template_end:\n" -+ ".type optprobe_template_func, @function\n" -+ ".size optprobe_template_func, .-optprobe_template_func\n"); -+ -+void optprobe_template_func(void); -+STACK_FRAME_NON_STANDARD(optprobe_template_func); - - #define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) -diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c -index e244c19..acca20d 100644 ---- a/arch/x86/kernel/reboot.c -+++ b/arch/x86/kernel/reboot.c -@@ -9,6 +9,7 @@ - #include <linux/sched.h> - #include <linux/tboot.h> - #include <linux/delay.h> -+#include <linux/frame.h> - #include <acpi/reboot.h> - #include <asm/io.h> - #include <asm/apic.h> -@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type) - #ifdef CONFIG_APM_MODULE - EXPORT_SYMBOL(machine_real_restart); - #endif -+STACK_FRAME_NON_STANDARD(machine_real_restart); - - /* - * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index c60d8fc..2672102 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -36,6 +36,7 @@ - #include <linux/slab.h> - #include <linux/amd-iommu.h> - #include <linux/hashtable.h> -+#include <linux/frame.h> - - #include <asm/apic.h> - #include <asm/perf_event.h> -@@ -5099,6 +5100,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) - - mark_all_clean(svm->vmcb); - } -+STACK_FRAME_NON_STANDARD(svm_vcpu_run); - - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) - { -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 9307c0d..d39062c 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -33,6 
+33,7 @@ - #include <linux/slab.h> - #include <linux/tboot.h> - #include <linux/hrtimer.h> -+#include <linux/frame.h> - #include <linux/nospec.h> - #include "kvm_cache_regs.h" - #include "x86.h" -@@ -8680,6 +8681,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) - ); - } - } -+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); - - static bool vmx_has_emulated_msr(int index) - { -@@ -9120,6 +9122,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - vmx_recover_nmi_blocking(vmx); - vmx_complete_interrupts(vmx); - } -+STACK_FRAME_NON_STANDARD(vmx_vcpu_run); - - static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) - { -diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S -index c815564..10ffa7e 100644 ---- a/arch/x86/lib/msr-reg.S -+++ b/arch/x86/lib/msr-reg.S -@@ -13,14 +13,14 @@ - .macro op_safe_regs op - ENTRY(\op\()_safe_regs) - pushq %rbx -- pushq %rbp -+ pushq %r12 - movq %rdi, %r10 /* Save pointer */ - xorl %r11d, %r11d /* Return value */ - movl (%rdi), %eax - movl 4(%rdi), %ecx - movl 8(%rdi), %edx - movl 12(%rdi), %ebx -- movl 20(%rdi), %ebp -+ movl 20(%rdi), %r12d - movl 24(%rdi), %esi - movl 28(%rdi), %edi - 1: \op -@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs) - movl %ecx, 4(%r10) - movl %edx, 8(%r10) - movl %ebx, 12(%r10) -- movl %ebp, 20(%r10) -+ movl %r12d, 20(%r10) - movl %esi, 24(%r10) - movl %edi, 28(%r10) -- popq %rbp -+ popq %r12 - popq %rbx - ret - 3: -diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile -index 90568c3..fefb4b6 100644 ---- a/arch/x86/net/Makefile -+++ b/arch/x86/net/Makefile -@@ -1,4 +1,6 @@ - # - # Arch-specific network modules - # -+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y -+ - obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o -diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile -index 066619b..7a25502 100644 ---- a/arch/x86/platform/efi/Makefile -+++ b/arch/x86/platform/efi/Makefile -@@ -1,4 +1,5 @@ - OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y -+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y - - obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o - obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o -diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile -index a6a198c..0504187 100644 ---- a/arch/x86/power/Makefile -+++ b/arch/x86/power/Makefile -@@ -1,3 +1,5 @@ -+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y -+ - # __restore_processor_state() restores %gs after S3 resume and so should not - # itself be stack-protected - nostackp := $(call cc-option, -fno-stack-protector) -diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile -index e47e527..4a54059 100644 ---- a/arch/x86/xen/Makefile -+++ b/arch/x86/xen/Makefile -@@ -1,3 +1,6 @@ -+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y -+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y -+ - ifdef CONFIG_FUNCTION_TRACER - # Do not profile debug and lowlevel utilities - CFLAGS_REMOVE_spinlock.o = -pg -diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c -index 5616755..f5ab72e 100644 ---- a/kernel/kexec_core.c -+++ b/kernel/kexec_core.c -@@ -38,6 +38,7 @@ - #include <linux/syscore_ops.h> - #include <linux/compiler.h> - #include <linux/hugetlb.h> -+#include <linux/frame.h> - - #include <asm/page.h> - #include <asm/sections.h> -@@ -878,7 +879,7 @@ int kexec_load_disabled; - * only when panic_cpu holds the current CPU number; this is the only CPU - * which processes crash_kexec routines. 
- */ --void __crash_kexec(struct pt_regs *regs) -+void __noclone __crash_kexec(struct pt_regs *regs) - { - /* Take the kexec_mutex here to prevent sys_kexec_load - * running on one cpu from replacing the crash kernel -@@ -900,6 +901,7 @@ void __crash_kexec(struct pt_regs *regs) - mutex_unlock(&kexec_mutex); - } - } -+STACK_FRAME_NON_STANDARD(__crash_kexec); - - void crash_kexec(struct pt_regs *regs) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-pti-Document-fix-wrong-index.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-pti-Document-fix-wrong-index.patch deleted file mode 100644 index 54d9b443..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-x86-pti-Document-fix-wrong-index.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 681e4c72f7d620f7ba9d806f5997d3aeb4442b18 Mon Sep 17 00:00:00 2001 -From: "zhenwei.pi" <zhenwei.pi@youruncloud.com> -Date: Thu, 18 Jan 2018 09:04:52 +0800 -Subject: [PATCH 089/103] x86/pti: Document fix wrong index - -commit 98f0fceec7f84d80bc053e49e596088573086421 upstream. - -In section <2. Runtime Cost>, fix wrong index. - -Signed-off-by: zhenwei.pi <zhenwei.pi@youruncloud.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: dave.hansen@linux.intel.com -Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/x86/pti.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt -index d11eff6..5cd5843 100644 ---- a/Documentation/x86/pti.txt -+++ b/Documentation/x86/pti.txt -@@ -78,7 +78,7 @@ this protection comes at a cost: - non-PTI SYSCALL entry code, so requires mapping fewer - things into the userspace page tables. The downside is - that stacks must be switched at entry time. -- d. Global pages are disabled for all kernel structures not -+ c. Global pages are disabled for all kernel structures not - mapped into both kernel and userspace page tables. This - feature of the MMU allows different processes to share TLB - entries mapping the kernel. Losing the feature means more --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch deleted file mode 100644 index 5cfbebeb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch +++ /dev/null @@ -1,61 +0,0 @@ -From e06fc5b3f4c084453bad0d92f40888aad431d9bb Mon Sep 17 00:00:00 2001 -From: Andi Kleen <ak@linux.intel.com> -Date: Wed, 17 Jan 2018 14:53:28 -0800 -Subject: [PATCH 090/103] x86/retpoline: Optimize inline assembler for - vmexit_fill_RSB - -commit 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 upstream. - -The generated assembler for the C fill RSB inline asm operations has -several issues: - -- The C code sets up the loop register, which is then immediately - overwritten in __FILL_RETURN_BUFFER with the same value again. - -- The C code also passes in the iteration count in another register, which - is not used at all. - -Remove these two unnecessary operations. Just rely on the single constant -passed to the macro for the iterations. 
- -Signed-off-by: Andi Kleen <ak@linux.intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: David Woodhouse <dwmw@amazon.co.uk> -Cc: dave.hansen@intel.com -Cc: gregkh@linuxfoundation.org -Cc: torvalds@linux-foundation.org -Cc: arjan@linux.intel.com -Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/include/asm/nospec-branch.h | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 19ba5ad..4ad4108 100644 ---- a/arch/x86/include/asm/nospec-branch.h -+++ b/arch/x86/include/asm/nospec-branch.h -@@ -206,16 +206,17 @@ extern char __indirect_thunk_end[]; - static inline void vmexit_fill_RSB(void) - { - #ifdef CONFIG_RETPOLINE -- unsigned long loops = RSB_CLEAR_LOOPS / 2; -+ unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" -- : "=&r" (loops), ASM_CALL_CONSTRAINT -- : "r" (loops) : "memory" ); -+ : "=r" (loops), ASM_CALL_CONSTRAINT -+ : : "memory" ); - #endif - } -+ - #endif /* __ASSEMBLY__ */ - #endif /* __NOSPEC_BRANCH_H__ */ --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch deleted file mode 100644 index 3fdd5b1b..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 666ab0b0ab5ec0645aca319b17218c976d98b950 Mon Sep 17 00:00:00 2001 -From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Date: Sun, 3 Jun 2018 13:37:03 +0200 -Subject: [PATCH 90/93] x86/xen: Add unwind hint annotations to xen_setup_gdt - -Not needed in mainline as this function got rewritten in 4.12 - -This enables objtool to grok the iret in the middle of a C function. - -This matches commit 76846bf3cb09 ("x86/asm: Add unwind hint annotations -to sync_core()") - -Cc: Josh Poimboeuf <jpoimboe@redhat.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/xen/enlighten.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index 8b97c87..af79764 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -75,6 +75,7 @@ - #include <asm/mwait.h> - #include <asm/pci_x86.h> - #include <asm/cpu.h> -+#include <asm/unwind_hints.h> - - #ifdef CONFIG_ACPI - #include <linux/acpi.h> -@@ -1450,10 +1451,12 @@ static void __ref xen_setup_gdt(int cpu) - * GDT. The new GDT has __KERNEL_CS with CS.L = 1 - * and we are jumping to reload it. 
- */ -- asm volatile ("pushq %0\n" -+ asm volatile (UNWIND_HINT_SAVE -+ "pushq %0\n" - "leaq 1f(%%rip),%0\n" - "pushq %0\n" - "lretq\n" -+ UNWIND_HINT_RESTORE - "1:\n" - : "=&r" (dummy) : "0" (__KERNEL_CS)); - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch deleted file mode 100644 index 6ea33fd5..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch +++ /dev/null @@ -1,53 +0,0 @@ -From b7a806693ec8ac8b5872e5a3de8fddd43600165f Mon Sep 17 00:00:00 2001 -From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Date: Wed, 24 Jan 2018 15:28:17 +0100 -Subject: [PATCH 091/103] Revert "module: Add retpoline tag to VERMAGIC" - -commit 5132ede0fe8092b043dae09a7cc32b8ae7272baa upstream. - -This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. - -Turns out distros do not want to make retpoline as part of their "ABI", -so this patch should not have been merged. Sorry Andi, this was my -fault, I suggested it when your original patch was the "correct" way of -doing this instead. - -Reported-by: Jiri Kosina <jikos@kernel.org> -Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") -Acked-by: Andi Kleen <ak@linux.intel.com> -Cc: Thomas Gleixner <tglx@linutronix.de> -Cc: David Woodhouse <dwmw@amazon.co.uk> -Cc: rusty@rustcorp.com.au -Cc: arjan.van.de.ven@intel.com -Cc: jeyu@kernel.org -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/vermagic.h | 8 +------- - 1 file changed, 1 insertion(+), 7 deletions(-) - -diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h -index a3d0493..6f8fbcf 100644 ---- a/include/linux/vermagic.h -+++ b/include/linux/vermagic.h -@@ -24,16 +24,10 @@ - #ifndef MODULE_ARCH_VERMAGIC - #define MODULE_ARCH_VERMAGIC "" - #endif --#ifdef RETPOLINE --#define MODULE_VERMAGIC_RETPOLINE "retpoline " --#else --#define MODULE_VERMAGIC_RETPOLINE "" --#endif - - #define VERMAGIC_STRING \ - UTS_RELEASE " " \ - MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ -- MODULE_ARCH_VERMAGIC \ -- MODULE_VERMAGIC_RETPOLINE -+ MODULE_ARCH_VERMAGIC - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch deleted file mode 100644 index 3b05904d..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0ccf96d6eee2affd66ebde69247397542a03185b Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Wed, 30 May 2018 13:09:56 +0200 -Subject: [PATCH 91/93] x86/amd: revert commit 944e0fc51a89c9827b9 - -Revert commit 944e0fc51a89c9827b98813d65dc083274777c7f ("x86/amd: don't -set X86_BUG_SYSRET_SS_ATTRS when running under Xen") as it is lacking -a prerequisite patch and is making things worse. 
- -Signed-off-by: Juergen Gross <jgross@suse.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/amd.c | 5 ++--- - arch/x86/xen/enlighten.c | 4 +++- - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 4c2be99..cd0abf8 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -857,9 +857,8 @@ static void init_amd(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) - set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - -- /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ -- if (!cpu_has(c, X86_FEATURE_XENPV)) -- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -+ /* AMD CPUs don't reset SS attributes on SYSRET */ -+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - } - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index af79764..c926db0 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -1971,8 +1971,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); - - static void xen_set_cpu_features(struct cpuinfo_x86 *c) - { -- if (xen_pv_domain()) -+ if (xen_pv_domain()) { -+ clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - set_cpu_cap(c, X86_FEATURE_XENPV); -+ } - } - - static void xen_pin_vcpu(int cpu) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-Map-the-vsyscall-page-with-_PAGE_USER.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-Map-the-vsyscall-page-with-_PAGE_USER.patch deleted file mode 100644 index 348bade7..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-Map-the-vsyscall-page-with-_PAGE_USER.patch +++ /dev/null @@ -1,151 +0,0 @@ -From a3834ca12f7d16891da85363f2ce1459a768d216 Mon Sep 17 00:00:00 2001 -From: Borislav Petkov <bp@suse.de> -Date: Thu, 4 Jan 2018 17:42:45 +0100 -Subject: [PATCH 092/103] Map the vsyscall page with _PAGE_USER - -This needs to happen early in kaiser_pagetable_walk(), before the -hierarchy is established so that _PAGE_USER permission can be really -set. - -A proper fix would be to teach kaiser_pagetable_walk() to update those -permissions but the vsyscall page is the only exception here so ... - -Signed-off-by: Borislav Petkov <bp@suse.de> -Acked-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/vsyscall/vsyscall_64.c | 5 +++++ - arch/x86/include/asm/vsyscall.h | 2 ++ - arch/x86/mm/kaiser.c | 34 ++++++++++++++++++++++++++++++---- - 3 files changed, 37 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c -index 636c4b3..6bb7e92 100644 ---- a/arch/x86/entry/vsyscall/vsyscall_64.c -+++ b/arch/x86/entry/vsyscall/vsyscall_64.c -@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *str) - } - early_param("vsyscall", vsyscall_setup); - -+bool vsyscall_enabled(void) -+{ -+ return vsyscall_mode != NONE; -+} -+ - static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, - const char *message) - { -diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h -index 6ba66ee..4865e10 100644 ---- a/arch/x86/include/asm/vsyscall.h -+++ b/arch/x86/include/asm/vsyscall.h -@@ -12,12 +12,14 @@ extern void map_vsyscall(void); - * Returns true if handled. 
- */ - extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); -+extern bool vsyscall_enabled(void); - #else - static inline void map_vsyscall(void) {} - static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) - { - return false; - } -+static inline bool vsyscall_enabled(void) { return false; } - #endif - - #endif /* _ASM_X86_VSYSCALL_H */ -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index 42a5307..a8ade08 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -19,6 +19,7 @@ - #include <asm/pgalloc.h> - #include <asm/desc.h> - #include <asm/cmdline.h> -+#include <asm/vsyscall.h> - - int kaiser_enabled __read_mostly = 1; - EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ -@@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr) - * - * Returns a pointer to a PTE on success, or NULL on failure. - */ --static pte_t *kaiser_pagetable_walk(unsigned long address) -+static pte_t *kaiser_pagetable_walk(unsigned long address, bool user) - { - pmd_t *pmd; - pud_t *pud; - pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address)); - gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); -+ unsigned long prot = _KERNPG_TABLE; - - if (pgd_none(*pgd)) { - WARN_ONCE(1, "All shadow pgds should have been populated"); -@@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) - } - BUILD_BUG_ON(pgd_large(*pgd) != 0); - -+ if (user) { -+ /* -+ * The vsyscall page is the only page that will have -+ * _PAGE_USER set. Catch everything else. -+ */ -+ BUG_ON(address != VSYSCALL_ADDR); -+ -+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); -+ prot = _PAGE_TABLE; -+ } -+ - pud = pud_offset(pgd, address); - /* The shadow page tables do not use large mappings: */ - if (pud_large(*pud)) { -@@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) - return NULL; - spin_lock(&shadow_table_allocation_lock); - if (pud_none(*pud)) { -- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); -+ set_pud(pud, __pud(prot | __pa(new_pmd_page))); - __inc_zone_page_state(virt_to_page((void *) - new_pmd_page), NR_KAISERTABLE); - } else -@@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) - return NULL; - spin_lock(&shadow_table_allocation_lock); - if (pmd_none(*pmd)) { -- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); -+ set_pmd(pmd, __pmd(prot | __pa(new_pte_page))); - __inc_zone_page_state(virt_to_page((void *) - new_pte_page), NR_KAISERTABLE); - } else -@@ -193,7 +206,7 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, - ret = -EIO; - break; - } -- pte = kaiser_pagetable_walk(address); -+ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER); - if (!pte) { - ret = -ENOMEM; - break; -@@ -320,6 +333,19 @@ void __init kaiser_init(void) - - kaiser_init_all_pgds(); - -+ /* -+ * Note that this sets _PAGE_USER and it needs to happen when the -+ * pagetable hierarchy gets created, i.e., early. Otherwise -+ * kaiser_pagetable_walk() will encounter initialized PTEs in the -+ * hierarchy and not set the proper permissions, leading to the -+ * pagefaults with page-protection violations when trying to read the -+ * vsyscall page. For example. 
-+ */ -+ if (vsyscall_enabled()) -+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR, -+ PAGE_SIZE, -+ __PAGE_KERNEL_VSYSCALL); -+ - for_each_possible_cpu(cpu) { - void *percpu_vaddr = __per_cpu_user_mapped_start + - per_cpu_offset(cpu); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch deleted file mode 100644 index 7860a717..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 4f306d0db3c6e46b730cba2b9b59a93b46a0a315 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Wed, 30 May 2018 13:09:57 +0200 -Subject: [PATCH 92/93] xen: set cpu capabilities from xen_start_kernel() - -Upstream commit: 0808e80cb760de2733c0527d2090ed2205a1eef8 ("xen: set -cpu capabilities from xen_start_kernel()") - -There is no need to set the same capabilities for each cpu -individually. This can easily be done for all cpus when starting the -kernel. - -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/xen/enlighten.c | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index c926db0..01120da 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -470,6 +470,14 @@ static void __init xen_init_cpuid_mask(void) - cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); - } - -+static void __init xen_init_capabilities(void) -+{ -+ if (xen_pv_domain()) { -+ setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); -+ setup_force_cpu_cap(X86_FEATURE_XENPV); -+ } -+} -+ - static void xen_set_debugreg(int reg, unsigned long val) - { - HYPERVISOR_set_debugreg(reg, val); -@@ -1629,6 +1637,7 @@ asmlinkage __visible void __init xen_start_kernel(void) - - xen_init_irq_ops(); - xen_init_cpuid_mask(); -+ xen_init_capabilities(); - - #ifdef CONFIG_X86_LOCAL_APIC - /* -@@ -1969,14 +1978,6 @@ bool xen_hvm_need_lapic(void) - } - EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); - --static void xen_set_cpu_features(struct cpuinfo_x86 *c) --{ -- if (xen_pv_domain()) { -- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -- set_cpu_cap(c, X86_FEATURE_XENPV); -- } --} -- - static void xen_pin_vcpu(int cpu) - { - static bool disable_pinning; -@@ -2023,7 +2024,6 @@ const struct hypervisor_x86 x86_hyper_xen = { - .init_platform = xen_hvm_guest_init, - #endif - .x2apic_available = xen_x2apic_para_available, -- .set_cpu_features = xen_set_cpu_features, - .pin_vcpu = xen_pin_vcpu, - }; - EXPORT_SYMBOL(x86_hyper_xen); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch deleted file mode 100644 index 02ab8deb..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f0886ae9fdfab258512e978acf1dafb5c51bc693 Mon Sep 17 00:00:00 2001 -From: Ben Hutchings <ben.hutchings@codethink.co.uk> -Date: Fri, 26 Jan 2018 16:23:02 +0000 -Subject: [PATCH 093/103] vsyscall: Fix permissions for emulate mode with - KAISER/PTI - -The backport of 
KAISER to 4.4 turned vsyscall emulate mode into native -mode. Add a vsyscall_pgprot variable to hold the correct page -protections, like Borislav and Hugh did for 3.2 and 3.18. - -Cc: Borislav Petkov <bp@suse.de> -Cc: Hugh Dickins <hughd@google.com> -Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/vsyscall/vsyscall_64.c | 7 ++++--- - arch/x86/include/asm/vsyscall.h | 1 + - arch/x86/mm/kaiser.c | 2 +- - 3 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c -index 6bb7e92..0174290 100644 ---- a/arch/x86/entry/vsyscall/vsyscall_64.c -+++ b/arch/x86/entry/vsyscall/vsyscall_64.c -@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = - #else - EMULATE; - #endif -+unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL; - - static int __init vsyscall_setup(char *str) - { -@@ -336,11 +337,11 @@ void __init map_vsyscall(void) - extern char __vsyscall_page; - unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - -+ if (vsyscall_mode != NATIVE) -+ vsyscall_pgprot = __PAGE_KERNEL_VVAR; - if (vsyscall_mode != NONE) - __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, -- vsyscall_mode == NATIVE -- ? PAGE_KERNEL_VSYSCALL -- : PAGE_KERNEL_VVAR); -+ __pgprot(vsyscall_pgprot)); - - BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != - (unsigned long)VSYSCALL_ADDR); -diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h -index 4865e10..9ee8506 100644 ---- a/arch/x86/include/asm/vsyscall.h -+++ b/arch/x86/include/asm/vsyscall.h -@@ -13,6 +13,7 @@ extern void map_vsyscall(void); - */ - extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); - extern bool vsyscall_enabled(void); -+extern unsigned long vsyscall_pgprot; - #else - static inline void map_vsyscall(void) {} - static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index a8ade08..ec678aa 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -344,7 +344,7 @@ void __init kaiser_init(void) - if (vsyscall_enabled()) - kaiser_add_user_map_early((void *)VSYSCALL_ADDR, - PAGE_SIZE, -- __PAGE_KERNEL_VSYSCALL); -+ vsyscall_pgprot); - - for_each_possible_cpu(cpu) { - void *percpu_vaddr = __per_cpu_user_mapped_start + --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch deleted file mode 100644 index e2416627..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 7a5ded1f7c88ae162c525486f4be8030fa54fa26 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Wed, 30 May 2018 13:09:58 +0200 -Subject: [PATCH 93/93] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running - under Xen - -Upstream commit: def9331a12977770cc6132d79f8e6565871e8e38 ("x86/amd: -don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen") - -When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set -on AMD cpus. - -This bug/feature bit is kind of special as it will be used very early -when switching threads. Setting the bit and clearing it a little bit -later leaves a critical window where things can go wrong. 
This time -window has enlarged a little bit by using setup_clear_cpu_cap() instead -of the hypervisor's set_cpu_features callback. It seems this larger -window now makes it rather easy to hit the problem. - -The proper solution is to never set the bit in case of Xen. - -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> -Acked-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/kernel/cpu/amd.c | 5 +++-- - arch/x86/xen/enlighten.c | 4 +--- - 2 files changed, 4 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index cd0abf8..4c2be99 100644 ---- a/arch/x86/kernel/cpu/amd.c -+++ b/arch/x86/kernel/cpu/amd.c -@@ -857,8 +857,9 @@ static void init_amd(struct cpuinfo_x86 *c) - if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) - set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - -- /* AMD CPUs don't reset SS attributes on SYSRET */ -- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); -+ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ -+ if (!cpu_has(c, X86_FEATURE_XENPV)) -+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - } - - #ifdef CONFIG_X86_32 -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index 01120da..fc63c84 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -472,10 +472,8 @@ static void __init xen_init_cpuid_mask(void) - - static void __init xen_init_capabilities(void) - { -- if (xen_pv_domain()) { -- setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); -+ if (xen_pv_domain()) - setup_force_cpu_cap(X86_FEATURE_XENPV); -- } - } - - static void xen_set_debugreg(int reg, unsigned long val) --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch deleted file mode 100644 index 8b9aea64..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0094-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch +++ /dev/null @@ -1,463 +0,0 @@ -From 657cf2b4005afd14a566892eb625107d8383487d Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Fri, 21 Jul 2017 00:00:21 +0200 -Subject: [PATCH 094/103] bpf: fix mixed signed/unsigned derived min/max value - bounds - -[ Upstream commit 4cabc5b186b5427b9ee5a7495172542af105f02b ] - -Edward reported that there's an issue in min/max value bounds -tracking when signed and unsigned compares both provide hints -on limits when having unknown variables. E.g. 
a program such -as the following should have been rejected: - - 0: (7a) *(u64 *)(r10 -8) = 0 - 1: (bf) r2 = r10 - 2: (07) r2 += -8 - 3: (18) r1 = 0xffff8a94cda93400 - 5: (85) call bpf_map_lookup_elem#1 - 6: (15) if r0 == 0x0 goto pc+7 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp - 7: (7a) *(u64 *)(r10 -16) = -8 - 8: (79) r1 = *(u64 *)(r10 -16) - 9: (b7) r2 = -1 - 10: (2d) if r1 > r2 goto pc+3 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 - R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp - 11: (65) if r1 s> 0x1 goto pc+2 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 - R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp - 12: (0f) r0 += r1 - 13: (72) *(u8 *)(r0 +0) = 0 - R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 - R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp - 14: (b7) r0 = 0 - 15: (95) exit - -What happens is that in the first part ... - - 8: (79) r1 = *(u64 *)(r10 -16) - 9: (b7) r2 = -1 - 10: (2d) if r1 > r2 goto pc+3 - -... r1 carries an unsigned value, and is compared as unsigned -against a register carrying an immediate. Verifier deduces in -reg_set_min_max() that since the compare is unsigned and operation -is greater than (>), that in the fall-through/false case, r1's -minimum bound must be 0 and maximum bound must be r2. Latter is -larger than the bound and thus max value is reset back to being -'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now -'R1=inv,min_value=0'. The subsequent test ... - - 11: (65) if r1 s> 0x1 goto pc+2 - -... is a signed compare of r1 with immediate value 1. Here, -verifier deduces in reg_set_min_max() that since the compare -is signed this time and operation is greater than (>), that -in the fall-through/false case, we can deduce that r1's maximum -bound must be 1, meaning with prior test, we result in r1 having -the following state: R1=inv,min_value=0,max_value=1. Given that -the actual value this holds is -8, the bounds are wrongly deduced. -When this is being added to r0 which holds the map_value(_adj) -type, then subsequent store access in above case will go through -check_mem_access() which invokes check_map_access_adj(), that -will then probe whether the map memory is in bounds based -on the min_value and max_value as well as access size since -the actual unknown value is min_value <= x <= max_value; commit -fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, -_adj} register types") provides some more explanation on the -semantics. - -It's worth to note in this context that in the current code, -min_value and max_value tracking are used for two things, i) -dynamic map value access via check_map_access_adj() and since -commit 06c1c049721a ("bpf: allow helpers access to variable memory") -ii) also enforced at check_helper_mem_access() when passing a -memory address (pointer to packet, map value, stack) and length -pair to a helper and the length in this case is an unknown value -defining an access range through min_value/max_value in that -case. The min_value/max_value tracking is /not/ used in the -direct packet access case to track ranges. 
However, the issue -also affects case ii), for example, the following crafted program -based on the same principle must be rejected as well: - - 0: (b7) r2 = 0 - 1: (bf) r3 = r10 - 2: (07) r3 += -512 - 3: (7a) *(u64 *)(r10 -16) = -8 - 4: (79) r4 = *(u64 *)(r10 -16) - 5: (b7) r6 = -1 - 6: (2d) if r4 > r6 goto pc+5 - R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 - R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp - 7: (65) if r4 s> 0x1 goto pc+4 - R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 - R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 - R10=fp - 8: (07) r4 += 1 - 9: (b7) r5 = 0 - 10: (6a) *(u16 *)(r10 -512) = 0 - 11: (85) call bpf_skb_load_bytes#26 - 12: (b7) r0 = 0 - 13: (95) exit - -Meaning, while we initialize the max_value stack slot that the -verifier thinks we access in the [1,2] range, in reality we -pass -7 as length which is interpreted as u32 in the helper. -Thus, this issue is relevant also for the case of helper ranges. -Resetting both bounds in check_reg_overflow() in case only one -of them exceeds limits is also not enough as similar test can be -created that uses values which are within range, thus also here -learned min value in r1 is incorrect when mixed with later signed -test to create a range: - - 0: (7a) *(u64 *)(r10 -8) = 0 - 1: (bf) r2 = r10 - 2: (07) r2 += -8 - 3: (18) r1 = 0xffff880ad081fa00 - 5: (85) call bpf_map_lookup_elem#1 - 6: (15) if r0 == 0x0 goto pc+7 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp - 7: (7a) *(u64 *)(r10 -16) = -8 - 8: (79) r1 = *(u64 *)(r10 -16) - 9: (b7) r2 = 2 - 10: (3d) if r2 >= r1 goto pc+3 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 - R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp - 11: (65) if r1 s> 0x4 goto pc+2 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 - R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp - 12: (0f) r0 += r1 - 13: (72) *(u8 *)(r0 +0) = 0 - R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 - R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp - 14: (b7) r0 = 0 - 15: (95) exit - -This leaves us with two options for fixing this: i) to invalidate -all prior learned information once we switch signed context, ii) -to track min/max signed and unsigned boundaries separately as -done in [0]. (Given latter introduces major changes throughout -the whole verifier, it's rather net-next material, thus this -patch follows option i), meaning we can derive bounds either -from only signed tests or only unsigned tests.) 
There is still the -case of adjust_reg_min_max_vals(), where we adjust bounds on ALU -operations, meaning programs like the following where boundaries -on the reg get mixed in context later on when bounds are merged -on the dst reg must get rejected, too: - - 0: (7a) *(u64 *)(r10 -8) = 0 - 1: (bf) r2 = r10 - 2: (07) r2 += -8 - 3: (18) r1 = 0xffff89b2bf87ce00 - 5: (85) call bpf_map_lookup_elem#1 - 6: (15) if r0 == 0x0 goto pc+6 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp - 7: (7a) *(u64 *)(r10 -16) = -8 - 8: (79) r1 = *(u64 *)(r10 -16) - 9: (b7) r2 = 2 - 10: (3d) if r2 >= r1 goto pc+2 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 - R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp - 11: (b7) r7 = 1 - 12: (65) if r7 s> 0x0 goto pc+2 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 - R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp - 13: (b7) r0 = 0 - 14: (95) exit - - from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 - R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp - 15: (0f) r7 += r1 - 16: (65) if r7 s> 0x4 goto pc+2 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 - R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp - 17: (0f) r0 += r7 - 18: (72) *(u8 *)(r0 +0) = 0 - R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 - R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp - 19: (b7) r0 = 0 - 20: (95) exit - -Meaning, in adjust_reg_min_max_vals() we must also reset range -values on the dst when src/dst registers have mixed signed/ -unsigned derived min/max value bounds with one unbounded value -as otherwise they can be added together deducing false boundaries. -Once both boundaries are established from either ALU ops or -compare operations w/o mixing signed/unsigned insns, then they -can safely be added to other regs also having both boundaries -established. Adding regs with one unbounded side to a map value -where the bounded side has been learned w/o mixing ops is -possible, but the resulting map value won't recover from that, -meaning such op is considered invalid on the time of actual -access. Invalid bounds are set on the dst reg in case i) src reg, -or ii) in case dst reg already had them. The only way to recover -would be to perform i) ALU ops but only 'add' is allowed on map -value types or ii) comparisons, but these are disallowed on -pointers in case they span a range. This is fine as only BPF_JEQ -and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers -which potentially turn them into PTR_TO_MAP_VALUE type depending -on the branch, so only here min/max value cannot be invalidated -for them. - -In terms of state pruning, value_from_signed is considered -as well in states_equal() when dealing with adjusted map values. -With regards to breaking existing programs, there is a small -risk, but use-cases are rather quite narrow where this could -occur and mixing compares probably unlikely. - -Joint work with Josef and Edward. - - [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html - -Fixes: 484611357c19 ("bpf: allow access into map value arrays") -Reported-by: Edward Cree <ecree@solarflare.com> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Edward Cree <ecree@solarflare.com> -Signed-off-by: Josef Bacik <jbacik@fb.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/bpf_verifier.h | 1 + - kernel/bpf/verifier.c | 110 +++++++++++++++++++++++++++++++++++++------ - 2 files changed, 97 insertions(+), 14 deletions(-) - -diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h -index 2edf8de..070fc49 100644 ---- a/include/linux/bpf_verifier.h -+++ b/include/linux/bpf_verifier.h -@@ -40,6 +40,7 @@ struct bpf_reg_state { - */ - s64 min_value; - u64 max_value; -+ bool value_from_signed; - }; - - enum bpf_stack_slot_type { -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 787b851..bfafb53 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -671,12 +671,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, - return -EACCES; - } - --static bool is_pointer_value(struct bpf_verifier_env *env, int regno) -+static bool __is_pointer_value(bool allow_ptr_leaks, -+ const struct bpf_reg_state *reg) - { -- if (env->allow_ptr_leaks) -+ if (allow_ptr_leaks) - return false; - -- switch (env->cur_state.regs[regno].type) { -+ switch (reg->type) { - case UNKNOWN_VALUE: - case CONST_IMM: - return false; -@@ -685,6 +686,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) - } - } - -+static bool is_pointer_value(struct bpf_verifier_env *env, int regno) -+{ -+ return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); -+} -+ - static int check_ptr_alignment(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int off, int size) - { -@@ -1521,10 +1527,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, - } - - /* We don't know anything about what was done to this register, mark it -- * as unknown. -+ * as unknown. Also, if both derived bounds came from signed/unsigned -+ * mixed compares and one side is unbounded, we cannot really do anything -+ * with them as boundaries cannot be trusted. Thus, arithmetic of two -+ * regs of such kind will get invalidated bounds on the dst side. - */ -- if (min_val == BPF_REGISTER_MIN_RANGE && -- max_val == BPF_REGISTER_MAX_RANGE) { -+ if ((min_val == BPF_REGISTER_MIN_RANGE && -+ max_val == BPF_REGISTER_MAX_RANGE) || -+ (BPF_SRC(insn->code) == BPF_X && -+ ((min_val != BPF_REGISTER_MIN_RANGE && -+ max_val == BPF_REGISTER_MAX_RANGE) || -+ (min_val == BPF_REGISTER_MIN_RANGE && -+ max_val != BPF_REGISTER_MAX_RANGE) || -+ (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && -+ dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || -+ (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && -+ dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && -+ regs[insn->dst_reg].value_from_signed != -+ regs[insn->src_reg].value_from_signed)) { - reset_reg_range_values(regs, insn->dst_reg); - return; - } -@@ -1855,38 +1875,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, u64 val, - u8 opcode) - { -+ bool value_from_signed = true; -+ bool is_range = true; -+ - switch (opcode) { - case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - true_reg->max_value = true_reg->min_value = val; -+ is_range = false; - break; - case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; - */ - false_reg->max_value = false_reg->min_value = val; -+ is_range = false; - break; - case BPF_JGT: -- /* Unsigned comparison, the minimum value is 0. 
*/ -- false_reg->min_value = 0; -+ value_from_signed = false; -+ /* fallthrough */ - case BPF_JSGT: -+ if (true_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(true_reg, 0); -+ if (false_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(false_reg, 0); -+ if (opcode == BPF_JGT) { -+ /* Unsigned comparison, the minimum value is 0. */ -+ false_reg->min_value = 0; -+ } - /* If this is false then we know the maximum val is val, - * otherwise we know the min val is val+1. - */ - false_reg->max_value = val; -+ false_reg->value_from_signed = value_from_signed; - true_reg->min_value = val + 1; -+ true_reg->value_from_signed = value_from_signed; - break; - case BPF_JGE: -- /* Unsigned comparison, the minimum value is 0. */ -- false_reg->min_value = 0; -+ value_from_signed = false; -+ /* fallthrough */ - case BPF_JSGE: -+ if (true_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(true_reg, 0); -+ if (false_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(false_reg, 0); -+ if (opcode == BPF_JGE) { -+ /* Unsigned comparison, the minimum value is 0. */ -+ false_reg->min_value = 0; -+ } - /* If this is false then we know the maximum value is val - 1, - * otherwise we know the mimimum value is val. - */ - false_reg->max_value = val - 1; -+ false_reg->value_from_signed = value_from_signed; - true_reg->min_value = val; -+ true_reg->value_from_signed = value_from_signed; - break; - default: - break; -@@ -1894,6 +1939,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, - - check_reg_overflow(false_reg); - check_reg_overflow(true_reg); -+ if (is_range) { -+ if (__is_pointer_value(false, false_reg)) -+ reset_reg_range_values(false_reg, 0); -+ if (__is_pointer_value(false, true_reg)) -+ reset_reg_range_values(true_reg, 0); -+ } - } - - /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg -@@ -1903,39 +1954,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, u64 val, - u8 opcode) - { -+ bool value_from_signed = true; -+ bool is_range = true; -+ - switch (opcode) { - case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - true_reg->max_value = true_reg->min_value = val; -+ is_range = false; - break; - case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; - */ - false_reg->max_value = false_reg->min_value = val; -+ is_range = false; - break; - case BPF_JGT: -- /* Unsigned comparison, the minimum value is 0. */ -- true_reg->min_value = 0; -+ value_from_signed = false; -+ /* fallthrough */ - case BPF_JSGT: -+ if (true_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(true_reg, 0); -+ if (false_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(false_reg, 0); -+ if (opcode == BPF_JGT) { -+ /* Unsigned comparison, the minimum value is 0. */ -+ true_reg->min_value = 0; -+ } - /* - * If this is false, then the val is <= the register, if it is - * true the register <= to the val. - */ - false_reg->min_value = val; -+ false_reg->value_from_signed = value_from_signed; - true_reg->max_value = val - 1; -+ true_reg->value_from_signed = value_from_signed; - break; - case BPF_JGE: -- /* Unsigned comparison, the minimum value is 0. 
*/ -- true_reg->min_value = 0; -+ value_from_signed = false; -+ /* fallthrough */ - case BPF_JSGE: -+ if (true_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(true_reg, 0); -+ if (false_reg->value_from_signed != value_from_signed) -+ reset_reg_range_values(false_reg, 0); -+ if (opcode == BPF_JGE) { -+ /* Unsigned comparison, the minimum value is 0. */ -+ true_reg->min_value = 0; -+ } - /* If this is false then constant < register, if it is true then - * the register < constant. - */ - false_reg->min_value = val + 1; -+ false_reg->value_from_signed = value_from_signed; - true_reg->max_value = val; -+ true_reg->value_from_signed = value_from_signed; - break; - default: - break; -@@ -1943,6 +2019,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, - - check_reg_overflow(false_reg); - check_reg_overflow(true_reg); -+ if (is_range) { -+ if (__is_pointer_value(false, false_reg)) -+ reset_reg_range_values(false_reg, 0); -+ if (__is_pointer_value(false, true_reg)) -+ reset_reg_range_values(true_reg, 0); -+ } - } - - static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch deleted file mode 100644 index fc0cb2fc..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0095-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch +++ /dev/null @@ -1,83 +0,0 @@ -From c7d695dd43368b0231ed509066131696d1533738 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Thu, 29 Jun 2017 03:04:59 +0200 -Subject: [PATCH 095/103] bpf: prevent leaking pointer via xadd on - unpriviledged - -commit 6bdf6abc56b53103324dfd270a86580306e1a232 upstream. - -Leaking kernel addresses on unpriviledged is generally disallowed, -for example, verifier rejects the following: - - 0: (b7) r0 = 0 - 1: (18) r2 = 0xffff897e82304400 - 3: (7b) *(u64 *)(r1 +48) = r2 - R2 leaks addr into ctx - -Doing pointer arithmetic on them is also forbidden, so that they -don't turn into unknown value and then get leaked out. However, -there's xadd as a special case, where we don't check the src reg -for being a pointer register, e.g. the following will pass: - - 0: (b7) r0 = 0 - 1: (7b) *(u64 *)(r1 +48) = r0 - 2: (18) r2 = 0xffff897e82304400 ; map - 4: (db) lock *(u64 *)(r1 +48) += r2 - 5: (95) exit - -We could store the pointer into skb->cb, loose the type context, -and then read it out from there again to leak it eventually out -of a map value. Or more easily in a different variant, too: - - 0: (bf) r6 = r1 - 1: (7a) *(u64 *)(r10 -8) = 0 - 2: (bf) r2 = r10 - 3: (07) r2 += -8 - 4: (18) r1 = 0x0 - 6: (85) call bpf_map_lookup_elem#1 - 7: (15) if r0 == 0x0 goto pc+3 - R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R6=ctx R10=fp - 8: (b7) r3 = 0 - 9: (7b) *(u64 *)(r0 +0) = r3 - 10: (db) lock *(u64 *)(r0 +0) += r6 - 11: (b7) r0 = 0 - 12: (95) exit - - from 7 to 11: R0=inv,min_value=0,max_value=0 R6=ctx R10=fp - 11: (b7) r0 = 0 - 12: (95) exit - -Prevent this by checking xadd src reg for pointer types. Also -add a couple of test cases related to this. 
- -Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") -Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Acked-by: Alexei Starovoitov <ast@kernel.org> -Acked-by: Martin KaFai Lau <kafai@fb.com> -Acked-by: Edward Cree <ecree@solarflare.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index bfafb53..6767670 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -880,6 +880,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) - if (err) - return err; - -+ if (is_pointer_value(env, insn->src_reg)) { -+ verbose("R%d leaks addr into mem\n", insn->src_reg); -+ return -EACCES; -+ } -+ - /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, -1); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch deleted file mode 100644 index 28963f0c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0096-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 7beac55cc4c612a3e74debd1d82a01f375b5a187 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet <edumazet@google.com> -Date: Mon, 29 Jan 2018 02:48:54 +0100 -Subject: [PATCH 096/103] x86: bpf_jit: small optimization in - emit_bpf_tail_call() - -[ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ] - -Saves 4 bytes replacing following instructions : - -lea rax, [rsi + rdx * 8 + offsetof(...)] -mov rax, qword ptr [rax] -cmp rax, 0 - -by : - -mov rax, [rsi + rdx * 8 + offsetof(...)] -test rax, rax - -Signed-off-by: Eric Dumazet <edumazet@google.com> -Cc: Alexei Starovoitov <ast@kernel.org> -Cc: Daniel Borkmann <daniel@iogearbox.net> -Acked-by: Daniel Borkmann <daniel@iogearbox.net> -Acked-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/net/bpf_jit_comp.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c -index 15f7436..ece29e2 100644 ---- a/arch/x86/net/bpf_jit_comp.c -+++ b/arch/x86/net/bpf_jit_comp.c -@@ -281,7 +281,7 @@ static void emit_bpf_tail_call(u8 **pprog) - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ - offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ --#define OFFSET1 47 /* number of bytes to jump */ -+#define OFFSET1 43 /* number of bytes to jump */ - EMIT2(X86_JBE, OFFSET1); /* jbe out */ - label1 = cnt; - -@@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog) - */ - EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ - EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ --#define OFFSET2 36 -+#define OFFSET2 32 - EMIT2(X86_JA, OFFSET2); /* ja out */ - label2 = cnt; - EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ - EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ - - /* prog = array->ptrs[index]; */ -- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ -+ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ - offsetof(struct bpf_array, ptrs)); -- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ - - /* if (prog == NULL) - * goto out; - */ -- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ -+ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ - #define OFFSET3 10 - EMIT2(X86_JE, OFFSET3); /* je out */ - label3 = cnt; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-fix-bpf_tail_call-x64-JIT.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-fix-bpf_tail_call-x64-JIT.patch deleted file mode 100644 index 3dc0c637..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0097-bpf-fix-bpf_tail_call-x64-JIT.patch +++ /dev/null @@ -1,62 +0,0 @@ -From ecfa2d777b29f5947197dd304baf515d1ac16355 Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@fb.com> -Date: Mon, 29 Jan 2018 02:48:55 +0100 -Subject: [PATCH 097/103] bpf: fix bpf_tail_call() x64 JIT - -[ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ] - -- bpf prog_array just like all other types of bpf array accepts 32-bit index. - Clarify that in the comment. -- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes -- tighten corresponding check in the interpreter to stay consistent - -The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag -in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and -though JIT code is wrong it will check bounds correctly. -Hence two fixes tags. All other JITs don't have this problem. - -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") -Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") -Acked-by: Daniel Borkmann <daniel@iogearbox.net> -Acked-by: Martin KaFai Lau <kafai@fb.com> -Reviewed-by: Eric Dumazet <edumazet@google.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/net/bpf_jit_comp.c | 4 ++-- - kernel/bpf/core.c | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c -index ece29e2..7840331 100644 ---- a/arch/x86/net/bpf_jit_comp.c -+++ b/arch/x86/net/bpf_jit_comp.c -@@ -278,9 +278,9 @@ static void emit_bpf_tail_call(u8 **pprog) - /* if (index >= array->map.max_entries) - * goto out; - */ -- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ -+ EMIT2(0x89, 0xD2); /* mov edx, edx */ -+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ - offsetof(struct bpf_array, map.max_entries)); -- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ - #define OFFSET1 43 /* number of bytes to jump */ - EMIT2(X86_JBE, OFFSET1); /* jbe out */ - label1 = cnt; -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index aa6d981..ab9576b 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -715,7 +715,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) - struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; - struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog; -- u64 index = BPF_R3; -+ u32 index = BPF_R3; - - if (unlikely(index >= array->map.max_entries)) - goto out; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch deleted file mode 100644 index d4d8de66..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0098-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch +++ /dev/null @@ -1,222 +0,0 @@ -From fa0b0b09af62c9157bb3bcc893a175dcd7034197 Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@kernel.org> -Date: Mon, 29 Jan 2018 02:48:56 +0100 -Subject: [PATCH 098/103] bpf: introduce BPF_JIT_ALWAYS_ON config - -[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ] - -The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715. - -A quote from goolge project zero blog: -"At this point, it would normally be necessary to locate gadgets in -the host kernel code that can be used to actually leak data by reading -from an attacker-controlled location, shifting and masking the result -appropriately and then using the result of that as offset to an -attacker-controlled address for a load. But piecing gadgets together -and figuring out which ones work in a speculation context seems annoying. -So instead, we decided to use the eBPF interpreter, which is built into -the host kernel - while there is no legitimate way to invoke it from inside -a VM, the presence of the code in the host kernel's text section is sufficient -to make it usable for the attack, just like with ordinary ROP gadgets." - -To make attacker job harder introduce BPF_JIT_ALWAYS_ON config -option that removes interpreter from the kernel in favor of JIT-only mode. -So far eBPF JIT is supported by: -x64, arm64, arm32, sparc64, s390, powerpc64, mips64 - -The start of JITed program is randomized and code page is marked as read-only. 
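What JIT-only mode means in practice can be observed from userspace. The following probe is a sketch (mine, not part of this patch; it uses only the uapi bpf syscall and constants) that loads a trivial "return 0" program. On a BPF_JIT_ALWAYS_ON kernel whose architecture lacks an eBPF JIT, the load now fails (surfacing the kernel-internal ENOTSUPP value) rather than silently running in the interpreter:

    #include <linux/bpf.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        /* trivial program: r0 = 0; exit */
        struct bpf_insn insns[] = {
            { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 0 },
            { .code = BPF_JMP | BPF_EXIT },
        };
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));           /* kernel rejects nonzero pad bytes */
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insn_cnt  = sizeof(insns) / sizeof(insns[0]);
        attr.insns     = (uintptr_t)insns;
        attr.license   = (uintptr_t)"GPL";

        int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
        if (fd < 0)
            perror("BPF_PROG_LOAD");              /* JIT-only kernel, no JIT: refused */
        return 0;
    }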
-In addition "constant blinding" can be turned on with net.core.bpf_jit_harden - -v2->v3: -- move __bpf_prog_ret0 under ifdef (Daniel) - -v1->v2: -- fix init order, test_bpf and cBPF (Daniel's feedback) -- fix offloaded bpf (Jakub's feedback) -- add 'return 0' dummy in case something can invoke prog->bpf_func -- retarget bpf tree. For bpf-next the patch would need one extra hunk. - It will be sent when the trees are merged back to net-next - -Considered doing: - int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT; -but it seems better to land the patch as-is and in bpf-next remove -bpf_jit_enable global variable from all JITs, consolidate in one place -and remove this jit_init() function. - -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - init/Kconfig | 7 +++++++ - kernel/bpf/core.c | 18 ++++++++++++++++++ - lib/test_bpf.c | 11 +++++++---- - net/core/filter.c | 6 ++---- - net/core/sysctl_net_core.c | 6 ++++++ - net/socket.c | 9 +++++++++ - 6 files changed, 49 insertions(+), 8 deletions(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 7f69e2e..e9b989c 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1634,6 +1634,13 @@ config BPF_SYSCALL - Enable the bpf() system call that allows to manipulate eBPF - programs and maps via file descriptors. - -+config BPF_JIT_ALWAYS_ON -+ bool "Permanently enable BPF JIT and remove BPF interpreter" -+ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT -+ help -+ Enables BPF JIT and removes BPF interpreter to avoid -+ speculative execution of BPF instructions by the interpreter -+ - config SHMEM - bool "Use full shmem filesystem" if EXPERT - default y -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index ab9576b..64c4b13 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) - } - EXPORT_SYMBOL_GPL(__bpf_call_base); - -+#ifndef CONFIG_BPF_JIT_ALWAYS_ON - /** - * __bpf_prog_run - run eBPF program on a given context - * @ctx: is the data we are operating on -@@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) - } - STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ - -+#else -+static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) -+{ -+ return 0; -+} -+#endif -+ - bool bpf_prog_array_compatible(struct bpf_array *array, - const struct bpf_prog *fp) - { -@@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) - */ - struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) - { -+#ifndef CONFIG_BPF_JIT_ALWAYS_ON - fp->bpf_func = (void *) __bpf_prog_run; -+#else -+ fp->bpf_func = (void *) __bpf_prog_ret0; -+#endif - - /* eBPF JITs can rewrite the program in case constant - * blinding is active. However, in case of error during -@@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) - * be JITed, but falls back to the interpreter. 
- */ - fp = bpf_int_jit_compile(fp); -+#ifdef CONFIG_BPF_JIT_ALWAYS_ON -+ if (!fp->jited) { -+ *err = -ENOTSUPP; -+ return fp; -+ } -+#endif - bpf_prog_lock_ro(fp); - - /* The tail call compatibility check can only be done at -diff --git a/lib/test_bpf.c b/lib/test_bpf.c -index 0362da0..158b4a3 100644 ---- a/lib/test_bpf.c -+++ b/lib/test_bpf.c -@@ -5601,9 +5601,8 @@ static struct bpf_prog *generate_filter(int which, int *err) - return NULL; - } - } -- /* We don't expect to fail. */ - if (*err) { -- pr_cont("FAIL to attach err=%d len=%d\n", -+ pr_cont("FAIL to prog_create err=%d len=%d\n", - *err, fprog.len); - return NULL; - } -@@ -5626,6 +5625,10 @@ static struct bpf_prog *generate_filter(int which, int *err) - * checks. - */ - fp = bpf_prog_select_runtime(fp, err); -+ if (*err) { -+ pr_cont("FAIL to select_runtime err=%d\n", *err); -+ return NULL; -+ } - break; - } - -@@ -5811,8 +5814,8 @@ static __init int test_bpf(void) - pass_cnt++; - continue; - } -- -- return err; -+ err_cnt++; -+ continue; - } - - pr_cont("jited:%u ", fp->jited); -diff --git a/net/core/filter.c b/net/core/filter.c -index b391209..c066b00 100644 ---- a/net/core/filter.c -+++ b/net/core/filter.c -@@ -1005,11 +1005,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) - */ - goto out_err_free; - -- /* We are guaranteed to never error here with cBPF to eBPF -- * transitions, since there's no issue with type compatibility -- * checks on program arrays. -- */ - fp = bpf_prog_select_runtime(fp, &err); -+ if (err) -+ goto out_err_free; - - kfree(old_prog); - return fp; -diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c -index 0df2aa6..9955d6d 100644 ---- a/net/core/sysctl_net_core.c -+++ b/net/core/sysctl_net_core.c -@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = { - .data = &bpf_jit_enable, - .maxlen = sizeof(int), - .mode = 0644, -+#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -+#else -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = &one, -+ .extra2 = &one, -+#endif - }, - # ifdef CONFIG_HAVE_EBPF_JIT - { -diff --git a/net/socket.c b/net/socket.c -index 6bbccf0..8a9624e 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -2547,6 +2547,15 @@ static int __init sock_init(void) - - core_initcall(sock_init); /* early initcall */ - -+static int __init jit_init(void) -+{ -+#ifdef CONFIG_BPF_JIT_ALWAYS_ON -+ bpf_jit_enable = 1; -+#endif -+ return 0; -+} -+pure_initcall(jit_init); -+ - #ifdef CONFIG_PROC_FS - void socket_seq_show(struct seq_file *seq) - { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch deleted file mode 100644 index acbc5668..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0099-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch +++ /dev/null @@ -1,50 +0,0 @@ -From d4d31ba0cca574288cdaef8002d11cf27b300d0f Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Mon, 29 Jan 2018 02:48:57 +0100 -Subject: [PATCH 099/103] bpf: arsh is not supported in 32 bit alu thus reject - it - -[ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ] - -The following snippet was throwing an 'unknown opcode cc' warning -in BPF interpreter: - - 0: (18) r0 = 0x0 - 2: (7b) *(u64 *)(r10 -16) = r0 - 3: (cc) (u32) r0 s>>= (u32) r0 - 4: (95) exit - -Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X} 
-generation, not all of them do and interpreter does neither. We can -leave existing ones and implement it later in bpf-next for the -remaining ones, but reject this properly in verifier for the time -being. - -Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") -Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 6767670..6caf0ff1 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -1753,6 +1753,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) - return -EINVAL; - } - -+ if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { -+ verbose("BPF_ARSH not supported for 32 bit ALU\n"); -+ return -EINVAL; -+ } -+ - if ((opcode == BPF_LSH || opcode == BPF_RSH || - opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { - int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch deleted file mode 100644 index 129c2475..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 486eb3eb62604a5c691156b6a657b03fe67cb167 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Mon, 29 Jan 2018 02:48:58 +0100 -Subject: [PATCH 100/103] bpf: avoid false sharing of map refcount with - max_entries - -[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ] - -In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds -speculation") also change the layout of struct bpf_map such that -false sharing of fast-path members like max_entries is avoided -when the maps reference counter is altered. Therefore enforce -them to be placed into separate cachelines. - -pahole dump after change: - - struct bpf_map { - const struct bpf_map_ops * ops; /* 0 8 */ - struct bpf_map * inner_map_meta; /* 8 8 */ - void * security; /* 16 8 */ - enum bpf_map_type map_type; /* 24 4 */ - u32 key_size; /* 28 4 */ - u32 value_size; /* 32 4 */ - u32 max_entries; /* 36 4 */ - u32 map_flags; /* 40 4 */ - u32 pages; /* 44 4 */ - u32 id; /* 48 4 */ - int numa_node; /* 52 4 */ - bool unpriv_array; /* 56 1 */ - - /* XXX 7 bytes hole, try to pack */ - - /* --- cacheline 1 boundary (64 bytes) --- */ - struct user_struct * user; /* 64 8 */ - atomic_t refcnt; /* 72 4 */ - atomic_t usercnt; /* 76 4 */ - struct work_struct work; /* 80 32 */ - char name[16]; /* 112 16 */ - /* --- cacheline 2 boundary (128 bytes) --- */ - - /* size: 128, cachelines: 2, members: 17 */ - /* sum members: 121, holes: 1, sum holes: 7 */ - }; - -Now all entries in the first cacheline are read only throughout -the life time of the map, set up once during map creation. Overall -struct size and number of cachelines doesn't change from the -reordering. struct bpf_map is usually first member and embedded -in map structs in specific map implementations, so also avoid those -members to sit at the end where it could potentially share the -cacheline with first map values e.g. 
in the array since remote -CPUs could trigger map updates just as well for those (easily -dirtying members like max_entries intentionally as well) while -having subsequent values in cache. - -Quoting from Google's Project Zero blog [1]: - - Additionally, at least on the Intel machine on which this was - tested, bouncing modified cache lines between cores is slow, - apparently because the MESI protocol is used for cache coherence - [8]. Changing the reference counter of an eBPF array on one - physical CPU core causes the cache line containing the reference - counter to be bounced over to that CPU core, making reads of the - reference counter on all other CPU cores slow until the changed - reference counter has been written back to memory. Because the - length and the reference counter of an eBPF array are stored in - the same cache line, this also means that changing the reference - counter on one physical CPU core causes reads of the eBPF array's - length to be slow on other physical CPU cores (intentional false - sharing). - -While this doesn't 'control' the out-of-bounds speculation through -masking the index as in commit b2157399cc98, triggering a manipulation -of the map's reference counter is really trivial, so lets not allow -to easily affect max_entries from it. - -Splitting to separate cachelines also generally makes sense from -a performance perspective anyway in that fast-path won't have a -cache miss if the map gets pinned, reused in other progs, etc out -of control path, thus also avoids unintentional false sharing. - - [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html - -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - include/linux/bpf.h | 16 ++++++++++++---- - 1 file changed, 12 insertions(+), 4 deletions(-) - -diff --git a/include/linux/bpf.h b/include/linux/bpf.h -index 0dbb21b..80064c8 100644 ---- a/include/linux/bpf.h -+++ b/include/linux/bpf.h -@@ -36,7 +36,10 @@ struct bpf_map_ops { - }; - - struct bpf_map { -- atomic_t refcnt; -+ /* 1st cacheline with read-mostly members of which some -+ * are also accessed in fast-path (e.g. ops, max_entries). -+ */ -+ const struct bpf_map_ops *ops ____cacheline_aligned; - enum bpf_map_type map_type; - u32 key_size; - u32 value_size; -@@ -44,10 +47,15 @@ struct bpf_map { - u32 map_flags; - u32 pages; - bool unpriv_array; -- struct user_struct *user; -- const struct bpf_map_ops *ops; -- struct work_struct work; -+ /* 7 bytes hole */ -+ -+ /* 2nd cacheline with misc members to avoid false sharing -+ * particularly with refcounting. -+ */ -+ struct user_struct *user ____cacheline_aligned; -+ atomic_t refcnt; - atomic_t usercnt; -+ struct work_struct work; - }; - - struct bpf_map_type_list { --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-divides-by-zero.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-divides-by-zero.patch deleted file mode 100644 index 73a0ef6c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0101-bpf-fix-divides-by-zero.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 919f74d13fc915b102092df42673100a20ac5e99 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet <edumazet@google.com> -Date: Mon, 29 Jan 2018 02:48:59 +0100 -Subject: [PATCH 101/103] bpf: fix divides by zero - -[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ] - -Divides by zero are not nice, lets avoid them if possible. 
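The cast matters because a 32-bit ALU op only consumes the low half of the source register; a short worked example (mine, mirroring the interpreter's variable names):

    u64 SRC = 0x100000000ULL;  /* non-zero as a u64, but zero in the low 32 bits */

    /* old check:  (SRC == 0)      -> false, so ALU_DIV_X went on to
     *             do_div(tmp, (u32)SRC) and divided by zero on the host;
     * new check:  ((u32)SRC == 0) -> true, the interpreter returns 0
     *             and the program is aborted safely. */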
- -Also do_div() seems not needed when dealing with 32bit operands, -but this seems a minor detail. - -Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") -Signed-off-by: Eric Dumazet <edumazet@google.com> -Reported-by: syzbot <syzkaller@googlegroups.com> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index 64c4b13..879ca84 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -642,7 +642,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) - DST = tmp; - CONT; - ALU_MOD_X: -- if (unlikely(SRC == 0)) -+ if (unlikely((u32)SRC == 0)) - return 0; - tmp = (u32) DST; - DST = do_div(tmp, (u32) SRC); -@@ -661,7 +661,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) - DST = div64_u64(DST, SRC); - CONT; - ALU_DIV_X: -- if (unlikely(SRC == 0)) -+ if (unlikely((u32)SRC == 0)) - return 0; - tmp = (u32) DST; - do_div(tmp, (u32) SRC); --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-fix-32-bit-divide-by-zero.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-fix-32-bit-divide-by-zero.patch deleted file mode 100644 index cb5d56f9..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0102-bpf-fix-32-bit-divide-by-zero.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b0ff2bf4aaae990e6b8ce98f581aab8042a7f3e3 Mon Sep 17 00:00:00 2001 -From: Alexei Starovoitov <ast@kernel.org> -Date: Mon, 29 Jan 2018 02:49:00 +0100 -Subject: [PATCH 102/103] bpf: fix 32-bit divide by zero - -[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ] - -due to some JITs doing if (src_reg == 0) check in 64-bit mode -for div/mod operations mask upper 32-bits of src register -before doing the check - -Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") -Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") -Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 18 ++++++++++++++++++ - net/core/filter.c | 4 ++++ - 2 files changed, 22 insertions(+) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 6caf0ff1..2dce3aa 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -3235,6 +3235,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) - - - for (i = 0; i < insn_cnt; i++, insn++) { -+ if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || -+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { -+ /* due to JIT bugs clear upper 32-bits of src register -+ * before div/mod operation -+ */ -+ insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); -+ insn_buf[1] = *insn; -+ cnt = 2; -+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); -+ if (!new_prog) -+ return -ENOMEM; -+ -+ delta += cnt - 1; -+ env->prog = prog = new_prog; -+ insn = new_prog->insnsi + i + delta; -+ continue; -+ } -+ - if (insn->code != (BPF_JMP | BPF_CALL)) - continue; - -diff --git a/net/core/filter.c b/net/core/filter.c -index c066b00..615033b 100644 ---- a/net/core/filter.c -+++ b/net/core/filter.c -@@ -441,6 +441,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, - convert_bpf_extensions(fp, &insn)) - break; - -+ if (fp->code == 
(BPF_ALU | BPF_DIV | BPF_X) || -+ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) -+ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); -+ - *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); - break; - --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0103-bpf-reject-stores-into-ctx-via-st-and-xadd.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0103-bpf-reject-stores-into-ctx-via-st-and-xadd.patch deleted file mode 100644 index b5f74b5c..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0103-bpf-reject-stores-into-ctx-via-st-and-xadd.patch +++ /dev/null @@ -1,72 +0,0 @@ -From a17dcc431d2b2a6fcba9666df94abc5a1e14d1be Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann <daniel@iogearbox.net> -Date: Mon, 29 Jan 2018 02:49:01 +0100 -Subject: [PATCH 103/103] bpf: reject stores into ctx via st and xadd - -[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ] - -Alexei found that verifier does not reject stores into context -via BPF_ST instead of BPF_STX. And while looking at it, we -also should not allow XADD variant of BPF_STX. - -The context rewriter is only assuming either BPF_LDX_MEM- or -BPF_STX_MEM-type operations, thus reject anything other than -that so that assumptions in the rewriter properly hold. Add -test cases as well for BPF selftests. - -Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") -Reported-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> -Signed-off-by: Alexei Starovoitov <ast@kernel.org> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - kernel/bpf/verifier.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index 2dce3aa..a58bb9e 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -691,6 +691,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) - return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); - } - -+static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) -+{ -+ const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; -+ -+ return reg->type == PTR_TO_CTX; -+} -+ - static int check_ptr_alignment(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int off, int size) - { -@@ -885,6 +892,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) - return -EACCES; - } - -+ if (is_ctx_reg(env, insn->dst_reg)) { -+ verbose("BPF_XADD stores into R%d context is not allowed\n", -+ insn->dst_reg); -+ return -EACCES; -+ } -+ - /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, -1); -@@ -2879,6 +2892,12 @@ static int do_check(struct bpf_verifier_env *env) - if (err) - return err; - -+ if (is_ctx_reg(env, insn->dst_reg)) { -+ verbose("BPF_ST stores into R%d context is not allowed\n", -+ insn->dst_reg); -+ return -EACCES; -+ } -+ - /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc deleted file mode 100644 index e7fb7846..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc +++ /dev/null @@ -1,305 +0,0 @@ -patch 0001-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch -patch 
0002-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch -patch 0003-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch -patch 0004-KAISER-Kernel-Address-Isolation.patch -patch 0005-kaiser-merged-update.patch -patch 0006-kaiser-do-not-set-_PAGE_NX-on-pgd_none.patch -patch 0007-kaiser-stack-map-PAGE_SIZE-at-THREAD_SIZE-PAGE_SIZE.patch -patch 0008-kaiser-fix-build-and-FIXME-in-alloc_ldt_struct.patch -patch 0009-kaiser-KAISER-depends-on-SMP.patch -patch 0010-kaiser-fix-regs-to-do_nmi-ifndef-CONFIG_KAISER.patch -patch 0011-kaiser-fix-perf-crashes.patch -patch 0012-kaiser-ENOMEM-if-kaiser_pagetable_walk-NULL.patch -patch 0013-kaiser-tidied-up-asm-kaiser.h-somewhat.patch -patch 0014-kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch -patch 0015-kaiser-align-addition-to-x86-mm-Makefile.patch -patch 0016-kaiser-cleanups-while-trying-for-gold-link.patch -patch 0017-kaiser-name-that-0x1000-KAISER_SHADOW_PGD_OFFSET.patch -patch 0018-kaiser-delete-KAISER_REAL_SWITCH-option.patch -patch 0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch -patch 0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch -patch 0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch -patch 0022-kaiser-PCID-0-for-kernel-and-128-for-user.patch -patch 0023-kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch -patch 0024-kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch -patch 0025-kaiser-kaiser_remove_mapping-move-along-the-pgd.patch -patch 0026-kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch -patch 0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch -patch 0028-x86-kaiser-Rename-and-simplify-X86_FEATURE_KAISER-ha.patch -patch 0029-x86-kaiser-Check-boottime-cmdline-params.patch -patch 0030-kaiser-use-ALTERNATIVE-instead-of-x86_cr3_pcid_noflu.patch -patch 0031-kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch -patch 0032-kaiser-asm-tlbflush.h-handle-noPGE-at-lower-level.patch -patch 0033-kaiser-kaiser_flush_tlb_on_return_to_user-check-PCID.patch -patch 0034-x86-paravirt-Dont-patch-flush_tlb_single.patch -patch 0035-x86-kaiser-Reenable-PARAVIRT.patch -patch 0036-kaiser-disabled-on-Xen-PV.patch -patch 0037-x86-kaiser-Move-feature-detection-up.patch -patch 0038-KPTI-Rename-to-PAGE_TABLE_ISOLATION.patch -patch 0039-KPTI-Report-when-enabled.patch -patch 0040-kaiser-Set-_PAGE_NX-only-if-supported.patch -patch 0041-kaiser-Set-_PAGE_NX-only-if-supported.patch -patch 0042-bpf-adjust-insn_aux_data-when-patching-insns.patch -patch 0043-bpf-move-fixup_bpf_calls-function.patch -patch 0044-bpf-refactor-fixup_bpf_calls.patch -patch 0045-bpf-prevent-out-of-bounds-speculation.patch -patch 0046-bpf-array-fix-overflow-in-max_entries-and-undefined-.patch -patch 0047-x86-Documentation-Add-PTI-description.patch -patch 0048-x86-cpu-Factor-out-application-of-forced-CPU-caps.patch -patch 0049-x86-cpufeatures-Make-CPU-bugs-sticky.patch -patch 0050-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch -patch 0051-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch -patch 0052-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch -patch 0053-x86-cpu-Merge-bugs.c-and-bugs_64.c.patch -patch 0054-sysfs-cpu-Add-vulnerability-folder.patch -patch 0055-x86-cpu-Implement-CPU-vulnerabilites-sysfs-functions.patch -patch 0056-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch -patch 0057-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch -patch 0058-sysfs-cpu-Fix-typos-in-vulnerability-documentation.patch -patch 0059-x86-alternatives-Fix-optimize_nops-checking.patch -patch 
0060-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch -patch 0061-x86-mm-32-Move-setup_clear_cpu_cap-X86_FEATURE_PCID-.patch -patch 0062-objtool-modules-Discard-objtool-annotation-sections-.patch -patch 0063-objtool-Detect-jumps-to-retpoline-thunks.patch -patch 0064-objtool-Allow-alternatives-to-be-ignored.patch -patch 0065-x86-asm-Use-register-variable-to-get-stack-pointer-v.patch -patch 0066-x86-retpoline-Add-initial-retpoline-support.patch -patch 0067-x86-spectre-Add-boot-time-option-to-select-Spectre-v.patch -patch 0068-x86-retpoline-crypto-Convert-crypto-assembler-indire.patch -patch 0069-x86-retpoline-entry-Convert-entry-assembler-indirect.patch -patch 0070-x86-retpoline-ftrace-Convert-ftrace-assembler-indire.patch -patch 0071-x86-retpoline-hyperv-Convert-assembler-indirect-jump.patch -patch 0072-x86-retpoline-xen-Convert-Xen-hypercall-indirect-jum.patch -patch 0073-x86-retpoline-checksum32-Convert-assembler-indirect-.patch -patch 0074-x86-retpoline-irq32-Convert-assembler-indirect-jumps.patch -patch 0075-x86-retpoline-Fill-return-stack-buffer-on-vmexit.patch -patch 0076-x86-retpoline-Remove-compile-time-warning.patch -patch 0077-objtool-Fix-retpoline-support-for-pre-ORC-objtool.patch -patch 0078-x86-pti-efi-broken-conversion-from-efi-to-kernel-pag.patch -patch 0079-x86-retpoline-Fill-RSB-on-context-switch-for-affecte.patch -patch 0080-x86-retpoline-Add-LFENCE-to-the-retpoline-RSB-fillin.patch -patch 0081-objtool-Improve-error-message-for-bad-file-argument.patch -patch 0082-x86-cpufeature-Move-processor-tracing-out-of-scatter.patch -patch 0083-module-Add-retpoline-tag-to-VERMAGIC.patch -patch 0084-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch -patch 0085-x86-mce-Make-machine-check-speculation-protected.patch -patch 0086-retpoline-Introduce-start-end-markers-of-indirect-th.patch -patch 0087-kprobes-x86-Blacklist-indirect-thunk-functions-for-k.patch -patch 0088-kprobes-x86-Disable-optimizing-on-the-function-jumps.patch -patch 0089-x86-pti-Document-fix-wrong-index.patch -patch 0090-x86-retpoline-Optimize-inline-assembler-for-vmexit_f.patch -patch 0091-Revert-module-Add-retpoline-tag-to-VERMAGIC.patch -patch 0092-Map-the-vsyscall-page-with-_PAGE_USER.patch -patch 0093-vsyscall-Fix-permissions-for-emulate-mode-with-KAISE.patch -patch 0094-bpf-fix-mixed-signed-unsigned-derived-min-max-value-.patch -patch 0095-bpf-prevent-leaking-pointer-via-xadd-on-unpriviledge.patch -patch 0096-x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch -patch 0097-bpf-fix-bpf_tail_call-x64-JIT.patch -patch 0098-bpf-introduce-BPF_JIT_ALWAYS_ON-config.patch -patch 0099-bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-.patch -patch 0100-bpf-avoid-false-sharing-of-map-refcount-with-max_ent.patch -patch 0101-bpf-fix-divides-by-zero.patch -patch 0102-bpf-fix-32-bit-divide-by-zero.patch -patch 0103-bpf-reject-stores-into-ctx-via-st-and-xadd.patch -patch 0001-kaiser-fix-intel_bts-perf-crashes.patch -patch 0002-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch -patch 0003-kaiser-allocate-pgd-with-order-0-when-pti-off.patch -patch 0004-x86-asm-Fix-inline-asm-call-constraints-for-GCC-4.4.patch -patch 0005-x86-microcode-AMD-Do-not-load-when-running-on-a-hype.patch -patch 0006-x86-retpoline-Remove-the-esp-rsp-thunk.patch -patch 0007-module-retpoline-Warn-about-missing-retpoline-in-mod.patch -patch 0008-x86-cpufeatures-Add-CPUID_7_EDX-CPUID-leaf.patch -patch 0009-x86-cpufeatures-Add-Intel-feature-bits-for-Speculati.patch -patch 
0010-x86-cpufeatures-Add-AMD-feature-bits-for-Speculation.patch -patch 0011-x86-msr-Add-definitions-for-new-speculation-control-.patch -patch 0012-x86-pti-Do-not-enable-PTI-on-CPUs-which-are-not-vuln.patch -patch 0013-x86-cpufeature-Blacklist-SPEC_CTRL-PRED_CMD-on-early.patch -patch 0014-x86-speculation-Add-basic-IBPB-Indirect-Branch-Predi.patch -patch 0015-x86-nospec-Fix-header-guards-names.patch -patch 0016-x86-bugs-Drop-one-mitigation-from-dmesg.patch -patch 0017-x86-cpu-bugs-Make-retpoline-module-warning-condition.patch -patch 0018-x86-cpufeatures-Clean-up-Spectre-v2-related-CPUID-fl.patch -patch 0019-x86-retpoline-Simplify-vmexit_fill_RSB.patch -patch 0020-x86-spectre-Check-CONFIG_RETPOLINE-in-command-line-p.patch -patch 0021-x86-entry-64-Remove-the-SYSCALL64-fast-path.patch -patch 0022-x86-entry-64-Push-extra-regs-right-away.patch -patch 0024-Documentation-Document-array_index_nospec.patch -patch 0025-array_index_nospec-Sanitize-speculative-array-de-ref.patch -patch 0026-x86-Implement-array_index_mask_nospec.patch -patch 0027-x86-Introduce-barrier_nospec.patch -patch 0028-x86-Introduce-__uaccess_begin_nospec-and-uaccess_try.patch -patch 0029-x86-usercopy-Replace-open-coded-stac-clac-with-__uac.patch -patch 0030-x86-uaccess-Use-__uaccess_begin_nospec-and-uaccess_t.patch -patch 0031-x86-get_user-Use-pointer-masking-to-limit-speculatio.patch -patch 0032-x86-syscall-Sanitize-syscall-table-de-references-und.patch -patch 0033-vfs-fdtable-Prevent-bounds-check-bypass-via-speculat.patch -patch 0034-x86-spectre-Report-get_user-mitigation-for-spectre_v.patch -patch 0035-x86-spectre-Fix-spelling-mistake-vunerable-vulnerabl.patch -patch 0036-x86-cpuid-Fix-up-virtual-IBRS-IBPB-STIBP-feature-bit.patch -patch 0037-x86-retpoline-Avoid-retpolines-for-built-in-__init-f.patch -patch 0038-x86-spectre-Simplify-spectre_v2-command-line-parsing.patch -patch 0039-x86-pti-Mark-constant-arrays-as-__initconst.patch -patch 0040-x86-speculation-Fix-typo-IBRS_ATT-which-should-be-IB.patch -patch 0041-x86-microcode-Do-the-family-check-first.patch -patch 0001-kaiser-fix-compile-error-without-vsyscall.patch -patch 0002-x86-entry-64-compat-Clear-registers-for-compat-sysca.patch -patch 0003-x86-speculation-Update-Speculation-Control-microcode.patch -patch 0004-x86-speculation-Correct-Speculation-Control-microcod.patch -patch 0005-x86-speculation-Clean-up-various-Spectre-related-det.patch -patch 0006-x86-speculation-Fix-up-array_index_nospec_mask-asm-c.patch -patch 0007-x86-speculation-Add-asm-msr-index.h-dependency.patch -patch 0008-x86-cpu-Rename-cpu_data.x86_mask-to-cpu_data.x86_ste.patch -patch 0009-x86-spectre-Fix-an-error-message.patch -patch 0010-x86-cpu-Change-type-of-x86_cache_size-variable-to-un.patch -patch 0011-x86-microcode-AMD-Change-load_microcode_amd-s-param-.patch -patch 0012-x86-entry-64-Clear-extra-registers-beyond-syscall-ar.patch -patch 0001-x86-mm-Remove-flush_tlb-and-flush_tlb_current_task.patch -patch 0002-x86-mm-Make-flush_tlb_mm_range-more-predictable.patch -patch 0003-x86-mm-Reimplement-flush_tlb_page-using-flush_tlb_mm.patch -patch 0004-x86-mm-Remove-the-UP-asm-tlbflush.h-code-always-use-.patch -patch 0005-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch -patch 0006-x86-speculation-Use-Indirect-Branch-Prediction-Barri.patch -patch 0007-bpf-x64-implement-retpoline-for-tail-call.patch -patch 0008-x86-spectre_v2-Don-t-check-microcode-versions-when-r.patch -patch 0009-Revert-x86-retpoline-Simplify-vmexit_fill_RSB.patch -patch 0010-x86-speculation-Use-IBRS-if-available-before-calling.patch 
-patch 0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch -patch 0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch -patch 0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch -patch 0014-x86-speculation-Move-firmware_restrict_branch_specul.patch -patch 0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch -patch 0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch -patch 0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch -patch 0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch -patch 0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch -patch 0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch -patch 0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch -patch 0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch -patch 0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch -patch 0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch -patch 0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch -patch 0012-KVM-VMX-Make-indirect-call-speculation-safe.patch -patch 0013-x86-kvm-Update-spectre-v1-mitigation.patch -patch 0014-KVM-nVMX-kmap-can-t-fail.patch -patch 0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch -patch 0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch -patch 0017-KVM-nVMX-Eliminate-vmcs02-pool.patch -patch 0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch -patch 0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch -patch 0020-KVM-x86-Add-IBPB-support.patch -patch 0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch -patch 0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch -patch 0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch -patch 0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch -patch 0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch -patch 0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch -patch 0027-x86-add-MULTIUSER-dependency-for-KVM.patch -patch 0028-KVM-add-X86_LOCAL_APIC-dependency.patch -patch 0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch -patch 0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch -patch 0031-KVM-nVMX-invvpid-handling-improvements.patch -patch 0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch -patch 0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch -patch 0001-x86-paravirt-objtool-Annotate-indirect-calls.patch -patch 0002-x86-module-Detect-and-skip-invalid-relocations.patch -patch 0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch -patch 0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch -patch 0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch -patch 0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch -patch 0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch -patch 0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch -patch 0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch -patch 0010-kvm-x86-fix-icebp-instruction-handling.patch -patch 0011-bpf-x64-increase-number-of-passes.patch -patch 0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch -patch 0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch -patch 0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch -patch 0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch -patch 0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch -patch 0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch -patch 0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch -patch 
0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch -patch 0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch -patch 0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch -patch 0022-x86-nospec-Simplify-alternative_msr_write.patch -patch 0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch -patch 0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch -patch 0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch -patch 0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch -patch 0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch -patch 0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch -patch 0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch -patch 0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch -patch 0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch -patch 0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch -patch 0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch -patch 0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch -patch 0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch -patch 0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch -patch 0037-x86-process-Optimize-TIF_NOTSC-switch.patch -patch 0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch -patch 0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch -patch 0040-nospec-Move-array_index_nospec-parameter-checking-in.patch -patch 0041-nospec-Allow-index-argument-to-have-const-qualified-.patch -patch 0042-nospec-Kill-array_index_nospec_mask_check.patch -patch 0043-nospec-Include-asm-barrier.h-dependency.patch -patch 0044-prctl-Add-speculation-control-prctls.patch -patch 0045-nospec-Allow-getting-setting-on-non-current-task.patch -patch 0046-x86-bugs-Make-boot-modes-__ro_after_init.patch -patch 0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch -patch 0048-proc-fix-coredump-vs-read-proc-stat-race.patch -patch 0049-proc-Provide-details-on-speculation-flaw-mitigations.patch -patch 0050-prctl-Add-force-disable-speculation.patch -patch 0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch -patch 0052-seccomp-Enable-speculation-flaw-mitigations.patch -patch 0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch -patch 0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch -patch 0055-seccomp-Move-speculation-migitation-control-to-arch-.patch -patch 0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch -patch 0057-x86-bugs-Rename-_RDS-to-_SSBD.patch -patch 0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch -patch 0059-x86-bugs-Make-cpu_show_common-static.patch -patch 0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch -patch 0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch -patch 0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch -patch 0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch -patch 0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch -patch 0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch -patch 0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch -patch 0067-x86-cpufeatures-Add-FEATURE_ZEN.patch -patch 0068-x86-speculation-Handle-HT-correctly-on-AMD.patch -patch 0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch -patch 0070-x86-speculation-Add-virtualized-speculative-store-by.patch -patch 0071-x86-speculation-Rework-speculative_store_bypass_upda.patch -patch 0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch -patch 
0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch -patch 0074-x86-bugs-Remove-x86_spec_ctrl_set.patch -patch 0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch -patch 0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch -patch 0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch -patch 0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch -patch 0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch -patch 0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch -patch 0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch -patch 0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch -patch 0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch -patch 0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch -patch 0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch -patch 0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch -patch 0087-objtool-Move-checking-code-to-check.c.patch -patch 0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch -patch 0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch -patch 0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch -patch 0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch -patch 0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch -patch 0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch -patch 0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch -patch 0002-complete-e390f9a-port-for-v4.9.106.patch -patch 0003-KVM-x86-introduce-linear_-read-write-_system.patch -patch 0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch -patch 0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch -patch 0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch -patch 0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch -patch 0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch -patch 0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch -patch 0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto-rt.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto-rt.patch deleted file mode 100644 index 4ce23f3a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto-rt.patch +++ /dev/null @@ -1,186 +0,0 @@ -From c4c4bc077dd11d2b5626a7033a265e78c43ccf8e Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 28 Jan 2018 10:38:50 -0800 -Subject: [PATCH] x86/asm: Move 'status' from thread_struct to thread_info - -(cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3) - -The TS_COMPAT bit is very hot and is accessed from code paths that mostly -also touch thread_info::flags. Move it into struct thread_info to improve -cache locality. - -The only reason it was in thread_struct is that there was a brief period -during which arch-specific fields were not allowed in struct thread_info. - -Linus suggested further changing: - - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - -to: - - if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - -on the theory that frequently dirtying the cacheline even in pure 64-bit -code that never needs to modify status hurts performance. 
That could be a -reasonable followup patch, but I suspect it matters less on top of this -patch. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Acked-by: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> -Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/common.c | 4 ++-- - arch/x86/include/asm/processor.h | 2 -- - arch/x86/include/asm/syscall.h | 6 +++--- - arch/x86/include/asm/thread_info.h | 3 ++- - arch/x86/kernel/process_64.c | 4 ++-- - arch/x86/kernel/ptrace.c | 2 +- - arch/x86/kernel/signal.c | 2 +- - 7 files changed, 11 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c -index 56d01a339ba4..021cebbdab92 100644 ---- a/arch/x86/entry/common.c -+++ b/arch/x86/entry/common.c -@@ -208,7 +208,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) - * special case only applies after poking regs and before the - * very next return to user mode. - */ -- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); -+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - #endif - - user_enter_irqoff(); -@@ -306,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) - unsigned int nr = (unsigned int)regs->orig_ax; - - #ifdef CONFIG_IA32_EMULATION -- current->thread.status |= TS_COMPAT; -+ ti->status |= TS_COMPAT; - #endif - - if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 83db0eae9979..f9cb6c03cf49 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -391,8 +391,6 @@ struct thread_struct { - unsigned short gsindex; - #endif - -- u32 status; /* thread synchronous flags */ -- - #ifdef CONFIG_X86_64 - unsigned long fsbase; - unsigned long gsbase; -diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h -index e3c95e8e61c5..03eedc21246d 100644 ---- a/arch/x86/include/asm/syscall.h -+++ b/arch/x86/include/asm/syscall.h -@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, - * TS_COMPAT is set for 32-bit syscall entries and then - * remains set until we return to user mode. - */ -- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) -+ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. 
-@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, - unsigned long *args) - { - # ifdef CONFIG_IA32_EMULATION -- if (task->thread.status & TS_COMPAT) -+ if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; -@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, - const unsigned long *args) - { - # ifdef CONFIG_IA32_EMULATION -- if (task->thread.status & TS_COMPAT) -+ if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 5ceb3a1c2b1a..f581e2642a9f 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -56,6 +56,7 @@ struct thread_info { - unsigned long flags; /* low level flags */ - int preempt_lazy_count; /* 0 => lazy preemptable - <0 => BUG */ -+ u32 status; /* thread synchronous flags */ - }; - - #define INIT_THREAD_INFO(tsk) \ -@@ -235,7 +236,7 @@ static inline int arch_within_stack_frames(const void * const stack, - #define in_ia32_syscall() true - #else - #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ -- current->thread.status & TS_COMPAT) -+ current_thread_info()->status & TS_COMPAT) - #endif - - /* -diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c -index b3760b3c1ca0..dca15e11d6ae 100644 ---- a/arch/x86/kernel/process_64.c -+++ b/arch/x86/kernel/process_64.c -@@ -512,7 +512,7 @@ void set_personality_ia32(bool x32) - current->personality &= ~READ_IMPLIES_EXEC; - /* in_compat_syscall() uses the presence of the x32 - syscall bit flag to determine compat status */ -- current->thread.status &= ~TS_COMPAT; -+ current_thread_info()->status &= ~TS_COMPAT; - } else { - set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_X32); -@@ -520,7 +520,7 @@ void set_personality_ia32(bool x32) - current->mm->context.ia32_compat = TIF_IA32; - current->personality |= force_personality32; - /* Prepare the first "return" to user space */ -- current->thread.status |= TS_COMPAT; -+ current_thread_info()->status |= TS_COMPAT; - } - } - EXPORT_SYMBOL_GPL(set_personality_ia32); -diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c -index 0e63c0267f99..e497d374412a 100644 ---- a/arch/x86/kernel/ptrace.c -+++ b/arch/x86/kernel/ptrace.c -@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) - */ - regs->orig_ax = value; - if (syscall_get_nr(child, regs) >= 0) -- child->thread.status |= TS_I386_REGS_POKED; -+ child->thread_info.status |= TS_I386_REGS_POKED; - break; - - case offsetof(struct user32, regs.eflags): -diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c -index 763af1d0de64..b1a5d252d482 100644 ---- a/arch/x86/kernel/signal.c -+++ b/arch/x86/kernel/signal.c -@@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) - * than the tracee. 
- */ - #ifdef CONFIG_IA32_EMULATION -- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) -+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) - return __NR_ia32_restart_syscall; - #endif - #ifdef CONFIG_X86_X32_ABI --- -2.11.1 - diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto.patch deleted file mode 100644 index 28daae3a..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto.patch +++ /dev/null @@ -1,187 +0,0 @@ -From be756593e543b901edc0e7489949f08c8d2737bf Mon Sep 17 00:00:00 2001 -From: Andy Lutomirski <luto@kernel.org> -Date: Sun, 28 Jan 2018 10:38:50 -0800 -Subject: [PATCH 23/42] x86/asm: Move 'status' from thread_struct to - thread_info - -(cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3) - -The TS_COMPAT bit is very hot and is accessed from code paths that mostly -also touch thread_info::flags. Move it into struct thread_info to improve -cache locality. - -The only reason it was in thread_struct is that there was a brief period -during which arch-specific fields were not allowed in struct thread_info. - -Linus suggested further changing: - - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - -to: - - if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - -on the theory that frequently dirtying the cacheline even in pure 64-bit -code that never needs to modify status hurts performance. That could be a -reasonable followup patch, but I suspect it matters less on top of this -patch. - -Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> -Signed-off-by: Andy Lutomirski <luto@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Ingo Molnar <mingo@kernel.org> -Acked-by: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Borislav Petkov <bp@alien8.de> -Cc: Kernel Hardening <kernel-hardening@lists.openwall.com> -Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org -Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/common.c | 4 ++-- - arch/x86/include/asm/processor.h | 2 -- - arch/x86/include/asm/syscall.h | 6 +++--- - arch/x86/include/asm/thread_info.h | 3 ++- - arch/x86/kernel/process_64.c | 4 ++-- - arch/x86/kernel/ptrace.c | 2 +- - arch/x86/kernel/signal.c | 2 +- - 7 files changed, 11 insertions(+), 12 deletions(-) - -diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c -index bdd9cc5..bd1d102 100644 ---- a/arch/x86/entry/common.c -+++ b/arch/x86/entry/common.c -@@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) - * special case only applies after poking regs and before the - * very next return to user mode. 
- */ -- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); -+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); - #endif - - user_enter_irqoff(); -@@ -299,7 +299,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) - unsigned int nr = (unsigned int)regs->orig_ax; - - #ifdef CONFIG_IA32_EMULATION -- current->thread.status |= TS_COMPAT; -+ ti->status |= TS_COMPAT; - #endif - - if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index e40b19c..a781668 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -391,8 +391,6 @@ struct thread_struct { - unsigned short gsindex; - #endif - -- u32 status; /* thread synchronous flags */ -- - #ifdef CONFIG_X86_64 - unsigned long fsbase; - unsigned long gsbase; -diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h -index e3c95e8..03eedc2 100644 ---- a/arch/x86/include/asm/syscall.h -+++ b/arch/x86/include/asm/syscall.h -@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, - * TS_COMPAT is set for 32-bit syscall entries and then - * remains set until we return to user mode. - */ -- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) -+ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. -@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, - unsigned long *args) - { - # ifdef CONFIG_IA32_EMULATION -- if (task->thread.status & TS_COMPAT) -+ if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; -@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, - const unsigned long *args) - { - # ifdef CONFIG_IA32_EMULATION -- if (task->thread.status & TS_COMPAT) -+ if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index bdf9c4c..89978b9 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -54,6 +54,7 @@ struct task_struct; - - struct thread_info { - unsigned long flags; /* low level flags */ -+ u32 status; /* thread synchronous flags */ - }; - - #define INIT_THREAD_INFO(tsk) \ -@@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, - #define in_ia32_syscall() true - #else - #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ -- current->thread.status & TS_COMPAT) -+ current_thread_info()->status & TS_COMPAT) - #endif - - /* -diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c -index b3760b3..dca15e1 100644 ---- a/arch/x86/kernel/process_64.c -+++ b/arch/x86/kernel/process_64.c -@@ -512,7 +512,7 @@ void set_personality_ia32(bool x32) - current->personality &= ~READ_IMPLIES_EXEC; - /* in_compat_syscall() uses the presence of the x32 - syscall bit flag to determine compat status */ -- current->thread.status &= ~TS_COMPAT; -+ current_thread_info()->status &= ~TS_COMPAT; - } else { - set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_X32); -@@ -520,7 +520,7 @@ void set_personality_ia32(bool x32) - current->mm->context.ia32_compat = TIF_IA32; - current->personality |= force_personality32; - /* Prepare the first "return" to user space */ -- current->thread.status |= TS_COMPAT; -+ current_thread_info()->status |= TS_COMPAT; - } 
- } - EXPORT_SYMBOL_GPL(set_personality_ia32); -diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c -index 0e63c02..e497d37 100644 ---- a/arch/x86/kernel/ptrace.c -+++ b/arch/x86/kernel/ptrace.c -@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) - */ - regs->orig_ax = value; - if (syscall_get_nr(child, regs) >= 0) -- child->thread.status |= TS_I386_REGS_POKED; -+ child->thread_info.status |= TS_I386_REGS_POKED; - break; - - case offsetof(struct user32, regs.eflags): -diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c -index 763af1d..b1a5d25 100644 ---- a/arch/x86/kernel/signal.c -+++ b/arch/x86/kernel/signal.c -@@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) - * than the tracee. - */ - #ifdef CONFIG_IA32_EMULATION -- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) -+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) - return __NR_ia32_restart_syscall; - #endif - #ifdef CONFIG_X86_X32_ABI --- -2.7.4 - diff --git a/common/recipes-kernel/linux/linux-yocto-common_4.9.inc b/common/recipes-kernel/linux/linux-yocto-common_4.9.inc deleted file mode 100644 index 20bfc9cf..00000000 --- a/common/recipes-kernel/linux/linux-yocto-common_4.9.inc +++ /dev/null @@ -1,19 +0,0 @@ -FILESEXTRAPATHS_prepend := "${THISDIR}/linux-yocto-${LINUX_VERSION}:" - -PR := "${INC_PR}.1" - -KMACHINE_amdx86 ?= "common-pc-64" -SRCREV_meta_amdx86 ?= "803b8d600e45afa0375459bf599fe365571a3866" -LINUX_VERSION_amdx86 ?= "4.9.21" - -SRC_URI_append_amdx86 += " \ - file://upstream-backports.scc \ - file://hid.cfg \ - file://efi-partition.cfg \ - file://usb-serial.cfg \ - file://wifi-drivers.cfg \ - ${@bb.utils.contains('DISTRO_FEATURES', 'bluetooth', 'file://enable-bluetooth.cfg', 'file://disable-bluetooth.cfg', d)} \ - ${@bb.utils.contains('DISTRO', 'mel', 'file://enable-kgdb.cfg', 'file://disable-kgdb.cfg', d)} \ -" - -KERNEL_FEATURES_append_amdx86 = " cfg/smp.scc" diff --git a/common/recipes-kernel/linux/linux-yocto-rt_4.9.bb b/common/recipes-kernel/linux/linux-yocto-rt_4.9.bb deleted file mode 100644 index 4ca78831..00000000 --- a/common/recipes-kernel/linux/linux-yocto-rt_4.9.bb +++ /dev/null @@ -1,38 +0,0 @@ -KBRANCH ?= "standard/preempt-rt/base" - -require recipes-kernel/linux/linux-yocto.inc - -# Skip processing of this recipe if it is not explicitly specified as the -# PREFERRED_PROVIDER for virtual/kernel. This avoids errors when trying -# to build multiple virtual/kernel providers, e.g. as dependency of -# core-image-rt-sdk, core-image-rt. 
-python () { - if d.getVar("PREFERRED_PROVIDER_virtual/kernel") != "linux-yocto-rt": - raise bb.parse.SkipPackage("Set PREFERRED_PROVIDER_virtual/kernel to linux-yocto-rt to enable it") -} - -SRCREV_machine ?= "b6f5c933ce66e4a00412bb5c5691a2ab4187c979" -SRCREV_meta ?= "803b8d600e45afa0375459bf599fe365571a3866" - -SRC_URI = "git://git.yoctoproject.org/linux-yocto-4.9.git;branch=${KBRANCH};name=machine \ - git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-4.9;destsuffix=${KMETA}" - -LINUX_VERSION ?= "4.9.21" - -PV = "${LINUX_VERSION}+git${SRCPV}" - -KMETA = "kernel-meta" -KCONF_BSP_AUDIT_LEVEL = "2" - -LINUX_KERNEL_TYPE = "preempt-rt" - -COMPATIBLE_MACHINE = "(qemux86|qemux86-64|qemuarm|qemuppc|qemumips)" - -KERNEL_DEVICETREE_qemuarm = "versatile-pb.dtb" - -# Functionality flags -KERNEL_EXTRA_FEATURES ?= "features/netfilter/netfilter.scc features/taskstats/taskstats.scc" -KERNEL_FEATURES_append = " ${KERNEL_EXTRA_FEATURES}" -KERNEL_FEATURES_append_qemuall=" cfg/virtio.scc" -KERNEL_FEATURES_append_qemux86=" cfg/sound.scc cfg/paravirt_kvm.scc" -KERNEL_FEATURES_append_qemux86-64=" cfg/sound.scc" diff --git a/common/recipes-kernel/linux/linux-yocto-rt_4.9.bbappend b/common/recipes-kernel/linux/linux-yocto-rt_4.9.bbappend deleted file mode 100644 index 50e232e2..00000000 --- a/common/recipes-kernel/linux/linux-yocto-rt_4.9.bbappend +++ /dev/null @@ -1,4 +0,0 @@ -require linux-yocto-common_4.9.inc -KBRANCH_amdx86 ?= "standard/preempt-rt/base" -SRCREV_machine_amdx86 ?= "b6f5c933ce66e4a00412bb5c5691a2ab4187c979" -SRC_URI_append_amdx86 = " file://x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto-rt.patch" diff --git a/common/recipes-kernel/linux/linux-yocto_4.9.bb b/common/recipes-kernel/linux/linux-yocto_4.9.bb deleted file mode 100644 index d6b96ed8..00000000 --- a/common/recipes-kernel/linux/linux-yocto_4.9.bb +++ /dev/null @@ -1,44 +0,0 @@ -KBRANCH ?= "standard/base" - -require recipes-kernel/linux/linux-yocto.inc - -# board specific branches -KBRANCH_qemuarm ?= "standard/arm-versatile-926ejs" -KBRANCH_qemuarm64 ?= "standard/qemuarm64" -KBRANCH_qemumips ?= "standard/mti-malta32" -KBRANCH_qemuppc ?= "standard/qemuppc" -KBRANCH_qemux86 ?= "standard/base" -KBRANCH_qemux86-64 ?= "standard/base" -KBRANCH_qemumips64 ?= "standard/mti-malta64" - -SRCREV_machine_qemuarm ?= "bb5856d9b25d13c63a757a408cc8793d6f6302fa" -SRCREV_machine_qemuarm64 ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRCREV_machine_qemumips ?= "4b150e5ceca8b2c4fb01f74b1f7a57cdeae63762" -SRCREV_machine_qemuppc ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRCREV_machine_qemux86 ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRCREV_machine_qemux86-64 ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRCREV_machine_qemumips64 ?= "b300b94b77ed9fbb1c80a2d2441403ad2b7694a9" -SRCREV_machine ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRCREV_meta ?= "803b8d600e45afa0375459bf599fe365571a3866" - -SRC_URI = "git://git.yoctoproject.org/linux-yocto-4.9.git;name=machine;branch=${KBRANCH}; \ - git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-4.9;destsuffix=${KMETA}" - -LINUX_VERSION ?= "4.9.21" - -PV = "${LINUX_VERSION}+git${SRCPV}" - -KMETA = "kernel-meta" -KCONF_BSP_AUDIT_LEVEL = "2" - -KERNEL_DEVICETREE_qemuarm = "versatile-pb.dtb" - -COMPATIBLE_MACHINE = "qemuarm|qemuarm64|qemux86|qemuppc|qemumips|qemumips64|qemux86-64" - -# Functionality flags -KERNEL_EXTRA_FEATURES ?= "features/netfilter/netfilter.scc" -KERNEL_FEATURES_append = " ${KERNEL_EXTRA_FEATURES}" 
-KERNEL_FEATURES_append_qemuall=" cfg/virtio.scc" -KERNEL_FEATURES_append_qemux86=" cfg/sound.scc cfg/paravirt_kvm.scc" -KERNEL_FEATURES_append_qemux86-64=" cfg/sound.scc cfg/paravirt_kvm.scc" -KERNEL_FEATURES_append = " ${@bb.utils.contains("TUNE_FEATURES", "mx32", " cfg/x32.scc", "" ,d)}" diff --git a/common/recipes-kernel/linux/linux-yocto_4.9.bbappend b/common/recipes-kernel/linux/linux-yocto_4.9.bbappend deleted file mode 100644 index 4fb27974..00000000 --- a/common/recipes-kernel/linux/linux-yocto_4.9.bbappend +++ /dev/null @@ -1,4 +0,0 @@ -require linux-yocto-common_4.9.inc -KBRANCH_amdx86 ?= "standard/base" -SRCREV_machine_amdx86 ?= "81055b89bd32414ecaf95156ce9a5fa6643e530a" -SRC_URI_append_amdx86 = " file://x86-asm-Move-status-from-thread_struct-to-thread_inf-linux-yocto.patch" |
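For reference, the deleted linux-yocto-common_4.9.inc selects kernel config fragments with BitBake's bb.utils.contains(), and the bbappends pin the amdx86 branch and revision through _amdx86 overrides. The sketch below is illustrative only: it re-implements the contains() check against a plain Python dict standing in for the BitBake datastore, with made-up DISTRO_FEATURES/DISTRO values, purely to show which of the enable/disable .cfg fragments the two inline expressions above would resolve to. The real helper is bb.utils.contains(variable, checkvalues, truevalue, falsevalue, d) in bitbake/lib/bb/utils.py.

    # Minimal sketch, not the real BitBake implementation.
    def contains(variable, checkvalues, truevalue, falsevalue, d):
        """Return truevalue if every word in checkvalues appears in d[variable]."""
        val = d.get(variable, "")
        if isinstance(checkvalues, str):
            checkvalues = set(checkvalues.split())
        else:
            checkvalues = set(checkvalues)
        if checkvalues.issubset(set(val.split())):
            return truevalue
        return falsevalue

    # Hypothetical configuration values, for demonstration only.
    d = {
        "DISTRO_FEATURES": "acl ipv6 bluetooth usbhost",
        "DISTRO": "mel",
    }

    # Mirrors the two conditional SRC_URI entries in linux-yocto-common_4.9.inc.
    print(contains("DISTRO_FEATURES", "bluetooth",
                   "file://enable-bluetooth.cfg", "file://disable-bluetooth.cfg", d))
    print(contains("DISTRO", "mel",
                   "file://enable-kgdb.cfg", "file://disable-kgdb.cfg", d))
    # -> file://enable-bluetooth.cfg
    # -> file://enable-kgdb.cfg

With the assumed values, both expressions pick the "enable" fragment; dropping "bluetooth" from DISTRO_FEATURES or setting DISTRO to anything other than "mel" would flip the corresponding entry to its "disable" counterpart, which is exactly how the deleted recipes toggled Bluetooth and KGDB support per distro configuration.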