178 files changed, 30907 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch new file mode 100644 index 00000000..9772c5f8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch @@ -0,0 +1,165 @@ +From af0e9ccc133f03f5150a7afba349a9f50897f793 Mon Sep 17 00:00:00 2001 +From: Wanpeng Li <wanpeng.li@hotmail.com> +Date: Thu, 14 Dec 2017 17:40:50 -0800 +Subject: [PATCH 01/33] KVM: Fix stack-out-of-bounds read in write_mmio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit e39d200fa5bf5b94a0948db0dae44c1b73b84a56 upstream. + +Reported by syzkaller: + + BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm] + Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298 + + CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18 + Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 + Call Trace: + dump_stack+0xab/0xe1 + print_address_description+0x6b/0x290 + kasan_report+0x28a/0x370 + write_mmio+0x11e/0x270 [kvm] + emulator_read_write_onepage+0x311/0x600 [kvm] + emulator_read_write+0xef/0x240 [kvm] + emulator_fix_hypercall+0x105/0x150 [kvm] + em_hypercall+0x2b/0x80 [kvm] + x86_emulate_insn+0x2b1/0x1640 [kvm] + x86_emulate_instruction+0x39a/0xb90 [kvm] + handle_exception+0x1b4/0x4d0 [kvm_intel] + vcpu_enter_guest+0x15a0/0x2640 [kvm] + kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm] + kvm_vcpu_ioctl+0x479/0x880 [kvm] + do_vfs_ioctl+0x142/0x9a0 + SyS_ioctl+0x74/0x80 + entry_SYSCALL_64_fastpath+0x23/0x9a + +The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall) +to the guest memory, however, write_mmio tracepoint always prints 8 bytes +through *(u64 *)val since kvm splits the mmio access into 8 bytes. This +leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes +it by just accessing the bytes which we operate on. + +Before patch: + +syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f + +After patch: + +syz-executor-13416 [002] .... 
51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f + +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Reviewed-by: Darren Kenny <darren.kenny@oracle.com> +Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> +Tested-by: Marc Zyngier <marc.zyngier@arm.com> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Marc Zyngier <marc.zyngier@arm.com> +Cc: Christoffer Dall <christoffer.dall@linaro.org> +Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/arm/kvm/mmio.c | 6 +++--- + arch/x86/kvm/x86.c | 8 ++++---- + include/trace/events/kvm.h | 7 +++++-- + 3 files changed, 12 insertions(+), 9 deletions(-) + +diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c +index b6e715f..dac7ceb 100644 +--- a/arch/arm/kvm/mmio.c ++++ b/arch/arm/kvm/mmio.c +@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) + } + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, +- data); ++ &data); + data = vcpu_data_host_to_guest(vcpu, data, len); + vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); + } +@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), + len); + +- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); ++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); + kvm_mmio_write_buf(data_buf, len, data); + + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, +- fault_ipa, 0); ++ fault_ipa, NULL); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 51a700a..9cc9117 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4242,7 +4242,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) + addr, n, v)) + && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) + break; +- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); + handled += n; + addr += n; + len -= n; +@@ -4495,7 +4495,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) + { + if (vcpu->mmio_read_completed) { + trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, +- vcpu->mmio_fragments[0].gpa, *(u64 *)val); ++ vcpu->mmio_fragments[0].gpa, val); + vcpu->mmio_read_completed = 0; + return 1; + } +@@ -4517,14 +4517,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, + + static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) + { +- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); ++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); + return vcpu_mmio_write(vcpu, gpa, bytes, val); + } + + static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, + void *val, int bytes) + { +- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); + return X86EMUL_IO_NEEDED; + } + +diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h +index 8ade3eb..90fce4d 100644 +--- a/include/trace/events/kvm.h ++++ b/include/trace/events/kvm.h +@@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_WRITE, "write" } + + TRACE_EVENT(kvm_mmio, +- TP_PROTO(int type, int len, u64 gpa, 
u64 val), ++ TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( +@@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio, + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; +- __entry->val = val; ++ __entry->val = 0; ++ if (val) ++ memcpy(&__entry->val, val, ++ min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch new file mode 100644 index 00000000..64e0004b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch @@ -0,0 +1,39 @@ +From 122fd9dfb506c08b0a3093d6da080983cdf91e32 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings <ben@decadent.org.uk> +Date: Tue, 12 Jun 2018 01:14:34 +0100 +Subject: [PATCH 01/10] KVM: VMX: Expose SSBD properly to guests, 4.9 + supplement + +Fix an additional misuse of X86_FEATURE_SSBD in +guest_cpuid_has_spec_ctrl(). This function was introduced in the +backport of SSBD support to 4.9 and is not present upstream, so it was +not fixed by commit 43462d908821 "KVM: VMX: Expose SSBD properly to +guests." + +Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration") +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: kvm@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index d22695c..cf503df 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); +- return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); ++ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD))); + } + + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch new file mode 100644 index 00000000..fddb3346 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-x86-paravirt-objtool-Annotate-indirect-calls.patch @@ -0,0 +1,129 @@ +From 8b18def6a2da1b716f49fad6744a41c94d31a2c5 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Wed, 17 Jan 2018 16:58:11 +0100 +Subject: [PATCH 01/93] x86/paravirt, objtool: Annotate indirect calls + +commit 3010a0663fd949d122eca0561b06b0a9453f7866 upstream. + +Paravirt emits indirect calls which get flagged by objtool retpoline +checks, annotate it away because all these indirect calls will be +patched out before we start userspace. + +This patching happens through alternative_instructions() -> +apply_paravirt() -> pv_init_ops.patch() which will eventually end up +in paravirt_patch_default(). This function _will_ write direct +alternatives. 
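[Editor's note: for context, the annotation this patch sprinkles over the paravirt call sites is just a marker objtool reads; a sketch of the upstream definition (from arch/x86/include/asm/nospec-branch.h; the exact 4.9 backport may differ slightly):

    /* Sketch: record the address of the following indirect call in a
     * .discard.retpoline_safe section. objtool reads it and suppresses
     * the retpoline warning, because this call site is patched to a
     * direct call before userspace ever runs. */
    #define ANNOTATE_RETPOLINE_SAFE                                 \
            "999:\n\t"                                              \
            ".pushsection .discard.retpoline_safe\n\t"              \
            _ASM_PTR " 999b\n\t"                                    \
            ".popsection\n\t"
]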
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/paravirt.h | 16 ++++++++++++---- + arch/x86/include/asm/paravirt_types.h | 5 ++++- + 2 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h +index ce93281..24af8b1 100644 +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -6,6 +6,7 @@ + #ifdef CONFIG_PARAVIRT + #include <asm/pgtable_types.h> + #include <asm/asm.h> ++#include <asm/nospec-branch.h> + + #include <asm/paravirt_types.h> + +@@ -869,23 +870,27 @@ extern void default_banner(void); + + #define INTERRUPT_RETURN \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ +- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) ++ ANNOTATE_RETPOLINE_SAFE; \ ++ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) + + #define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ++ ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + + #define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ ++ ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + + #ifdef CONFIG_X86_32 + #define GET_CR0_INTO_EAX \ + push %ecx; push %edx; \ ++ ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ + pop %edx; pop %ecx + #else /* !CONFIG_X86_32 */ +@@ -907,11 +912,13 @@ extern void default_banner(void); + */ + #define SWAPGS \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ +- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ ++ ANNOTATE_RETPOLINE_SAFE; \ ++ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ + ) + + #define GET_CR2_INTO_RAX \ +- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) ++ ANNOTATE_RETPOLINE_SAFE; \ ++ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); + + #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ +@@ -921,7 +928,8 @@ extern void default_banner(void); + #define USERGS_SYSRET64 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ + CLBR_NONE, \ +- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) ++ ANNOTATE_RETPOLINE_SAFE; \ ++ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) + #endif /* CONFIG_X86_32 */ + + #endif /* __ASSEMBLY__ */ +diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h +index 0f400c0..04b7971 100644 +--- a/arch/x86/include/asm/paravirt_types.h ++++ b/arch/x86/include/asm/paravirt_types.h +@@ -42,6 +42,7 @@ + #include <asm/desc_defs.h> + #include <asm/kmap_types.h> + #include 
<asm/pgtable_types.h> ++#include <asm/nospec-branch.h> + + struct page; + struct thread_struct; +@@ -391,7 +392,9 @@ int paravirt_disable_iospace(void); + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. + */ +-#define PARAVIRT_CALL "call *%c[paravirt_opptr];" ++#define PARAVIRT_CALL \ ++ ANNOTATE_RETPOLINE_SAFE \ ++ "call *%c[paravirt_opptr];" + + /* + * These macros are intended to wrap calls through one of the paravirt +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch new file mode 100644 index 00000000..dbde0c07 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-complete-e390f9a-port-for-v4.9.106.patch @@ -0,0 +1,69 @@ +From 22510b00481d95adc62292797fe98fbfe215a649 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philip=20M=C3=BCller?= <philm@manjaro.org> +Date: Sat, 9 Jun 2018 13:42:05 +0200 +Subject: [PATCH 02/10] complete e390f9a port for v4.9.106 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +objtool ports introduced in v4.9.106 were not totally complete. Therefore +they resulted in issues like: + + module: overflow in relocation type 10 val XXXXXXXXXXX + ‘usbcore’ likely not compiled with -mcmodel=kernel + module: overflow in relocation type 10 val XXXXXXXXXXX + ‘scsi_mod’ likely not compiled with -mcmodel=kernel + +Missing part was the complete backport of commit e390f9a. + +Original notes by Josh Poimboeuf: + +The '__unreachable' and '__func_stack_frame_non_standard' sections are +only used at compile time. They're discarded for vmlinux but they +should also be discarded for modules. + +Since this is a recurring pattern, prefix the section names with +".discard.". It's a nice convention and vmlinux.lds.h already discards +such sections. + +Also remove the 'a' (allocatable) flag from the __unreachable section +since it doesn't make sense for a discarded section. 
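[Editor's note: the ".discard." prefix works because the generic linker script already throws such sections away; a trimmed sketch of that convention (based on include/asm-generic/vmlinux.lds.h, not the exact 4.9 text — modules get the equivalent treatment from their own linker script):

    /* Sketch: anything named .discard or .discard.* is dropped at link
     * time, so no per-section /DISCARD/ entry is needed for each new
     * compile-time-only annotation section. */
    #define DISCARDS                                                \
            /DISCARD/ : {                                           \
            *(.discard)                                             \
            *(.discard.*)                                           \
            }
]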
+ +Signed-off-by: Philip Müller <philm@manjaro.org> +Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") +Link: https://gitlab.manjaro.org/packages/core/linux49/issues/2 +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/vmlinux.lds.S | 2 -- + include/linux/compiler-gcc.h | 2 +- + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index 4ef267f..e783a5d 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -352,8 +352,6 @@ SECTIONS + DISCARDS + /DISCARD/ : { + *(.eh_frame) +- *(__func_stack_frame_non_standard) +- *(__unreachable) + } + } + +diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h +index b69d102..b62cfb9 100644 +--- a/include/linux/compiler-gcc.h ++++ b/include/linux/compiler-gcc.h +@@ -202,7 +202,7 @@ + #ifdef CONFIG_STACK_VALIDATION + #define annotate_unreachable() ({ \ + asm("1:\t\n" \ +- ".pushsection __unreachable, \"a\"\t\n" \ ++ ".pushsection .discard.unreachable\t\n" \ + ".long 1b\t\n" \ + ".popsection\t\n"); \ + }) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch new file mode 100644 index 00000000..406a79d3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch @@ -0,0 +1,97 @@ +From 1cd771013c357075c745f99419bdaf31503c5a51 Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Wed, 3 Jan 2018 14:31:38 -0800 +Subject: [PATCH 02/33] kvm: vmx: Scrub hardware GPRs at VM-exit + +commit 0cb5b30698fdc8f6b4646012e3acb4ddce430788 upstream. + +Guest GPR values are live in the hardware GPRs at VM-exit. Do not +leave any guest values in hardware GPRs after the guest GPR values are +saved to the vcpu_vmx structure. + +This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753. +Specifically, it defeats the Project Zero PoC for CVE 2017-5715. + +Suggested-by: Eric Northup <digitaleric@google.com> +Signed-off-by: Jim Mattson <jmattson@google.com> +Reviewed-by: Eric Northup <digitaleric@google.com> +Reviewed-by: Benjamin Serebrin <serebrin@google.com> +Reviewed-by: Andrew Honig <ahonig@google.com> +[Paolo: Add AMD bits, Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>] +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 19 +++++++++++++++++++ + arch/x86/kvm/vmx.c | 14 +++++++++++++- + 2 files changed, 32 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 975ea99..491f077 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -4858,6 +4858,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + "mov %%r14, %c[r14](%[svm]) \n\t" + "mov %%r15, %c[r15](%[svm]) \n\t" + #endif ++ /* ++ * Clear host registers marked as clobbered to prevent ++ * speculative use. 
++ */ ++ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" ++ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" ++ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" ++ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" ++ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" ++#ifdef CONFIG_X86_64 ++ "xor %%r8, %%r8 \n\t" ++ "xor %%r9, %%r9 \n\t" ++ "xor %%r10, %%r10 \n\t" ++ "xor %%r11, %%r11 \n\t" ++ "xor %%r12, %%r12 \n\t" ++ "xor %%r13, %%r13 \n\t" ++ "xor %%r14, %%r14 \n\t" ++ "xor %%r15, %%r15 \n\t" ++#endif + "pop %%" _ASM_BP + : + : [svm]"a"(svm), +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 4ead27f..91ae4e2 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -8932,6 +8932,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + /* Save guest registers, load host registers, keep flags */ + "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" + "pop %0 \n\t" ++ "setbe %c[fail](%0)\n\t" + "mov %%" _ASM_AX ", %c[rax](%0) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" + __ASM_SIZE(pop) " %c[rcx](%0) \n\t" +@@ -8948,12 +8949,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + "mov %%r13, %c[r13](%0) \n\t" + "mov %%r14, %c[r14](%0) \n\t" + "mov %%r15, %c[r15](%0) \n\t" ++ "xor %%r8d, %%r8d \n\t" ++ "xor %%r9d, %%r9d \n\t" ++ "xor %%r10d, %%r10d \n\t" ++ "xor %%r11d, %%r11d \n\t" ++ "xor %%r12d, %%r12d \n\t" ++ "xor %%r13d, %%r13d \n\t" ++ "xor %%r14d, %%r14d \n\t" ++ "xor %%r15d, %%r15d \n\t" + #endif + "mov %%cr2, %%" _ASM_AX " \n\t" + "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + ++ "xor %%eax, %%eax \n\t" ++ "xor %%ebx, %%ebx \n\t" ++ "xor %%esi, %%esi \n\t" ++ "xor %%edi, %%edi \n\t" + "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" +- "setbe %c[fail](%0) \n\t" + ".pushsection .rodata \n\t" + ".global vmx_return \n\t" + "vmx_return: " _ASM_PTR " 2b \n\t" +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch new file mode 100644 index 00000000..3035344f --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-x86-module-Detect-and-skip-invalid-relocations.patch @@ -0,0 +1,77 @@ +From 23f4b6492ade30e2f7fc21acfb162e46851cf0f0 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Fri, 3 Nov 2017 07:58:54 -0500 +Subject: [PATCH 02/93] x86/module: Detect and skip invalid relocations + +commit eda9cec4c9a12208a6f69fbe68f72a6311d50032 upstream. + +There have been some cases where external tooling (e.g., kpatch-build) +creates a corrupt relocation which targets the wrong address. This is a +silent failure which can corrupt memory in unexpected places. + +On x86, the bytes of data being overwritten by relocations are always +initialized to zero beforehand. Use that knowledge to add sanity checks +to detect such cases before they corrupt memory. + +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: jeyu@kernel.org +Cc: live-patching@vger.kernel.org +Link: http://lkml.kernel.org/r/37450d6c6225e54db107fba447ce9e56e5f758e9.1509713553.git.jpoimboe@redhat.com +[ Restructured the messages, as it's unclear whether the relocation or the target is corrupted. 
] +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Cc: Matthias Kaehlcke <mka@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/module.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c +index 477ae80..87f30a8 100644 +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -171,19 +171,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, + case R_X86_64_NONE: + break; + case R_X86_64_64: ++ if (*(u64 *)loc != 0) ++ goto invalid_relocation; + *(u64 *)loc = val; + break; + case R_X86_64_32: ++ if (*(u32 *)loc != 0) ++ goto invalid_relocation; + *(u32 *)loc = val; + if (val != *(u32 *)loc) + goto overflow; + break; + case R_X86_64_32S: ++ if (*(s32 *)loc != 0) ++ goto invalid_relocation; + *(s32 *)loc = val; + if ((s64)val != *(s32 *)loc) + goto overflow; + break; + case R_X86_64_PC32: ++ if (*(u32 *)loc != 0) ++ goto invalid_relocation; + val -= (u64)loc; + *(u32 *)loc = val; + #if 0 +@@ -199,6 +207,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, + } + return 0; + ++invalid_relocation: ++ pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", ++ (int)ELF64_R_TYPE(rel[i].r_info), loc, val); ++ return -ENOEXEC; ++ + overflow: + pr_err("overflow in relocation type %d val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), val); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch new file mode 100644 index 00000000..b53db2f4 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch @@ -0,0 +1,45 @@ +From ab442dfc820b6ebdbb1c135e6fad66130d44e5a8 Mon Sep 17 00:00:00 2001 +From: Andrew Honig <ahonig@google.com> +Date: Wed, 10 Jan 2018 10:12:03 -0800 +Subject: [PATCH 03/33] KVM: x86: Add memory barrier on vmcs field lookup + +commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream. + +This adds a memory barrier when performing a lookup into +the vmcs_field_to_offset_table. This is related to +CVE-2017-5753. + +Signed-off-by: Andrew Honig <ahonig@google.com> +Reviewed-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 91ae4e2..ee766c2 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -858,8 +858,16 @@ static inline short vmcs_field_to_offset(unsigned long field) + { + BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + +- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || +- vmcs_field_to_offset_table[field] == 0) ++ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) ++ return -ENOENT; ++ ++ /* ++ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a ++ * generic mechanism. 
++ */ ++ asm("lfence"); ++ ++ if (vmcs_field_to_offset_table[field] == 0) + return -ENOENT; + + return vmcs_field_to_offset_table[field]; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch new file mode 100644 index 00000000..cb9af0b2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-introduce-linear_-read-write-_system.patch @@ -0,0 +1,187 @@ +From 9dd58f6cbef90d8a962b6365db32391f4a6ac4f9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Wed, 6 Jun 2018 16:43:02 +0200 +Subject: [PATCH 03/10] KVM: x86: introduce linear_{read,write}_system + +commit 79367a65743975e5cac8d24d08eccc7fdae832b0 upstream. + +Wrap the common invocation of ctxt->ops->read_std and ctxt->ops->write_std, so +as to have a smaller patch when the functions grow another argument. + +Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++------------------------- + 1 file changed, 32 insertions(+), 32 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 6faac71..b6ec3e9 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -802,6 +802,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) + return assign_eip_near(ctxt, ctxt->_eip + rel); + } + ++static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, ++ void *data, unsigned size) ++{ ++ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); ++} ++ ++static int linear_write_system(struct x86_emulate_ctxt *ctxt, ++ ulong linear, void *data, ++ unsigned int size) ++{ ++ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); ++} ++ + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, +@@ -1500,8 +1513,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, + return emulate_gp(ctxt, index << 3 | 0x2); + + addr = dt.address + index * 8; +- return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, +- &ctxt->exception); ++ return linear_read_system(ctxt, addr, desc, sizeof *desc); + } + + static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, +@@ -1564,8 +1576,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, + if (rc != X86EMUL_CONTINUE) + return rc; + +- return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc), +- &ctxt->exception); ++ return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc)); + } + + /* allowed just for 8 bytes segments */ +@@ -1579,8 +1590,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, + if (rc != X86EMUL_CONTINUE) + return rc; + +- return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, +- &ctxt->exception); ++ return linear_write_system(ctxt, addr, desc, sizeof *desc); + } + + static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +@@ -1741,8 +1751,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, + return ret; + } + } else if (ctxt->mode == X86EMUL_MODE_PROT64) { +- ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, +- sizeof(base3), &ctxt->exception); ++ ret = linear_read_system(ctxt, 
desc_addr+8, &base3, sizeof(base3)); + if (ret != X86EMUL_CONTINUE) + return ret; + if (is_noncanonical_address(get_desc_base(&seg_desc) | +@@ -2055,11 +2064,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) + eip_addr = dt.address + (irq << 2); + cs_addr = dt.address + (irq << 2) + 2; + +- rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); ++ rc = linear_read_system(ctxt, cs_addr, &cs, 2); + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); ++ rc = linear_read_system(ctxt, eip_addr, &eip, 2); + if (rc != X86EMUL_CONTINUE) + return rc; + +@@ -3018,35 +3027,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, + u16 tss_selector, u16 old_tss_sel, + ulong old_tss_base, struct desc_struct *new_desc) + { +- const struct x86_emulate_ops *ops = ctxt->ops; + struct tss_segment_16 tss_seg; + int ret; + u32 new_tss_base = get_desc_base(new_desc); + +- ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, +- &ctxt->exception); ++ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + if (ret != X86EMUL_CONTINUE) + return ret; + + save_state_to_tss16(ctxt, &tss_seg); + +- ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, +- &ctxt->exception); ++ ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + if (ret != X86EMUL_CONTINUE) + return ret; + +- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, +- &ctxt->exception); ++ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); + if (ret != X86EMUL_CONTINUE) + return ret; + + if (old_tss_sel != 0xffff) { + tss_seg.prev_task_link = old_tss_sel; + +- ret = ops->write_std(ctxt, new_tss_base, +- &tss_seg.prev_task_link, +- sizeof tss_seg.prev_task_link, +- &ctxt->exception); ++ ret = linear_write_system(ctxt, new_tss_base, ++ &tss_seg.prev_task_link, ++ sizeof tss_seg.prev_task_link); + if (ret != X86EMUL_CONTINUE) + return ret; + } +@@ -3162,38 +3166,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, + u16 tss_selector, u16 old_tss_sel, + ulong old_tss_base, struct desc_struct *new_desc) + { +- const struct x86_emulate_ops *ops = ctxt->ops; + struct tss_segment_32 tss_seg; + int ret; + u32 new_tss_base = get_desc_base(new_desc); + u32 eip_offset = offsetof(struct tss_segment_32, eip); + u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector); + +- ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, +- &ctxt->exception); ++ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + if (ret != X86EMUL_CONTINUE) + return ret; + + save_state_to_tss32(ctxt, &tss_seg); + + /* Only GP registers and segment selectors are saved */ +- ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, +- ldt_sel_offset - eip_offset, &ctxt->exception); ++ ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip, ++ ldt_sel_offset - eip_offset); + if (ret != X86EMUL_CONTINUE) + return ret; + +- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, +- &ctxt->exception); ++ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); + if (ret != X86EMUL_CONTINUE) + return ret; + + if (old_tss_sel != 0xffff) { + tss_seg.prev_task_link = old_tss_sel; + +- ret = ops->write_std(ctxt, new_tss_base, +- &tss_seg.prev_task_link, +- sizeof tss_seg.prev_task_link, +- &ctxt->exception); ++ ret = linear_write_system(ctxt, new_tss_base, ++ &tss_seg.prev_task_link, ++ sizeof 
tss_seg.prev_task_link); + if (ret != X86EMUL_CONTINUE) + return ret; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch new file mode 100644 index 00000000..d1b9f3df --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch @@ -0,0 +1,54 @@ +From de05b6da8c54ed0aa2158ad3112ac582c88f0676 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Sun, 26 Mar 2017 23:51:24 +0200 +Subject: [PATCH 03/93] kvm/svm: Setup MCG_CAP on AMD properly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 74f169090b6f36b867c9df0454366dd9af6f62d1 ] + +MCG_CAP[63:9] bits are reserved on AMD. However, on an AMD guest, this +MSR returns 0x100010a. More specifically, bit 24 is set, which is simply +wrong. That bit is MCG_SER_P and is present only on Intel. Thus, clean +up the reserved bits in order not to confuse guests. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Joerg Roedel <joro@8bytes.org> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index b82bb66..2d96e30 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -5437,6 +5437,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) + avic_handle_ldr_update(vcpu); + } + ++static void svm_setup_mce(struct kvm_vcpu *vcpu) ++{ ++ /* [63:9] are reserved. */ ++ vcpu->arch.mcg_cap &= 0x1ff; ++} ++ + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { + .cpu_has_kvm_support = has_svm, + .disabled_by_bios = is_disabled, +@@ -5552,6 +5558,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { + .pmu_ops = &amd_pmu_ops, + .deliver_posted_interrupt = svm_deliver_avic_intr, + .update_pi_irte = svm_update_pi_irte, ++ .setup_mce = svm_setup_mce, + }; + + static int __init svm_init(void) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch new file mode 100644 index 00000000..dd1f4c29 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch @@ -0,0 +1,48 @@ +From ce7bea11dfe01825a2ced79b5bcc04b7e781e63b Mon Sep 17 00:00:00 2001 +From: Liran Alon <liran.alon@oracle.com> +Date: Sun, 5 Nov 2017 16:56:33 +0200 +Subject: [PATCH 04/33] KVM: x86: emulator: Return to user-mode on L1 CPL=0 + emulation failure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ] + +On this case, handle_emulation_failure() fills kvm_run with +internal-error information which it expects to be delivered +to user-mode for further processing. +However, the code reports a wrong return-value which makes KVM to never +return to user-mode on this scenario. 
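[Editor's note: for context, the emulator's return value steers the KVM_RUN loop roughly as follows (an illustrative, simplified sketch — not the exact 4.9 control flow):

    /* Sketch: only EMULATE_USER_EXIT hands the already-filled kvm_run
     * (KVM_EXIT_INTERNAL_ERROR here) back to the VMM. Returning
     * EMULATE_FAIL instead just re-enters the guest with the queued
     * #UD, which is the bug this patch fixes. */
    r = x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
    switch (r) {
    case EMULATE_DONE:
    case EMULATE_FAIL:          /* #UD already queued for the guest */
            return 1;           /* stay in the kernel, re-enter guest */
    case EMULATE_USER_EXIT:
            return 0;           /* ioctl(KVM_RUN) returns to userspace */
    }
]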
+ +Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to +userspace") + +Signed-off-by: Liran Alon <liran.alon@oracle.com> +Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 9cc9117..abbb37a 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5265,7 +5265,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; +- r = EMULATE_FAIL; ++ r = EMULATE_USER_EXIT; + } + kvm_queue_exception(vcpu, UD_VECTOR); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch new file mode 100644 index 00000000..b1c3c02d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch @@ -0,0 +1,200 @@ +From 1ea42745a9e721d08413cd0c6728934da385010b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Wed, 6 Jun 2018 17:37:49 +0200 +Subject: [PATCH 04/10] KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and + kvm_write_guest_virt_system + +commit ce14e868a54edeb2e30cb7a7b104a2fc4b9d76ca upstream. + +Int the next patch the emulator's .read_std and .write_std callbacks will +grow another argument, which is not needed in kvm_read_guest_virt and +kvm_write_guest_virt_system's callers. Since we have to make separate +functions, let's give the currently existing names a nicer interface, too. + +Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 23 ++++++++++------------- + arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++------------- + arch/x86/kvm/x86.h | 4 ++-- + 3 files changed, 38 insertions(+), 28 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index d39062c..a81463d 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -6906,8 +6906,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, + vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) + return 1; + +- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, +- sizeof(vmptr), &e)) { ++ if (kvm_read_guest_virt(vcpu, gva, &vmptr, sizeof(vmptr), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } +@@ -7455,8 +7454,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) + vmx_instruction_info, true, &gva)) + return 1; + /* _system ok, as nested_vmx_check_permission verified cpl=0 */ +- kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, +- &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL); ++ kvm_write_guest_virt_system(vcpu, gva, &field_value, ++ (is_long_mode(vcpu) ? 
8 : 4), NULL); + } + + nested_vmx_succeed(vcpu); +@@ -7491,8 +7490,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) + if (get_vmx_mem_address(vcpu, exit_qualification, + vmx_instruction_info, false, &gva)) + return 1; +- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, +- &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { ++ if (kvm_read_guest_virt(vcpu, gva, &field_value, ++ (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } +@@ -7589,9 +7588,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) + vmx_instruction_info, true, &vmcs_gva)) + return 1; + /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ +- if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, +- (void *)&to_vmx(vcpu)->nested.current_vmptr, +- sizeof(u64), &e)) { ++ if (kvm_write_guest_virt_system(vcpu, vmcs_gva, ++ (void *)&to_vmx(vcpu)->nested.current_vmptr, ++ sizeof(u64), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } +@@ -7645,8 +7644,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, false, &gva)) + return 1; +- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, +- sizeof(operand), &e)) { ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } +@@ -7709,8 +7707,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, false, &gva)) + return 1; +- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, +- sizeof(u32), &e)) { ++ if (kvm_read_guest_virt(vcpu, gva, &vpid, sizeof(u32), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index d7974fc..af8e120 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4370,11 +4370,10 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + return X86EMUL_CONTINUE; + } + +-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, ++int kvm_read_guest_virt(struct kvm_vcpu *vcpu, + gva_t addr, void *val, unsigned int bytes, + struct x86_exception *exception) + { +- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, +@@ -4382,9 +4381,9 @@ int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + } + EXPORT_SYMBOL_GPL(kvm_read_guest_virt); + +-static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, +- gva_t addr, void *val, unsigned int bytes, +- struct x86_exception *exception) ++static int emulator_read_std(struct x86_emulate_ctxt *ctxt, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception) + { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); +@@ -4399,18 +4398,16 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, + return r < 0 ? 
X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; + } + +-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, +- gva_t addr, void *val, +- unsigned int bytes, +- struct x86_exception *exception) ++static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, ++ struct kvm_vcpu *vcpu, u32 access, ++ struct x86_exception *exception) + { +- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + void *data = val; + int r = X86EMUL_CONTINUE; + + while (bytes) { + gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, +- PFERR_WRITE_MASK, ++ access, + exception); + unsigned offset = addr & (PAGE_SIZE-1); + unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); +@@ -4431,6 +4428,22 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + out: + return r; + } ++ ++static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ PFERR_WRITE_MASK, exception); ++} ++ ++int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception) ++{ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ PFERR_WRITE_MASK, exception); ++} + EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); + + static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, +@@ -5137,8 +5150,8 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) + static const struct x86_emulate_ops emulate_ops = { + .read_gpr = emulator_read_gpr, + .write_gpr = emulator_write_gpr, +- .read_std = kvm_read_guest_virt_system, +- .write_std = kvm_write_guest_virt_system, ++ .read_std = emulator_read_std, ++ .write_std = emulator_write_std, + .read_phys = kvm_read_guest_phys_system, + .fetch = kvm_fetch_guest_virt, + .read_emulated = emulator_read_emulated, +diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h +index e8ff3e4..2133a18 100644 +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -161,11 +161,11 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); + u64 get_kvmclock_ns(struct kvm *kvm); + +-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, ++int kvm_read_guest_virt(struct kvm_vcpu *vcpu, + gva_t addr, void *val, unsigned int bytes, + struct x86_exception *exception); + +-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, ++int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, + gva_t addr, void *val, unsigned int bytes, + struct x86_exception *exception); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch new file mode 100644 index 00000000..3d7259ab --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch @@ -0,0 +1,71 @@ +From 230ca3c5a44c752650e6bac9a4fe0eefc5ff0758 Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Wed, 5 Apr 2017 09:14:40 -0700 +Subject: [PATCH 04/93] kvm: nVMX: Disallow userspace-injected exceptions in + guest mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 28d06353881939703c34d82a1465136af176c620 ] + +The 
userspace exception injection API and code path are entirely +unprepared for exceptions that might cause a VM-exit from L2 to L1, so +the best course of action may be to simply disallow this for now. + +1. The API provides no mechanism for userspace to specify the new DR6 +bits for a #DB exception or the new CR2 value for a #PF +exception. Presumably, userspace is expected to modify these registers +directly with KVM_SET_SREGS before the next KVM_RUN ioctl. However, in +the event that L1 intercepts the exception, these registers should not +be changed. Instead, the new values should be provided in the +exit_qualification field of vmcs12 (Intel SDM vol 3, section 27.1). + +2. In the case of a userspace-injected #DB, inject_pending_event() +clears DR7.GD before calling vmx_queue_exception(). However, in the +event that L1 intercepts the exception, this is too early, because +DR7.GD should not be modified by a #DB that causes a VM-exit directly +(Intel SDM vol 3, section 27.1). + +3. If the injected exception is a #PF, nested_vmx_check_exception() +doesn't properly check whether or not L1 is interested in the +associated error code (using the #PF error code mask and match fields +from vmcs12). It may either return 0 when it should call +nested_vmx_vmexit() or vice versa. + +4. nested_vmx_check_exception() assumes that it is dealing with a +hardware-generated exception intercept from L2, with some of the +relevant details (the VM-exit interruption-information and the exit +qualification) live in vmcs02. For userspace-injected exceptions, this +is not the case. + +5. prepare_vmcs12() assumes that when its exit_intr_info argument +specifies valid information with a valid error code that it can VMREAD +the VM-exit interruption error code from vmcs02. For +userspace-injected exceptions, this is not the case. 
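[Editor's note: the rejected case is the plain ioctl path; a hypothetical userspace snippet for illustration (vcpu_fd is assumed to be an open vCPU file descriptor):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: a VMM injects a #PF (vector 14). After this patch the
     * ioctl fails with -EINVAL while the vCPU is in guest mode (L2),
     * instead of injecting an exception KVM cannot route correctly. */
    struct kvm_vcpu_events ev = {
            .exception = { .injected = 1, .nr = 14 },
    };
    if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev) < 0)
            perror("KVM_SET_VCPU_EVENTS");
]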
+ +Signed-off-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/x86.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 9f0f7e2..b27b93d 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3056,7 +3056,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, + return -EINVAL; + + if (events->exception.injected && +- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) ++ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || ++ is_guest_mode(vcpu))) + return -EINVAL; + + process_nmi(vcpu); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch new file mode 100644 index 00000000..49770e88 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch @@ -0,0 +1,63 @@ +From 585df9100649b5038250e1c33cf8af019a77844c Mon Sep 17 00:00:00 2001 +From: Liran Alon <liran.alon@oracle.com> +Date: Sun, 5 Nov 2017 16:56:34 +0200 +Subject: [PATCH 05/33] KVM: x86: Don't re-execute instruction when not passing + CR2 value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ] + +In case of instruction-decode failure or emulation failure, +x86_emulate_instruction() will call reexecute_instruction() which will +attempt to use the cr2 value passed to x86_emulate_instruction(). +However, when x86_emulate_instruction() is called from +emulate_instruction(), cr2 is not passed (passed as 0) and therefore +it doesn't make sense to execute reexecute_instruction() logic at all. 
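[Editor's note: concretely, the wrapper used by these no-CR2 call sites becomes (the hunk applied below, shown in one piece):

    static inline int emulate_instruction(struct kvm_vcpu *vcpu,
                                          int emulation_type)
    {
            /* No fault address is available (cr2 is passed as 0), so
             * never take the reexecute_instruction() path. */
            return x86_emulate_instruction(vcpu, 0,
                            emulation_type | EMULTYPE_NO_REEXECUTE,
                            NULL, 0);
    }
]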
+ +Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") + +Signed-off-by: Liran Alon <liran.alon@oracle.com> +Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kvm_host.h | 3 ++- + arch/x86/kvm/vmx.c | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index bdde807..6f6ee68 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + static inline int emulate_instruction(struct kvm_vcpu *vcpu, + int emulation_type) + { +- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); ++ return x86_emulate_instruction(vcpu, 0, ++ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); + } + + void kvm_enable_efer_bits(u64); +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index ee766c2..8e5001d 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -6232,7 +6232,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) + if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) + return 1; + +- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); ++ err = emulate_instruction(vcpu, 0); + + if (err == EMULATE_USER_EXIT) { + ++vcpu->stat.mmio_exits; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch new file mode 100644 index 00000000..5cff1af9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch @@ -0,0 +1,156 @@ +From 45e0a2316524254692219fce805e247dc8dadb20 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Wed, 6 Jun 2018 17:38:09 +0200 +Subject: [PATCH 05/10] kvm: x86: use correct privilege level for + sgdt/sidt/fxsave/fxrstor access + +commit 3c9fa24ca7c9c47605672916491f79e8ccacb9e6 upstream. + +The functions that were used in the emulation of fxrstor, fxsave, sgdt and +sidt were originally meant for task switching, and as such they did not +check privilege levels. This is very bad when the same functions are used +in the emulation of unprivileged instructions. This is CVE-2018-10853. + +The obvious fix is to add a new argument to ops->read_std and ops->write_std, +which decides whether the access is a "system" access or should use the +processor's CPL. 
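[Editor's note: the new flag maps onto the page-fault error-code bits as follows (sketch of the x86.c read side, matching the hunks below):

    /* Sketch: a "system" access keeps CPL0 semantics; otherwise honour
     * the current privilege level, so CPL3 guest code can no longer
     * read or write supervisor-only memory through the emulator. */
    u32 access = 0;
    if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
            access |= PFERR_USER_MASK;
]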
+ +Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12) +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kvm_emulate.h | 6 ++++-- + arch/x86/kvm/emulate.c | 12 ++++++------ + arch/x86/kvm/x86.c | 18 ++++++++++++++---- + 3 files changed, 24 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h +index e9cd7be..0b7d332 100644 +--- a/arch/x86/include/asm/kvm_emulate.h ++++ b/arch/x86/include/asm/kvm_emulate.h +@@ -105,11 +105,12 @@ struct x86_emulate_ops { + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. ++ * @system:[IN ] Whether the access is forced to be at CPL0. + */ + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, +- struct x86_exception *fault); ++ struct x86_exception *fault, bool system); + + /* + * read_phys: Read bytes of standard (non-emulated/special) memory. +@@ -127,10 +128,11 @@ struct x86_emulate_ops { + * @addr: [IN ] Linear address to which to write. + * @val: [OUT] Value write to memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to write to memory. ++ * @system:[IN ] Whether the access is forced to be at CPL0. + */ + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, +- struct x86_exception *fault); ++ struct x86_exception *fault, bool system); + /* + * fetch: Read bytes of standard (non-emulated/special) memory. + * Used for instruction fetch. +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index b6ec3e9..1e96a5a 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -805,14 +805,14 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) + static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, + void *data, unsigned size) + { +- return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); ++ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true); + } + + static int linear_write_system(struct x86_emulate_ctxt *ctxt, + ulong linear, void *data, + unsigned int size) + { +- return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); ++ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true); + } + + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, +@@ -826,7 +826,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; +- return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); ++ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false); + } + + static int segmented_write_std(struct x86_emulate_ctxt *ctxt, +@@ -840,7 +840,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; +- return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); ++ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false); + } + + /* +@@ -2893,12 +2893,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, + #ifdef CONFIG_X86_64 + base |= ((u64)base3) << 32; + #endif +- r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); ++ r = 
ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
+ return false;
+- r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
++ r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if ((perm >> bit_idx) & mask)
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index af8e120..2c4d91e 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4383,10 +4383,15 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
+ 
+ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val, unsigned int bytes,
+- struct x86_exception *exception)
++ struct x86_exception *exception, bool system)
+ {
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
++ u32 access = 0;
++
++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
++ access |= PFERR_USER_MASK;
++
++ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
+ }
+ 
+ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+@@ -4430,12 +4435,17 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes
+ }
+ 
+ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+- unsigned int bytes, struct x86_exception *exception)
++ unsigned int bytes, struct x86_exception *exception,
++ bool system)
+ {
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
++ u32 access = PFERR_WRITE_MASK;
++
++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
++ access |= PFERR_USER_MASK;
+ 
+ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+- PFERR_WRITE_MASK, exception);
++ access, exception);
+ }
+ 
+ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch
new file mode 100644
index 00000000..1e33e521
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch
@@ -0,0 +1,39 @@
+From a3032e35007a8178f448e471acb6bc6c972c087a Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Mon, 5 Mar 2018 19:25:51 +0300
+Subject: [PATCH 05/93] x86/cpufeatures: Add Intel PCONFIG cpufeature
+
+commit 7958b2246fadf54b7ff820a2a5a2c5ca1554716f upstream.
+
+CPUID.0x7.0x0:EDX[18] indicates whether the Intel CPU supports the PCONFIG instruction.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kai Huang <kai.huang@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/20180305162610.37510-4-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index ed7a1d2..a248531 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -302,6 +302,7 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch
new file mode 100644
index 00000000..9430b597
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch
@@ -0,0 +1,67 @@
+From 399e9dee4411858aa4eb8894f031ff68ab3b5e9f Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Sun, 5 Nov 2017 16:54:47 -0800
+Subject: [PATCH 06/33] KVM: X86: Fix operand/address-size during instruction
+ decoding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ]
+
+Pedro reported:
+ During tests that we conducted on KVM, we noticed that executing a "PUSH %ES"
+ instruction under KVM produces different results on both memory and the SP
+ register depending on whether EPT support is enabled. With EPT the SP is
+ reduced by 4 bytes (and the written value is 0-padded) but without EPT support
+ it is only reduced by 2 bytes. The difference can be observed when the CS.DB
+ field is 1 (32-bit) but not when it's 0 (16-bit).
+
+The internal segment descriptor cache exists even in real/vm8086 mode. The CS.D
+bit should also be respected, instead of only the default operand/address size
+and the 66H/67H prefixes, during instruction decoding. This patch fixes it by
+also adjusting the operand/address size according to CS.D.
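+
+As an illustration, a minimal standalone sketch of the decode rule this patch
+adopts in the hunk below (a hypothetical helper, not code from the emulator):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /* In real/vm8086 mode the default operand/address size is 2 bytes
+   * unless the cached CS descriptor's D bit is set (e.g. left over
+   * from a "big real mode" transition), in which case it is 4 bytes. */
+  static int default_op_bytes(bool real_or_vm86, bool cs_d)
+  {
+          if (real_or_vm86)
+                  return cs_d ? 4 : 2;
+          return 2;       /* PROT16 shown; other modes omitted */
+  }
+
+  int main(void)
+  {
+          printf("CS.D=0 -> %d bytes\n", default_op_bytes(true, false));
+          printf("CS.D=1 -> %d bytes\n", default_op_bytes(true, true));
+          return 0;
+  }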
+ +Reported-by: Pedro Fonseca <pfonseca@cs.washington.edu> +Tested-by: Pedro Fonseca <pfonseca@cs.washington.edu> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Nadav Amit <nadav.amit@gmail.com> +Cc: Pedro Fonseca <pfonseca@cs.washington.edu> +Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/emulate.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 9f676ad..9984daf 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -4971,6 +4971,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) + bool op_prefix = false; + bool has_seg_override = false; + struct opcode opcode; ++ u16 dummy; ++ struct desc_struct desc; + + ctxt->memop.type = OP_NONE; + ctxt->memopp = NULL; +@@ -4989,6 +4991,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) + switch (mode) { + case X86EMUL_MODE_REAL: + case X86EMUL_MODE_VM86: ++ def_op_bytes = def_ad_bytes = 2; ++ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); ++ if (desc.d) ++ def_op_bytes = def_ad_bytes = 4; ++ break; + case X86EMUL_MODE_PROT16: + def_op_bytes = def_ad_bytes = 2; + break; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch new file mode 100644 index 00000000..a8632983 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch @@ -0,0 +1,84 @@ +From d98751217028054a791c98512d1ed81d406f55da Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Thu, 7 Jun 2018 09:13:48 -0700 +Subject: [PATCH 06/10] x86/spectre_v1: Disable compiler optimizations over + array_index_mask_nospec() + +commit eab6870fee877258122a042bfd99ee7908c40280 upstream. + +Mark Rutland noticed that GCC optimization passes have the potential to elide +necessary invocations of the array_index_mask_nospec() instruction sequence, +so mark the asm() volatile. + +Mark explains: + +"The volatile will inhibit *some* cases where the compiler could lift the + array_index_nospec() call out of a branch, e.g. where there are multiple + invocations of array_index_nospec() with the same arguments: + + if (idx < foo) { + idx1 = array_idx_nospec(idx, foo) + do_something(idx1); + } + + < some other code > + + if (idx < foo) { + idx2 = array_idx_nospec(idx, foo); + do_something_else(idx2); + } + + ... since the compiler can determine that the two invocations yield the same + result, and reuse the first result (likely the same register as idx was in + originally) for the second branch, effectively re-writing the above as: + + if (idx < foo) { + idx = array_idx_nospec(idx, foo); + do_something(idx); + } + + < some other code > + + if (idx < foo) { + do_something_else(idx); + } + + ... if we don't take the first branch, then speculatively take the second, we + lose the nospec protection. 
+ + There's more info on volatile asm in the GCC docs: + + https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile + " + +Reported-by: Mark Rutland <mark.rutland@arm.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Acked-by: Mark Rutland <mark.rutland@arm.com> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Linus Torvalds <torvalds@linux-foundation.org> +Cc: <stable@vger.kernel.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec") +Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/barrier.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h +index 78d1c6a..eb53c2c 100644 +--- a/arch/x86/include/asm/barrier.h ++++ b/arch/x86/include/asm/barrier.h +@@ -37,7 +37,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + { + unsigned long mask; + +- asm ("cmp %1,%2; sbb %0,%0;" ++ asm volatile ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"g"(size),"r" (index) + :"cc"); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch new file mode 100644 index 00000000..ecb1cdd3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch @@ -0,0 +1,57 @@ +From b4f699a49be9bbfa6bb5408e7f54c89b9bdc8919 Mon Sep 17 00:00:00 2001 +From: Andy Whitcroft <apw@canonical.com> +Date: Wed, 14 Mar 2018 11:24:27 +0000 +Subject: [PATCH 06/93] x86/speculation, objtool: Annotate indirect calls/jumps + for objtool on 32-bit kernels + +commit a14bff131108faf50cc0cf864589fd71ee216c96 upstream. + +In the following commit: + + 9e0e3c5130e9 ("x86/speculation, objtool: Annotate indirect calls/jumps for objtool") + +... we added annotations for CALL_NOSPEC/JMP_NOSPEC on 64-bit x86 kernels, +but we did not annotate the 32-bit path. + +Annotate it similarly. + +Signed-off-by: Andy Whitcroft <apw@canonical.com> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20180314112427.22351-1-apw@canonical.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index d0dabea..f928ad9 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -183,7 +183,10 @@ + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. 
+ */ +-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ ++# define CALL_NOSPEC \ ++ ALTERNATIVE( \ ++ ANNOTATE_RETPOLINE_SAFE \ ++ "call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch new file mode 100644 index 00000000..2ca432cf --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch @@ -0,0 +1,72 @@ +From 34cbfb000e9bd72eb48fb3d1e61be034053f743f Mon Sep 17 00:00:00 2001 +From: Nikita Leshenko <nikita.leshchenko@oracle.com> +Date: Sun, 5 Nov 2017 15:52:29 +0200 +Subject: [PATCH 07/33] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC + reconfigure race +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ] + +KVM uses ioapic_handled_vectors to track vectors that need to notify the +IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an +interrupt with old configuration is pending or running and +ioapic_handled_vectors only remembers the newest configuration; +thus EOI from the old interrupt is not delievered to the IOAPIC. + +A previous commit db2bdcbbbd32 +("KVM: x86: fix edge EOI and IOAPIC reconfig race") +addressed this issue by adding pending edge-triggered interrupts to +ioapic_handled_vectors, fixing this race for edge-triggered interrupts. +The commit explicitly ignored level-triggered interrupts, +but this race applies to them as well: + +1) IOAPIC sends a level triggered interrupt vector to VCPU0 +2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC + to route the vector to VCPU1. The reconfiguration rewrites only the + upper 32 bits of the IOREDTBLn register. (Causes KVM to update + ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) +3) VCPU0 sends EOI for the vector, but it's not delievered to the + IOAPIC because the ioapic_handled_vectors doesn't include the vector. +4) New interrupts are not delievered to VCPU1 because remote_irr bit + is set forever. + +Therefore, the correct behavior is to add all pending and running +interrupts to ioapic_handled_vectors. + +This commit introduces a slight performance hit similar to +commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") +for the rare case that the vector is reused by a non-IOAPIC source on +VCPU0. We prefer to keep solution simple and not handle this case just +as the original commit does. 
+
+Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race")
+
+Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/ioapic.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 6e219e5..a7ac868 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
+ index == RTC_GSI) {
+ if (kvm_apic_match_dest(vcpu, NULL, 0,
+ e->fields.dest_id, e->fields.dest_mode) ||
+- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
+- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
++ kvm_apic_pending_eoi(vcpu, e->fields.vector))
+ __set_bit(e->fields.vector,
+ ioapic_handled_vectors);
+ }
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch
new file mode 100644
index 00000000..3ddb8ece
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch
@@ -0,0 +1,59 @@
+From f08520b8eba49e29d01f53ac8f2a52022e435744 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Fri, 25 May 2018 14:41:39 -0700
+Subject: [PATCH 07/10] x86/mce: Improve error message when kernel cannot
+ recover
+
+commit c7d606f560e4c698884697fef503e4abacdd8c25 upstream.
+
+Since we added support to recover from some errors inside the kernel in:
+
+commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+
+we have done a less than stellar job at reporting the cause of recoverable
+machine checks that occur in other parts of the kernel. The user just gets
+the unhelpful message:
+
+ mce: [Hardware Error]: Machine check: Action required: unknown MCACOD
+
+This is doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD
+and see that it is listed as one of the standard recoverable values.
+
+Add an extra rule to the MCE severity table to catch this case and report it
+as:
+
+ mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel
+
+Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: stable@vger.kernel.org # 4.6+
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+index c7efbcf..17dbbdbb 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
++++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
+@@ -143,6 +143,11 @@ static struct severity {
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ USER
+ ),
++ MCESEV(
++ PANIC, "Data load in unrecoverable area of kernel",
++ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
++ KERNEL
++ ),
+ #endif
+ MCESEV(
+ PANIC, "Action required: unknown MCACOD",
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch
new file mode 100644
index 00000000..4da48ef5
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch
@@ -0,0 +1,48 @@
+From 5516ae4d16ab0ce922de31fec20d5d5e198aa258 Mon Sep 17 00:00:00 2001
+From: Alexander Sergeyev <sergeev917@gmail.com>
+Date: Tue, 13 Mar 2018 22:38:56 +0300
+Subject: [PATCH 07/93] x86/speculation: Remove Skylake C2 from Speculation
+ Control microcode blacklist
+
+commit e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 upstream.
+
+In accordance with Intel's microcode revision guidance from March 6, MCU
+rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors
+that share CPUID 506E3.
+
+Signed-off-by: Alexander Sergeyev <sergeev917@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Jia Zhang <qianyue.zj@alibaba-inc.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Kyle Huey <me@kylehuey.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 7680425..8fb1d65 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -64,7 +64,7 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
+ /*
+ * Early microcode releases for the Spectre v2 mitigation were broken. 
+ * Information taken from; +- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf ++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release +@@ -82,7 +82,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, +- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch new file mode 100644 index 00000000..6e097d05 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch @@ -0,0 +1,64 @@ +From aca211b549c07b81295e817e663a61a1ae1fd659 Mon Sep 17 00:00:00 2001 +From: Nikita Leshenko <nikita.leshchenko@oracle.com> +Date: Sun, 5 Nov 2017 15:52:32 +0200 +Subject: [PATCH 08/33] KVM: x86: ioapic: Clear Remote IRR when entry is + switched to edge-triggered +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ] + +Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for +IOAPICs without an EOI register. They simulate the EOI message manually +by changing the trigger mode to edge and then back to level, with the +entry being masked during this. + +QEMU implements this feature in commit ed1263c363c9 +("ioapic: clear remote irr bit for edge-triggered interrupts") + +As a side effect, this commit removes an incorrect behavior where Remote +IRR was cleared when the redirection table entry was rewritten. This is not +consistent with the manual and also opens an opportunity for a strange +behavior when a redirection table entry is modified from an interrupt +handler that handles the same entry: The modification will clear the +Remote IRR bit even though the interrupt handler is still running. + +Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com> +Reviewed-by: Liran Alon <liran.alon@oracle.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> +Reviewed-by: Steve Rutherford <srutherford@google.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/ioapic.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c +index a7ac868..4b573c8 100644 +--- a/arch/x86/kvm/ioapic.c ++++ b/arch/x86/kvm/ioapic.c +@@ -306,8 +306,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) + } else { + e->bits &= ~0xffffffffULL; + e->bits |= (u32) val; +- e->fields.remote_irr = 0; + } ++ ++ /* ++ * Some OSes (Linux, Xen) assume that Remote IRR bit will ++ * be cleared by IOAPIC hardware when the entry is configured ++ * as edge-triggered. 
This behavior is used to simulate an ++ * explicit EOI on IOAPICs that don't have the EOI register. ++ */ ++ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) ++ e->fields.remote_irr = 0; ++ + mask_after = e->fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch new file mode 100644 index 00000000..d8206d02 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch @@ -0,0 +1,60 @@ +From ed22188fb6b2b43b2af7b1f6714d3befb6fe7965 Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Fri, 25 May 2018 14:42:09 -0700 +Subject: [PATCH 08/10] x86/mce: Check for alternate indication of machine + check recovery on Skylake + +commit 4c5717da1d021cf368eabb3cb1adcaead56c0d1e upstream. + +Currently we just check the "CAPID0" register to see whether the CPU +can recover from machine checks. + +But there are also some special SKUs which do not have all advanced +RAS features, but do enable machine check recovery for use with NVDIMMs. + +Add a check for any of bits {8:5} in the "CAPID5" register (each +reports some NVDIMM mode available, if any of them are set, then +the system supports memory machine check recovery). + +Signed-off-by: Tony Luck <tony.luck@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com> +Cc: Ashok Raj <ashok.raj@intel.com> +Cc: stable@vger.kernel.org # 4.9 +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Borislav Petkov <bp@suse.de> +Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/quirks.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c +index 0bee04d..b57100a 100644 +--- a/arch/x86/kernel/quirks.c ++++ b/arch/x86/kernel/quirks.c +@@ -643,12 +643,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) + /* Skylake */ + static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) + { +- u32 capid0; ++ u32 capid0, capid5; + + pci_read_config_dword(pdev, 0x84, &capid0); ++ pci_read_config_dword(pdev, 0x98, &capid5); + +- if ((capid0 & 0xc0) == 0xc0) ++ /* ++ * CAPID0{7:6} indicate whether this is an advanced RAS SKU ++ * CAPID5{8:5} indicate that various NVDIMM usage modes are ++ * enabled, so memory machine check recovery is also enabled. 
++ */ ++ if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) + static_branch_inc(&mcsafe_key); ++ + } + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch new file mode 100644 index 00000000..1b5231fc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch @@ -0,0 +1,62 @@ +From 7737fc421365d9f2fd328b19fdccf005092d4ec1 Mon Sep 17 00:00:00 2001 +From: Tiantian Feng <fengtiantian@huawei.com> +Date: Wed, 19 Apr 2017 18:18:39 +0200 +Subject: [PATCH 08/93] x86/reboot: Turn off KVM when halting a CPU + +[ Upstream commit fba4f472b33aa81ca1836f57d005455261e9126f ] + +A CPU in VMX root mode will ignore INIT signals and will fail to bring +up the APs after reboot. Therefore, on a panic we disable VMX on all +CPUs before rebooting or triggering kdump. + +Do this when halting the machine as well, in case a firmware-level reboot +does not perform a cold reset for all processors. Without doing this, +rebooting the host may hang. + +Signed-off-by: Tiantian Feng <fengtiantian@huawei.com> +Signed-off-by: Xishi Qiu <qiuxishi@huawei.com> +[ Rewritten commit message. ] +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: kvm@vger.kernel.org +Link: http://lkml.kernel.org/r/20170419161839.30550-1-pbonzini@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/smp.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c +index c00cb64..420f2dc 100644 +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -33,6 +33,7 @@ + #include <asm/mce.h> + #include <asm/trace/irq_vectors.h> + #include <asm/kexec.h> ++#include <asm/virtext.h> + + /* + * Some notes on x86 processor bugs affecting SMP operation: +@@ -162,6 +163,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) + if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) + return NMI_HANDLED; + ++ cpu_emergency_vmxoff(); + stop_this_cpu(NULL); + + return NMI_HANDLED; +@@ -174,6 +176,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) + asmlinkage __visible void smp_reboot_interrupt(void) + { + ipi_entering_ack_irq(); ++ cpu_emergency_vmxoff(); + stop_this_cpu(NULL); + irq_exit(); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch new file mode 100644 index 00000000..071eccd3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch @@ -0,0 +1,61 @@ +From a4337b660fe26046e81471186dc393ca77371b83 Mon Sep 17 00:00:00 2001 +From: Nikita Leshenko <nikita.leshchenko@oracle.com> +Date: Sun, 5 Nov 2017 15:52:33 +0200 +Subject: [PATCH 09/33] KVM: x86: ioapic: Preserve read-only values in the 
+ redirection table
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ]
+
+According to the 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are
+read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb
+("ioapic: keep RO bits for IOAPIC entry").
+
+Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Reviewed-by: Steve Rutherford <srutherford@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/ioapic.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 4b573c8..5f810bb 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -278,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ {
+ unsigned index;
+ bool mask_before, mask_after;
++ int old_remote_irr, old_delivery_status;
+ union kvm_ioapic_redirect_entry *e;
+
+ switch (ioapic->ioregsel) {
+@@ -300,6 +301,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ return;
+ e = &ioapic->redirtbl[index];
+ mask_before = e->fields.mask;
++ /* Preserve read-only fields */
++ old_remote_irr = e->fields.remote_irr;
++ old_delivery_status = e->fields.delivery_status;
+ if (ioapic->ioregsel & 1) {
+ e->bits &= 0xffffffff;
+ e->bits |= (u64) val << 32;
+@@ -307,6 +311,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ e->bits &= ~0xffffffffULL;
+ e->bits |= (u32) val;
+ }
++ e->fields.remote_irr = old_remote_irr;
++ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch
new file mode 100644
index 00000000..1e9973e7
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch
@@ -0,0 +1,79 @@
+From 29fa51519ae0978980c8fc154eba5b244ad7980f Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Thu, 27 Apr 2017 15:42:20 +0800
+Subject: [PATCH 09/93] x86/KASLR: Fix kexec kernel boot crash when KASLR
+ randomization fails
+
+[ Upstream commit da63b6b20077469bd6bd96e07991ce145fc4fbc4 ]
+
+Dave found that a kdump kernel with KASLR enabled will reset to the BIOS
+immediately if physical randomization failed to find a new position for
+the kernel. A kernel with the 'nokaslr' option works in this case.
+
+The reason is that KASLR will install a new page table for the identity
+mapping, while it missed building it for the original kernel location
+if KASLR physical randomization fails.
+
+This only happens in the kexec/kdump kernel, because the identity mapping
+has been built for kexec/kdump in the 1st kernel for the whole memory by
+calling init_pgtable(). Here if physical randomization fails, it won't build
+the identity mapping for the original area of the kernel, but switches to a
+new page table '_pgtable'. Then the kernel triple faults immediately
+because it has no identity mappings.
+ +The normal kernel won't see this bug, because it comes here via startup_32() +and CR3 will be set to _pgtable already. In startup_32() the identity +mapping is built for the 0~4G area. In KASLR we just append to the existing +area instead of entirely overwriting it for on-demand identity mapping +building. So the identity mapping for the original area of kernel is still +there. + +To fix it we just switch to the new identity mapping page table when physical +KASLR succeeds. Otherwise we keep the old page table unchanged just like +"nokaslr" does. + +Signed-off-by: Baoquan He <bhe@redhat.com> +Signed-off-by: Dave Young <dyoung@redhat.com> +Acked-by: Kees Cook <keescook@chromium.org> +Cc: Borislav Petkov <bp@suse.de> +Cc: Dave Jiang <dave.jiang@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Garnier <thgarnie@google.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Yinghai Lu <yinghai@kernel.org> +Link: http://lkml.kernel.org/r/1493278940-5885-1-git-send-email-bhe@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/boot/compressed/kaslr.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c +index a66854d..af42b4d 100644 +--- a/arch/x86/boot/compressed/kaslr.c ++++ b/arch/x86/boot/compressed/kaslr.c +@@ -463,10 +463,17 @@ void choose_random_location(unsigned long input, + add_identity_map(random_addr, output_size); + *output = random_addr; + } ++ ++ /* ++ * This loads the identity mapping page table. ++ * This should only be done if a new physical address ++ * is found for the kernel, otherwise we should keep ++ * the old page table to make it be like the "nokaslr" ++ * case. ++ */ ++ finalize_identity_maps(); + } + +- /* This actually loads the identity pagetable on x86_64. */ +- finalize_identity_maps(); + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch new file mode 100644 index 00000000..76fa3b70 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch @@ -0,0 +1,103 @@ +From 1357825b6905bcf665161dc41b764a83b21954e9 Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Fri, 22 Jun 2018 11:54:23 +0200 +Subject: [PATCH 09/10] x86/mce: Fix incorrect "Machine check from unknown + source" message + +commit 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 upstream. 
+
+Some injection testing resulted in the following console log:
+
+ mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134
+ mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]}
+ mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88
+ mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043
+ mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+ Kernel panic - not syncing: Machine check from unknown source
+
+This confused everybody because the first line quite clearly shows
+that we found a logged error in "Bank 1", while the last line says
+"unknown source".
+
+The problem is that the Linux code doesn't do the right thing
+for a local machine check that results in a fatal error.
+
+It turns out that we know very early in the handler whether the
+machine check is fatal. The call to mce_no_way_out() has checked
+all the banks for the CPU that took the local machine check. If
+it says we must crash, we can do so right away with the right
+messages.
+
+We do scan all the banks again. This means that we might initially
+not see a problem, but during the second scan find something fatal.
+If this happens we print a slightly different message (so I can
+see if it actually ever happens).
+
+[ bp: Remove unneeded severity assignment. ]
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Cc: linux-edac <linux-edac@vger.kernel.org>
+Cc: stable@vger.kernel.org # 4.2
+Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
+index 72bcd08..4711e1c 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -1169,13 +1169,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
+ lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+ /*
++ * Local machine check may already know that we have to panic.
++ * Broadcast machine check begins rendezvous in mce_start()
+ * Go through all banks in exclusion of the other CPUs. This way we
+ * don't report duplicated events on shared banks because the first one
+- * to see it will clear it. If this is a Local MCE, then no need to
+- * perform rendezvous.
++ * to see it will clear it.
+ */
+- if (!lmce)
++ if (lmce) {
++ if (no_way_out)
++ mce_panic("Fatal local machine check", &m, msg);
++ } else {
+ order = mce_start(&no_way_out);
++ }
+
+ for (i = 0; i < cfg->banks; i++) {
+ __clear_bit(i, toclear);
+@@ -1251,12 +1256,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
+ no_way_out = worst >= MCE_PANIC_SEVERITY;
+ } else {
+ /*
+- * Local MCE skipped calling mce_reign()
+- * If we found a fatal error, we need to panic here.
++ * If there was a fatal machine check we should have
++ * already called mce_panic earlier in this function.
++ * Since we re-read the banks, we might have found
++ * something new. Check again to see if we found a
++ * fatal error. We call "mce_severity()" again to
++ * make sure we have the right "msg". 
+ */ +- if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) +- mce_panic("Machine check from unknown source", +- NULL, NULL); ++ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { ++ mce_severity(&m, cfg->tolerant, &msg, true); ++ mce_panic("Local fatal machine check!", &m, msg); ++ } + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch new file mode 100644 index 00000000..7ab25b0b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch @@ -0,0 +1,103 @@ +From fc18f773d54edfedf8875473d8e69753265a3dfd Mon Sep 17 00:00:00 2001 +From: Wanpeng Li <wanpeng.li@hotmail.com> +Date: Mon, 20 Nov 2017 14:52:21 -0800 +Subject: [PATCH 10/33] KVM: VMX: Fix rflags cache during vCPU reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ] + +Reported by syzkaller: + + *** Guest State *** + CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 + CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 + CR3 = 0x000000002081e000 + RSP = 0x000000000000fffa RIP = 0x0000000000000000 + RFLAGS=0x00023000 DR7 = 0x00000000000000 + ^^^^^^^^^^ + ------------[ cut here ]------------ + WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] + CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 + RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] + RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 + Call Trace: + kvm_vcpu_ioctl+0x479/0x880 [kvm] + do_vfs_ioctl+0x142/0x9a0 + SyS_ioctl+0x74/0x80 + entry_SYSCALL_64_fastpath+0x23/0x9a + +The failed vmentry is triggered by the following beautified testcase: + + #include <unistd.h> + #include <sys/syscall.h> + #include <string.h> + #include <stdint.h> + #include <linux/kvm.h> + #include <fcntl.h> + #include <sys/ioctl.h> + + long r[5]; + int main() + { + struct kvm_debugregs dr = { 0 }; + + r[2] = open("/dev/kvm", O_RDONLY); + r[3] = ioctl(r[2], KVM_CREATE_VM, 0); + r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); + struct kvm_guest_debug debug = { + .control = 0xf0403, + .arch = { + .debugreg[6] = 0x2, + .debugreg[7] = 0x2 + } + }; + ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); + ioctl(r[4], KVM_RUN, 0); + } + +which testcase tries to setup the processor specific debug +registers and configure vCPU for handling guest debug events through +KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set +rflags in order to set TF bit if single step is needed. All regs' caches +are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU +reset. However, the cache of rflags is not reset during vCPU reset. The +function vmx_get_rflags() returns an unreset rflags cache value since +the cache is marked avail, it is 0 after boot. Vmentry fails if the +rflags reserved bit 1 is 0. + +This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and +its cache to 0x2 during vCPU reset. 
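+
+A sketch of the cached-register pattern behind the bug (hypothetical types
+and helper names, not kvm's actual register-cache machinery):
+
+  #include <stdbool.h>
+
+  #define X86_EFLAGS_FIXED 0x2UL  /* reserved bit 1, must always be set */
+
+  struct rflags_cache { unsigned long val; bool avail; };
+
+  static void vmcs_write_rflags(unsigned long v) { (void)v; /* hw write */ }
+
+  /* Going through the setter keeps the software cache and the VMCS field
+   * in sync; writing the VMCS field alone leaves a stale cached 0, which
+   * later fails the vmentry check on reserved bit 1. */
+  static void set_rflags(struct rflags_cache *c, unsigned long v)
+  {
+          c->val = v;
+          c->avail = true;
+          vmcs_write_rflags(v);
+  }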
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 8e5001d..98f6545 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5171,7 +5171,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ }
+
+- vmcs_writel(GUEST_RFLAGS, 0x02);
++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ kvm_rip_write(vcpu, 0xfff0);
+
+ vmcs_writel(GUEST_GDTR_BASE, 0);
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch
new file mode 100644
index 00000000..aef1109b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-kvm-x86-fix-icebp-instruction-handling.patch
@@ -0,0 +1,88 @@
+From 694ba89c4cb4e43ae4cb418ea46b1415f6d31ce7 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 20 Mar 2018 12:16:59 -0700
+Subject: [PATCH 10/93] kvm/x86: fix icebp instruction handling
+
+commit 32d43cd391bacb5f0814c2624399a5dad3501d09 upstream.
+
+The undocumented 'icebp' instruction (aka 'int1') works pretty much like
+'int3' in the absence of in-circuit probing equipment (except,
+obviously, that it raises #DB instead of raising #BP), and is used by
+some validation test-suites as such.
+
+But Andy Lutomirski noticed that his test suite acted differently in kvm
+than on bare hardware.
+
+The reason is that kvm used an inexact test for the icebp instruction:
+it just assumed that an all-zero VM exit qualification value meant that
+the VM exit was due to icebp.
+
+That is not unlike the guess that do_debug() does for the actual
+exception handling case, but it's purely a heuristic, not an absolute
+rule. do_debug() does it because it wants to ascribe _some_ reasons to
+the #DB that happened, and an empty %dr6 value means that 'icebp' is the
+most likely cause and we have no better information.
+
+But kvm can just do it right, because unlike the do_debug() case, kvm
+actually sees the real reason for the #DB in the VM-exit interruption
+information field.
+
+So instead of relying on an inexact heuristic, just use the actual VM
+exit information that says "it was 'icebp'".
+
+Right now the 'icebp' instruction isn't technically documented by Intel,
+but that will hopefully change. The special "privileged software
+exception" information _is_ actually mentioned in the Intel SDM, even
+though the cause of it isn't enumerated.
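+
+Stated in isolation, the check the patch adds is just a field test on the
+interruption-information word (a standalone restatement; the constants
+mirror the VMX definitions in the hunk below):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  #define INTR_INFO_INTR_TYPE_MASK    0x700u        /* bits 10:8 */
+  #define INTR_INFO_VALID_MASK        0x80000000u   /* bit 31 */
+  #define INTR_TYPE_PRIV_SW_EXCEPTION (5u << 8)     /* icebp/int1 */
+
+  static bool is_icebp(uint32_t intr_info)
+  {
+          return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+              == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+  }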
+ +Reported-by: Andy Lutomirski <luto@kernel.org> +Tested-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/vmx.h | 1 + + arch/x86/kvm/vmx.c | 9 ++++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h +index 6899cf1..9cbfbef 100644 +--- a/arch/x86/include/asm/vmx.h ++++ b/arch/x86/include/asm/vmx.h +@@ -309,6 +309,7 @@ enum vmcs_field { + #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ + #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ + #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ ++#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ + #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ + + /* GUEST_INTERRUPTIBILITY_INFO flags. */ +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 3c3558b..27f505d 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -1053,6 +1053,13 @@ static inline bool is_machine_check(u32 intr_info) + (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); + } + ++/* Undocumented: icebp/int1 */ ++static inline bool is_icebp(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); ++} ++ + static inline bool cpu_has_vmx_msr_bitmap(void) + { + return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; +@@ -5708,7 +5715,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= dr6 | DR6_RTM; +- if (!(dr6 & ~DR6_RESERVED)) /* icebp */ ++ if (is_icebp(intr_info)) + skip_emulated_instruction(vcpu); + + kvm_queue_exception(vcpu, DB_VECTOR); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch new file mode 100644 index 00000000..d00a4886 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch @@ -0,0 +1,81 @@ +From 754013b3067881c493df74f91ad34099c3a32c61 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Fri, 22 Jun 2018 11:54:28 +0200 +Subject: [PATCH 10/10] x86/mce: Do not overwrite MCi_STATUS in + mce_no_way_out() + +commit 1f74c8a64798e2c488f86efc97e308b85fb7d7aa upstream. + +mce_no_way_out() does a quick check during #MC to see whether some of +the MCEs logged would require the kernel to panic immediately. And it +passes a struct mce where MCi_STATUS gets written. + +However, after having saved a valid status value, the next iteration +of the loop which goes over the MCA banks on the CPU, overwrites the +valid status value because we're using struct mce as storage instead of +a temporary variable. + +Which leads to MCE records with an empty status value: + + mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000 + mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10} + +In order to prevent the loss of the status register value, return +immediately when severity is a panic one so that we can panic +immediately with the first fatal MCE logged. 
This is also the intention +of this function and not to noodle over the banks while a fatal MCE is +already logged. + +Tony: read the rest of the MCA bank to populate the struct mce fully. + +Suggested-by: Tony Luck <tony.luck@intel.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: <stable@vger.kernel.org> +Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c +index 4711e1c..bf6013d 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -779,23 +779,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll); + static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, + struct pt_regs *regs) + { +- int i, ret = 0; + char *tmp; ++ int i; + + for (i = 0; i < mca_cfg.banks; i++) { + m->status = mce_rdmsrl(msr_ops.status(i)); +- if (m->status & MCI_STATUS_VAL) { +- __set_bit(i, validp); +- if (quirk_no_way_out) +- quirk_no_way_out(i, m, regs); +- } ++ if (!(m->status & MCI_STATUS_VAL)) ++ continue; ++ ++ __set_bit(i, validp); ++ if (quirk_no_way_out) ++ quirk_no_way_out(i, m, regs); + + if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { ++ mce_read_aux(m, i); + *msg = tmp; +- ret = 1; ++ return 1; + } + } +- return ret; ++ return 0; + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch new file mode 100644 index 00000000..4e1d906b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch @@ -0,0 +1,82 @@ +From adbb63b59bd2792df649335e7d3c28be2fbbe1c2 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 25 Jan 2018 10:58:13 +0100 +Subject: [PATCH 11/33] KVM: x86: Make indirect calls in emulator speculation + safe + +(cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab) + +Replace the indirect calls with CALL_NOSPEC. 
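+
+The retpoline construction that CALL_NOSPEC expands to can be illustrated
+with a standalone userspace sketch (x86-64, GNU toolchain; the thunk and the
+call_nospec wrapper are hypothetical names, not kernel symbols):
+
+  #include <stdio.h>
+
+  /* Classic retpoline: speculation is parked in the pause/lfence loop,
+   * while the architectural path overwrites the return address with the
+   * real target and jumps there via 'ret'. */
+  __asm__(
+      ".text\n"
+      "retpoline_rax:\n"
+      "    call 1f\n"
+      "0:  pause\n"
+      "    lfence\n"
+      "    jmp 0b\n"
+      "1:  mov %rax, (%rsp)\n"
+      "    ret\n"
+      ".globl call_nospec\n"
+      "call_nospec:\n"          /* long call_nospec(long (*fn)(long), long) */
+      "    mov %rdi, %rax\n"    /* indirect target into %rax */
+      "    mov %rsi, %rdi\n"    /* shift the argument into place */
+      "    jmp retpoline_rax\n" /* tail-call through the thunk */
+  );
+
+  long call_nospec(long (*fn)(long), long arg);
+
+  static long add1(long x) { return x + 1; }
+
+  int main(void)
+  {
+          printf("%ld\n", call_nospec(add1, 41)); /* prints 42 */
+          return 0;
+  }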
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Ashok Raj <ashok.raj@intel.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: Jun Nakajima <jun.nakajima@intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: rga@amazon.de +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Asit Mallick <asit.k.mallick@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Jason Baron <jbaron@akamai.com> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org +[dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it] +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/emulate.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 9984daf..6faac71 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -25,6 +25,7 @@ + #include <asm/kvm_emulate.h> + #include <linux/stringify.h> + #include <asm/debugreg.h> ++#include <asm/nospec-branch.h> + + #include "x86.h" + #include "tss.h" +@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) + void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); + + flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; +- asm("push %[flags]; popf; call *%[fastop]" +- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); ++ asm("push %[flags]; popf; " CALL_NOSPEC ++ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); + return rc; + } + +@@ -5287,15 +5288,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, + + static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) + { +- register void *__sp asm(_ASM_SP); + ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; + + if (!(ctxt->d & ByteOp)) + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; + +- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" ++ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" + : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), +- [fastop]"+S"(fop), "+r"(__sp) ++ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT + : "c"(ctxt->src2.val)); + + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch new file mode 100644 index 00000000..bf2556b8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-bpf-x64-increase-number-of-passes.patch @@ -0,0 +1,56 @@ +From 1909a1513f6d5b9170e40c4fee98bf2cd57b5b55 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann <daniel@iogearbox.net> +Date: Wed, 7 Mar 2018 22:10:01 +0100 +Subject: [PATCH 11/93] bpf, x64: increase number of passes + +commit 6007b080d2e2adb7af22bf29165f0594ea12b34c upstream. 
+
+In Cilium some of the main programs we run today are hitting 9 passes
+on x64's JIT compiler, and we've had cases already where we surpassed
+the limit where the JIT then punts the program to the interpreter
+instead, leading to insertion failures due to CONFIG_BPF_JIT_ALWAYS_ON
+or insertion failures due to the prog array owner being JITed but the
+program to insert not (both must have the same JITed/non-JITed property).
+
+In one concrete case, the program image shrank from 12,767 bytes down to
+10,288 bytes, where the image converged after 16 steps. I've measured
+that this took 340us in the JIT until it converges on my i7-6600U. Thus,
+increase the original limit, which dates from day one when the JIT covered
+cBPF only, before we run into the case (similar to the complexity limit)
+where we trip over this and hit program rejections.
+Also add a cond_resched() into the compilation loop; the JIT process
+runs without any locks and may sleep anyway.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 1f7ed2e..cd97645 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -1135,7 +1135,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ * may converge on the last pass. In such case do one more
+ * pass to emit the final image
+ */
+- for (pass = 0; pass < 10 || image; pass++) {
++ for (pass = 0; pass < 20 || image; pass++) {
+ proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+ if (proglen <= 0) {
+ image = NULL;
+@@ -1162,6 +1162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ }
+ }
+ oldproglen = proglen;
++ cond_resched();
+ }
+
+ if (bpf_jit_enable > 1)
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch
new file mode 100644
index 00000000..ba052d9e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch
@@ -0,0 +1,60 @@
+From 9eee1ba493f5899d7c3793818db16deaf084df21 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:14 +0100
+Subject: [PATCH 12/33] KVM: VMX: Make indirect call speculation safe
+
+(cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699)
+
+Replace the indirect call with CALL_NOSPEC. 
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 98f6545..6f3ed0e 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8659,14 +8659,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+ #endif
+ "pushf\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+- "call *%[entry]\n\t"
++ CALL_NOSPEC
+ :
+ #ifdef CONFIG_X86_64
+ [sp]"=&r"(tmp),
+ #endif
+ "+r"(__sp)
+ :
+- [entry]"r"(entry),
++ THUNK_TARGET(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch
new file mode 100644
index 00000000..bdb55fda
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch
@@ -0,0 +1,75 @@
+From 280488ceca9427dd91e5ee449d90f8cf16d8e65c Mon Sep 17 00:00:00 2001
+From: Matthias Kaehlcke <mka@chromium.org>
+Date: Mon, 1 May 2017 15:47:41 -0700
+Subject: [PATCH 12/93] x86/mm/kaslr: Use the _ASM_MUL macro for multiplication
+ to work around Clang incompatibility
+
+[ Upstream commit 121843eb02a6e2fa30aefab64bfe183c97230c75 ]
+
+The constraint "rm" allows the compiler to put mix_const into memory.
+When the input operand is a memory location then MUL needs an operand
+size suffix, since Clang can't infer the multiplication width from the
+operand.
+
+Add and use the _ASM_MUL macro which determines the operand size and
+resolves to the MUL instruction with the corresponding suffix. 
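+
+A userspace sketch of the suffix-selection idea (simplified; the kernel
+derives the suffix via __ASM_SIZE(), as the hunk below shows):
+
+  #ifdef __x86_64__
+  # define _ASM_MUL "mulq "
+  #else
+  # define _ASM_MUL "mull "
+  #endif
+
+  unsigned long diffuse(unsigned long random, unsigned long mix_const)
+  {
+          unsigned long raw;
+
+          /* explicit size suffix, so a memory operand no longer needs
+           * the assembler to guess the operation width */
+          asm(_ASM_MUL "%3"
+              : "=a" (random), "=d" (raw)
+              : "a" (random), "rm" (mix_const));
+          return random + raw;
+  }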
+
+This fixes the following error when building with clang:
+
+ CC arch/x86/lib/kaslr.o
+ /tmp/kaslr-dfe1ad.s: Assembler messages:
+ /tmp/kaslr-dfe1ad.s:182: Error: no instruction mnemonic suffix given and no register operands; can't size instruction
+
+Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
+Cc: Grant Grundler <grundler@chromium.org>
+Cc: Greg Hackmann <ghackmann@google.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michael Davidson <md@google.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170501224741.133938-1-mka@chromium.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm.h | 1 +
+ arch/x86/lib/kaslr.c | 3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
+index 7bb29a4..08684b3 100644
+--- a/arch/x86/include/asm/asm.h
++++ b/arch/x86/include/asm/asm.h
+@@ -34,6 +34,7 @@
+ #define _ASM_ADD __ASM_SIZE(add)
+ #define _ASM_SUB __ASM_SIZE(sub)
+ #define _ASM_XADD __ASM_SIZE(xadd)
++#define _ASM_MUL __ASM_SIZE(mul)
+
+ #define _ASM_AX __ASM_REG(ax)
+ #define _ASM_BX __ASM_REG(bx)
+diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
+index 121f59c..0c7fe44 100644
+--- a/arch/x86/lib/kaslr.c
++++ b/arch/x86/lib/kaslr.c
+@@ -5,6 +5,7 @@
+ * kernel starts. This file is included in the compressed kernel and
+ * normally linked in the regular.
+ */
++#include <asm/asm.h>
+ #include <asm/kaslr.h>
+ #include <asm/msr.h>
+ #include <asm/archrandom.h>
+@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose)
+ }
+
+ /* Circular multiply for better bit diffusion */
+- asm("mul %3"
++ asm(_ASM_MUL "%3"
+ : "=a" (random), "=d" (raw)
+ : "a" (random), "rm" (mix_const));
+ random += raw;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch
new file mode 100644
index 00000000..4331a9f4
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch
@@ -0,0 +1,93 @@
+From b541de5f53d608796a946a42f5c3251e4dd07522 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Sat, 20 May 2017 20:32:32 -0700
+Subject: [PATCH 13/93] KVM: X86: Fix preempt the preemption timer cancel
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 5acc1ca4fb15f00bfa3d4046e35ca381bc25d580 ]
+
+Preemption can occur while the preemption timer is being cancelled,
+leaving inconsistent state in the lapic, vmx and vmcs fields.
+
+ CPU0 CPU1
+
+ preemption timer vmexit
+ handle_preemption_timer(vCPU0)
+ kvm_lapic_expired_hv_timer
+ vmx_cancel_hv_timer
+ vmx->hv_deadline_tsc = -1
+ vmcs_clear_bits
+ /* hv_timer_in_use still true */
+ sched_out
+ sched_in
+ kvm_arch_vcpu_load
+ vmx_set_hv_timer
+ write vmx->hv_deadline_tsc
+ vmcs_set_bits
+ /* back in kvm_lapic_expired_hv_timer */
+ hv_timer_in_use = false
+ ...
+ vmx_vcpu_run + vmx_arm_hv_run + write preemption timer deadline + spurious preemption timer vmexit + handle_preemption_timer(vCPU0) + kvm_lapic_expired_hv_timer + WARN_ON(!apic->lapic_timer.hv_timer_in_use); + +This can be reproduced sporadically during boot of L2 on a +preemptible L1, causing a splat on L1. + + WARNING: CPU: 3 PID: 1952 at arch/x86/kvm/lapic.c:1529 kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] + CPU: 3 PID: 1952 Comm: qemu-system-x86 Not tainted 4.12.0-rc1+ #24 RIP: 0010:kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] + Call Trace: + handle_preemption_timer+0xe/0x20 [kvm_intel] + vmx_handle_exit+0xc9/0x15f0 [kvm_intel] + ? lock_acquire+0xdb/0x250 + ? lock_acquire+0xdb/0x250 + ? kvm_arch_vcpu_ioctl_run+0xdf3/0x1ce0 [kvm] + kvm_arch_vcpu_ioctl_run+0xe55/0x1ce0 [kvm] + kvm_vcpu_ioctl+0x384/0x7b0 [kvm] + ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] + ? __fget+0xf3/0x210 + do_vfs_ioctl+0xa4/0x700 + ? __fget+0x114/0x210 + SyS_ioctl+0x79/0x90 + do_syscall_64+0x8f/0x750 + ? trace_hardirqs_on_thunk+0x1a/0x1c + entry_SYSCALL64_slow_path+0x25/0x25 + +This patch fixes it by disabling preemption while cancelling +preemption timer. This way cancel_hv_timer is atomic with +respect to kvm_arch_vcpu_load. + +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/lapic.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index 3f05c04..650ff4a 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1358,8 +1358,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); + + static void cancel_hv_tscdeadline(struct kvm_lapic *apic) + { ++ preempt_disable(); + kvm_x86_ops->cancel_hv_timer(apic->vcpu); + apic->lapic_timer.hv_timer_in_use = false; ++ preempt_enable(); + } + + void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch new file mode 100644 index 00000000..8b58f32e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch @@ -0,0 +1,72 @@ +From 7a1d0c7758b49b1f107157db33df0aae1c10cf26 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Wed, 31 Jan 2018 17:47:03 -0800 +Subject: [PATCH 13/33] x86/kvm: Update spectre-v1 mitigation + +(cherry picked from commit 085331dfc6bbe3501fb936e657331ca943827600) + +Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup" +added a raw 'asm("lfence");' to prevent a bounds check bypass of +'vmcs_field_to_offset_table'. + +The lfence can be avoided in this path by using the array_index_nospec() +helper designed for these types of fixes. 
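+
+As a generic sketch of the helper's use (the names here are placeholders,
+not taken from this patch): the index is clamped after the bounds check,
+so that even a mispredicted branch cannot speculatively read out of
+bounds, without paying for a full lfence:
+
+  #include <linux/nospec.h>
+
+  static int table_lookup(unsigned long idx)
+  {
+          if (idx >= TABLE_SIZE)
+                  return -EINVAL;
+          /* No-op architecturally; masks idx during speculation. */
+          idx = array_index_nospec(idx, TABLE_SIZE);
+          return table[idx];
+  }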
+ +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Andrew Honig <ahonig@google.com> +Cc: kvm@vger.kernel.org +Cc: Jim Mattson <jmattson@google.com> +Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 6f3ed0e..af90bc4 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -33,6 +33,7 @@ + #include <linux/slab.h> + #include <linux/tboot.h> + #include <linux/hrtimer.h> ++#include <linux/nospec.h> + #include "kvm_cache_regs.h" + #include "x86.h" + +@@ -856,21 +857,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { + + static inline short vmcs_field_to_offset(unsigned long field) + { +- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); ++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); ++ unsigned short offset; + +- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) ++ BUILD_BUG_ON(size > SHRT_MAX); ++ if (field >= size) + return -ENOENT; + +- /* +- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a +- * generic mechanism. +- */ +- asm("lfence"); +- +- if (vmcs_field_to_offset_table[field] == 0) ++ field = array_index_nospec(field, size); ++ offset = vmcs_field_to_offset_table[field]; ++ if (offset == 0) + return -ENOENT; +- +- return vmcs_field_to_offset_table[field]; ++ return offset; + } + + static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch new file mode 100644 index 00000000..43b1f38e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch @@ -0,0 +1,63 @@ +From 2c5329f428b85d1167abdd3206bdac08a02ae082 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de> +Date: Sat, 20 May 2017 13:22:56 +0200 +Subject: [PATCH 14/93] KVM: nVMX: Fix handling of lmsw instruction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit e1d39b17e044e8ae819827810d87d809ba5f58c0 ] + +The decision whether or not to exit from L2 to L1 on an lmsw instruction is +based on bogus values: instead of using the information encoded within the +exit qualification, it uses the data also used for the mov-to-cr +instruction, which boils down to using whatever is in %eax at that point. + +Use the correct values instead. + +Without this fix, an L1 may not get notified when a 32-bit Linux L2 +switches its secondary CPUs to protected mode; the L1 is only notified on +the next modification of CR0. This short time window poses a problem, when +there is some other reason to exit to L1 in between. Then, L2 will be +resumed in real mode and chaos ensues. + +Signed-off-by: Jan H. 
Schönherr <jschoenh@amazon.de> +Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 27f505d..8d842d9 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -7910,11 +7910,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, + { + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + int cr = exit_qualification & 15; +- int reg = (exit_qualification >> 8) & 15; +- unsigned long val = kvm_register_readl(vcpu, reg); ++ int reg; ++ unsigned long val; + + switch ((exit_qualification >> 4) & 3) { + case 0: /* mov to cr */ ++ reg = (exit_qualification >> 8) & 15; ++ val = kvm_register_readl(vcpu, reg); + switch (cr) { + case 0: + if (vmcs12->cr0_guest_host_mask & +@@ -7969,6 +7971,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, + * lmsw can change bits 1..3 of cr0, and only set bit 0 of + * cr0. Other attempted changes are ignored, with no exit. + */ ++ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; + if (vmcs12->cr0_guest_host_mask & 0xe & + (val ^ vmcs12->cr0_read_shadow)) + return true; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch new file mode 100644 index 00000000..38a23282 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch @@ -0,0 +1,47 @@ +From 6b359ffcb519698f93eadc2706d06805ce933086 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand <david@redhat.com> +Date: Wed, 25 Jan 2017 11:58:57 +0100 +Subject: [PATCH 14/33] KVM: nVMX: kmap() can't fail + +commit 42cf014d38d8822cce63703a467e00f65d000952 upstream. + +kmap() can't fail, therefore it will always return a valid pointer. Let's +just get rid of the unnecessary checks. 
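+
+An illustrative sketch (not taken from this patch) of the resulting
+idiom, relying on the contract that kmap() cannot fail:
+
+  void *va = kmap(page);  /* never NULL for a valid struct page */
+
+  memcpy(va, buf, len);
+  kunmap(page);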
+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index af90bc4..17fcbaf 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4742,10 +4742,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ return 0;
+
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+- if (!vapic_page) {
+- WARN_ON(1);
+- return -ENOMEM;
+- }
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ kunmap(vmx->nested.virtual_apic_page);
+
+@@ -9562,11 +9558,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ return false;
+ }
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+- if (!msr_bitmap_l1) {
+- nested_release_page_clean(page);
+- WARN_ON(1);
+- return false;
+- }
+
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch
new file mode 100644
index 00000000..913e3fe5
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch
@@ -0,0 +1,95 @@
+From 348032cf73954af79ac077ae0c13d6faa99294af Mon Sep 17 00:00:00 2001
+From: Roman Pen <roman.penyaev@profitbricks.com>
+Date: Thu, 1 Jun 2017 10:55:03 +0200
+Subject: [PATCH 15/93] KVM: SVM: do not zero out segment attributes if segment
+ is unusable or not present
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit d9c1b5431d5f0e07575db785a022bce91051ac1d ]
+
+This is a fix for the problem [1], where VMCB.CPL was set to 0 and an
+interrupt was taken on the userspace stack. The root cause lies in the
+specific AMD CPU behaviour which manifests itself as unusable segment
+attributes on SYSRET. The corresponding workaround for the kernel is the
+following:
+
+61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue")
+
+In turn, the virtualization side treated the unusable segment incorrectly
+and restored the CPL from the SS attributes, which were zeroed out a few
+lines above.
+
+The current patch only assures that the P bit is cleared in the VMCB.save
+state and that segment attributes are not zeroed out if the segment is
+not present or is unusable, therefore the CPL can be safely restored from
+the DPL field.
+
+This is only one part of the fix, since the QEMU side should be fixed
+accordingly not to zero out attributes on its side. A corresponding patch
+will follow.
+
+[1] Message id: CAJrWOzD6Xq==b-zYCDdFLgSRMPM-NkNuTSDFEtX=7MreT45i7Q@mail.gmail.com
+
+Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
+Signed-off-by: Mikhail Sennikovskii <mikhail.sennikovskii@profitbricks.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: kvm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 2d96e30..8551a54 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1876,6 +1876,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
+ */
+ if (var->unusable)
+ var->db = 0;
++ /* This is symmetric with svm_set_segment() */
+ var->dpl = to_svm(vcpu)->vmcb->save.cpl;
+ break;
+ }
+@@ -2021,18 +2022,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
+ s->base = var->base;
+ s->limit = var->limit;
+ s->selector = var->selector;
+- if (var->unusable)
+- s->attrib = 0;
+- else {
+- s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+- s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+- s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+- s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
+- s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+- s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+- s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+- s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
+- }
++ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
++ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
++ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
++ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
++ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
++ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
++ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
++ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
+
+ /*
+ * This is always accurate, except if SYSRET returned to a segment
+@@ -2041,7 +2038,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
+ * would entail passing the CPL to userspace and back.
+ */
+ if (seg == VCPU_SREG_SS)
+- svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
++ /* This is symmetric with svm_get_segment() */
++ svm->vmcb->save.cpl = (var->dpl & 3);
+
+ mark_dirty(svm->vmcb, VMCB_SEG);
+ }
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch
new file mode 100644
index 00000000..806b1ac0
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch
@@ -0,0 +1,69 @@
+From b53c02711255aa79e4e1a9974ca24610c4fbd7d7 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 25 Jan 2017 11:58:58 +0100
+Subject: [PATCH 15/33] KVM: nVMX: vmx_complete_nested_posted_interrupt() can't
+ fail
+
+(cherry picked from commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f)
+
+vmx_complete_nested_posted_interrupt() can't fail; let's turn it into
+a void function.
+ +Signed-off-by: David Hildenbrand <david@redhat.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 17fcbaf..13dc454 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4722,7 +4722,7 @@ static bool vmx_get_enable_apicv(void) + return enable_apicv; + } + +-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) ++static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; +@@ -4733,13 +4733,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + vmx->nested.pi_pending) { + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) +- return 0; ++ return; + + max_irr = find_last_bit( + (unsigned long *)vmx->nested.pi_desc->pir, 256); + + if (max_irr == 256) +- return 0; ++ return; + + vapic_page = kmap(vmx->nested.virtual_apic_page); + __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); +@@ -4752,7 +4752,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + vmcs_write16(GUEST_INTR_STATUS, status); + } + } +- return 0; + } + + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +@@ -10440,7 +10439,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) + return 0; + } + +- return vmx_complete_nested_posted_interrupt(vcpu); ++ vmx_complete_nested_posted_interrupt(vcpu); ++ return 0; + } + + static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch new file mode 100644 index 00000000..cf8424c9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch @@ -0,0 +1,42 @@ +From d79905a595224c714dc8da5df054653c3b958250 Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Thu, 1 Jun 2017 12:44:46 -0700 +Subject: [PATCH 16/93] KVM: nVMX: Update vmcs12->guest_linear_address on + nested VM-exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit d281e13b0bfe745a21061a194e386a949784393f ] + +The guest-linear address field is set for VM exits due to attempts to +execute LMSW with a memory operand and VM exits due to attempts to +execute INS or OUTS for which the relevant segment is usable, +regardless of whether or not EPT is in use. 
+ +Fixes: 119a9c01a5922 ("KVM: nVMX: pass valid guest linear-address to the L1") +Signed-off-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 8d842d9..273313f 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -10621,8 +10621,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); + } + +- if (nested_cpu_has_ept(vmcs12)) +- vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); ++ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); + + if (nested_cpu_has_vid(vmcs12)) + vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch new file mode 100644 index 00000000..e7f44b1b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch @@ -0,0 +1,119 @@ +From 50fefe1aabf115927dbe944d4607d3696ed2773e Mon Sep 17 00:00:00 2001 +From: David Matlack <dmatlack@google.com> +Date: Tue, 1 Aug 2017 14:00:40 -0700 +Subject: [PATCH 16/33] KVM: nVMX: mark vmcs12 pages dirty on L2 exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +(cherry picked from commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570) + +The host physical addresses of L1's Virtual APIC Page and Posted +Interrupt descriptor are loaded into the VMCS02. The CPU may write +to these pages via their host physical address while L2 is running, +bypassing address-translation-based dirty tracking (e.g. EPT write +protection). Mark them dirty on every exit from L2 to prevent them +from getting out of sync with dirty tracking. + +Also mark the virtual APIC page and the posted interrupt descriptor +dirty when KVM is virtualizing posted interrupt processing. + +Signed-off-by: David Matlack <dmatlack@google.com> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 43 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 13dc454..2e88fd1 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4722,6 +4722,28 @@ static bool vmx_get_enable_apicv(void) + return enable_apicv; + } + ++static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gfn_t gfn; ++ ++ /* ++ * Don't need to mark the APIC access page dirty; it is never ++ * written to by the CPU during APIC virtualization. 
++ */ ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++} ++ ++ + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +@@ -4729,18 +4751,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + void *vapic_page; + u16 status; + +- if (vmx->nested.pi_desc && +- vmx->nested.pi_pending) { +- vmx->nested.pi_pending = false; +- if (!pi_test_and_clear_on(vmx->nested.pi_desc)) +- return; +- +- max_irr = find_last_bit( +- (unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) ++ return; + +- if (max_irr == 256) +- return; ++ vmx->nested.pi_pending = false; ++ if (!pi_test_and_clear_on(vmx->nested.pi_desc)) ++ return; + ++ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (max_irr != 256) { + vapic_page = kmap(vmx->nested.virtual_apic_page); + __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); + kunmap(vmx->nested.virtual_apic_page); +@@ -4752,6 +4771,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + vmcs_write16(GUEST_INTR_STATUS, status); + } + } ++ ++ nested_mark_vmcs12_pages_dirty(vcpu); + } + + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +@@ -8009,6 +8030,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + ++ /* ++ * The host physical addresses of some pages of guest memory ++ * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU ++ * may write to these pages via their host physical address while ++ * L2 is running, bypassing any address-translation-based dirty ++ * tracking (e.g. EPT write protection). ++ * ++ * Mark them dirty on every exit from L2 to prevent them from ++ * getting out of sync with dirty tracking. ++ */ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++ + if (vmx->nested.nested_run_pending) + return false; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch new file mode 100644 index 00000000..96687e49 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch @@ -0,0 +1,295 @@ +From 8e52c41b7072930e5951b324964f31ef6991f3af Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Mon, 27 Nov 2017 17:22:25 -0600 +Subject: [PATCH 17/33] KVM: nVMX: Eliminate vmcs02 pool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +(cherry picked from commit de3a0021a60635de96aa92713c1a31a96747d72c) + +The potential performance advantages of a vmcs02 pool have never been +realized. To simplify the code, eliminate the pool. Instead, a single +vmcs02 is allocated per VCPU when the VCPU enters VMX operation. 
+ +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Signed-off-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Mark Kanda <mark.kanda@oracle.com> +Reviewed-by: Ameya More <ameya.more@oracle.com> +Reviewed-by: David Hildenbrand <david@redhat.com> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 146 +++++++++-------------------------------------------- + 1 file changed, 23 insertions(+), 123 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 2e88fd1..099f221 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -174,7 +174,6 @@ module_param(ple_window_max, int, S_IRUGO); + extern const ulong vmx_return; + + #define NR_AUTOLOAD_MSRS 8 +-#define VMCS02_POOL_SIZE 1 + + struct vmcs { + u32 revision_id; +@@ -208,7 +207,7 @@ struct shared_msr_entry { + * stored in guest memory specified by VMPTRLD, but is opaque to the guest, + * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. + * More than one of these structures may exist, if L1 runs multiple L2 guests. +- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the ++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the + * underlying hardware which will be used to run L2. + * This structure is packed to ensure that its layout is identical across + * machines (necessary for live migration). +@@ -387,13 +386,6 @@ struct __packed vmcs12 { + */ + #define VMCS12_SIZE 0x1000 + +-/* Used to remember the last vmcs02 used for some recently used vmcs12s */ +-struct vmcs02_list { +- struct list_head list; +- gpa_t vmptr; +- struct loaded_vmcs vmcs02; +-}; +- + /* + * The nested_vmx structure is part of vcpu_vmx, and holds information we need + * for correct emulation of VMX (i.e., nested VMX) on this vcpu. +@@ -420,15 +412,15 @@ struct nested_vmx { + */ + bool sync_shadow_vmcs; + +- /* vmcs02_list cache of VMCSs recently used to run L2 guests */ +- struct list_head vmcs02_pool; +- int vmcs02_num; + bool change_vmcs01_virtual_x2apic_mode; + /* L2 must run next, and mustn't decide to exit to L1. */ + bool nested_run_pending; ++ ++ struct loaded_vmcs vmcs02; ++ + /* +- * Guest pages referred to in vmcs02 with host-physical pointers, so +- * we must keep them pinned while L2 runs. ++ * Guest pages referred to in the vmcs02 with host-physical ++ * pointers, so we must keep them pinned while L2 runs. + */ + struct page *apic_access_page; + struct page *virtual_apic_page; +@@ -6657,94 +6649,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) + } + + /* +- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. +- * We could reuse a single VMCS for all the L2 guests, but we also want the +- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this +- * allows keeping them loaded on the processor, and in the future will allow +- * optimizations where prepare_vmcs02 doesn't need to set all the fields on +- * every entry if they never change. +- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE +- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. +- * +- * The following functions allocate and free a vmcs02 in this pool. +- */ +- +-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. 
*/ +-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) +-{ +- struct vmcs02_list *item; +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) +- if (item->vmptr == vmx->nested.current_vmptr) { +- list_move(&item->list, &vmx->nested.vmcs02_pool); +- return &item->vmcs02; +- } +- +- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { +- /* Recycle the least recently used VMCS. */ +- item = list_last_entry(&vmx->nested.vmcs02_pool, +- struct vmcs02_list, list); +- item->vmptr = vmx->nested.current_vmptr; +- list_move(&item->list, &vmx->nested.vmcs02_pool); +- return &item->vmcs02; +- } +- +- /* Create a new VMCS */ +- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); +- if (!item) +- return NULL; +- item->vmcs02.vmcs = alloc_vmcs(); +- item->vmcs02.shadow_vmcs = NULL; +- if (!item->vmcs02.vmcs) { +- kfree(item); +- return NULL; +- } +- loaded_vmcs_init(&item->vmcs02); +- item->vmptr = vmx->nested.current_vmptr; +- list_add(&(item->list), &(vmx->nested.vmcs02_pool)); +- vmx->nested.vmcs02_num++; +- return &item->vmcs02; +-} +- +-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ +-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) +-{ +- struct vmcs02_list *item; +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) +- if (item->vmptr == vmptr) { +- free_loaded_vmcs(&item->vmcs02); +- list_del(&item->list); +- kfree(item); +- vmx->nested.vmcs02_num--; +- return; +- } +-} +- +-/* +- * Free all VMCSs saved for this vcpu, except the one pointed by +- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs +- * must be &vmx->vmcs01. +- */ +-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) +-{ +- struct vmcs02_list *item, *n; +- +- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); +- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { +- /* +- * Something will leak if the above WARN triggers. Better than +- * a use-after-free. +- */ +- if (vmx->loaded_vmcs == &item->vmcs02) +- continue; +- +- free_loaded_vmcs(&item->vmcs02); +- list_del(&item->list); +- kfree(item); +- vmx->nested.vmcs02_num--; +- } +-} +- +-/* + * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), + * set the success or error code of an emulated VMX instruction, as specified + * by Vol 2B, VMX Instruction Reference, "Conventions". 
+@@ -7051,6 +6955,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + return 1; + } + ++ vmx->nested.vmcs02.vmcs = alloc_vmcs(); ++ vmx->nested.vmcs02.shadow_vmcs = NULL; ++ if (!vmx->nested.vmcs02.vmcs) ++ goto out_vmcs02; ++ loaded_vmcs_init(&vmx->nested.vmcs02); ++ + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); +@@ -7073,9 +6983,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + vmx->vmcs01.shadow_vmcs = shadow_vmcs; + } + +- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); +- vmx->nested.vmcs02_num = 0; +- + hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; +@@ -7093,6 +7000,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + free_page((unsigned long)vmx->nested.msr_bitmap); + + out_msr_bitmap: ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++ ++out_vmcs02: + return -ENOMEM; + } + +@@ -7178,7 +7088,7 @@ static void free_nested(struct vcpu_vmx *vmx) + vmx->vmcs01.shadow_vmcs = NULL; + } + kfree(vmx->nested.cached_vmcs12); +- /* Unpin physical memory we referred to in current vmcs02 */ ++ /* Unpin physical memory we referred to in the vmcs02 */ + if (vmx->nested.apic_access_page) { + nested_release_page(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = NULL; +@@ -7194,7 +7104,7 @@ static void free_nested(struct vcpu_vmx *vmx) + vmx->nested.pi_desc = NULL; + } + +- nested_free_all_saved_vmcss(vmx); ++ free_loaded_vmcs(&vmx->nested.vmcs02); + } + + /* Emulate the VMXOFF instruction */ +@@ -7242,8 +7152,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) + kunmap(page); + nested_release_page(page); + +- nested_free_vmcs02(vmx, vmptr); +- + skip_emulated_instruction(vcpu); + nested_vmx_succeed(vcpu); + return 1; +@@ -8032,10 +7940,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) + + /* + * The host physical addresses of some pages of guest memory +- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU +- * may write to these pages via their host physical address while +- * L2 is running, bypassing any address-translation-based dirty +- * tracking (e.g. EPT write protection). ++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC ++ * Page). The CPU may write to these pages via their host ++ * physical address while L2 is running, bypassing any ++ * address-translation-based dirty tracking (e.g. EPT write ++ * protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. +@@ -10170,7 +10079,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; +- struct loaded_vmcs *vmcs02; + bool ia32e; + u32 msr_entry_idx; + +@@ -10310,17 +10218,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) + * the nested entry. 
+ */ + +- vmcs02 = nested_get_current_vmcs02(vmx); +- if (!vmcs02) +- return -ENOMEM; +- + enter_guest_mode(vcpu); + + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + + cpu = get_cpu(); +- vmx->loaded_vmcs = vmcs02; ++ vmx->loaded_vmcs = &vmx->nested.vmcs02; + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; +@@ -10833,10 +10737,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, + vm_exit_controls_reset_shadow(vmx); + vmx_segment_cache_clear(vmx); + +- /* if no vmcs02 cache requested, remove the one we used */ +- if (VMCS02_POOL_SIZE == 0) +- nested_free_vmcs02(vmx, vmx->nested.current_vmptr); +- + load_vmcs12_host_state(vcpu, vmcs12); + + /* Update any VMCS fields that might have changed while L2 ran */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch new file mode 100644 index 00000000..cb6045b1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch @@ -0,0 +1,62 @@ +From 1007b2c9e70fe3aaffda12b809da0f3b53642777 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 20 Apr 2018 14:06:29 +0200 +Subject: [PATCH 17/93] perf/x86: Fix possible Spectre-v1 indexing for + hw_perf_event cache_* + +commit ef9ee4ad38445a30909c48998624861716f2a994 upstream. + +> arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids[cache_type]' (local cap) +> arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids' (local cap) +> arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs[cache_type]' (local cap) +> arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs' (local cap) + +Userspace controls @config which contains 3 (byte) fields used for a 3 +dimensional array deref. 
+ +Reported-by: Dan Carpenter <dan.carpenter@oracle.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: <stable@kernel.org> +Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> +Cc: Arnaldo Carvalho de Melo <acme@redhat.com> +Cc: Jiri Olsa <jolsa@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Stephane Eranian <eranian@google.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Vince Weaver <vincent.weaver@maine.edu> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/events/core.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c +index 38623e2..6b955e3 100644 +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -303,17 +303,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) + + config = attr->config; + +- cache_type = (config >> 0) & 0xff; ++ cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; ++ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; ++ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; ++ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); + + val = hw_cache_event_ids[cache_type][cache_op][cache_result]; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch new file mode 100644 index 00000000..a22f91a8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch @@ -0,0 +1,104 @@ +From 80f4f0e9de9cce1047ac0aac305aca7310e37313 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Thu, 11 Jan 2018 12:16:15 +0100 +Subject: [PATCH 18/33] KVM: VMX: introduce alloc_loaded_vmcs + +(cherry picked from commit f21f165ef922c2146cc5bdc620f542953c41714b) + +Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also +allocate an MSR bitmap there. 
+ +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 099f221..6814355 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3514,11 +3514,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) + return vmcs; + } + +-static struct vmcs *alloc_vmcs(void) +-{ +- return alloc_vmcs_cpu(raw_smp_processor_id()); +-} +- + static void free_vmcs(struct vmcs *vmcs) + { + free_pages((unsigned long)vmcs, vmcs_config.order); +@@ -3537,6 +3532,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); + } + ++static struct vmcs *alloc_vmcs(void) ++{ ++ return alloc_vmcs_cpu(raw_smp_processor_id()); ++} ++ ++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ loaded_vmcs->vmcs = alloc_vmcs(); ++ if (!loaded_vmcs->vmcs) ++ return -ENOMEM; ++ ++ loaded_vmcs->shadow_vmcs = NULL; ++ loaded_vmcs_init(loaded_vmcs); ++ return 0; ++} ++ + static void free_kvm_area(void) + { + int cpu; +@@ -6916,6 +6927,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + struct vmcs *shadow_vmcs; + const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED + | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ int r; + + /* The Intel VMX Instruction Reference lists a bunch of bits that + * are prerequisite to running VMXON, most notably cr4.VMXE must be +@@ -6955,11 +6967,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + return 1; + } + +- vmx->nested.vmcs02.vmcs = alloc_vmcs(); +- vmx->nested.vmcs02.shadow_vmcs = NULL; +- if (!vmx->nested.vmcs02.vmcs) ++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); ++ if (r < 0) + goto out_vmcs02; +- loaded_vmcs_init(&vmx->nested.vmcs02); + + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = +@@ -9090,17 +9100,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) + if (!vmx->guest_msrs) + goto free_pml; + +- vmx->loaded_vmcs = &vmx->vmcs01; +- vmx->loaded_vmcs->vmcs = alloc_vmcs(); +- vmx->loaded_vmcs->shadow_vmcs = NULL; +- if (!vmx->loaded_vmcs->vmcs) +- goto free_msrs; + if (!vmm_exclusive) + kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); +- loaded_vmcs_init(vmx->loaded_vmcs); ++ err = alloc_loaded_vmcs(&vmx->vmcs01); + if (!vmm_exclusive) + kvm_cpu_vmxoff(); ++ if (err < 0) ++ goto free_msrs; + ++ vmx->loaded_vmcs = &vmx->vmcs01; + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); + vmx->vcpu.cpu = cpu; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch new file mode 100644 index 00000000..40bc2cae --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch @@ -0,0 +1,53 @@ +From 8708c762c727c3c4a8fb6c75fc1d5585f89ece90 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 20 Apr 2018 14:25:48 +0200 +Subject: [PATCH 18/93] perf/x86/cstate: Fix possible Spectre-v1 indexing for + pkg_msr + +commit a5f81290ce475489fa2551c01a07470c1a4c932e upstream. 
+
+> arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap)
+
+Userspace controls @attr; sanitize cfg (attr->config) before using it
+to index an array.
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/cstate.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
+index fec8a46..c6a04c0 100644
+--- a/arch/x86/events/intel/cstate.c
++++ b/arch/x86/events/intel/cstate.c
+@@ -90,6 +90,7 @@
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/perf_event.h>
++#include <linux/nospec.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
+ #include "../perf_event.h"
+@@ -300,6 +301,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
+ } else if (event->pmu == &cstate_pkg_pmu) {
+ if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+ return -EINVAL;
++ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
+ if (!pkg_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = pkg_msr[cfg].msr;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch
new file mode 100644
index 00000000..0a8db555
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch
@@ -0,0 +1,585 @@
+From cc42f184dfdfed46c394274020b84a1641f24714 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 16 Jan 2018 16:51:18 +0100
+Subject: [PATCH 19/33] KVM: VMX: make MSR bitmaps per-VCPU
+
+(cherry picked from commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6)
+
+Place the MSR bitmap in struct loaded_vmcs, and update it in place
+every time the x2apic or APICv state can change. This is rare and
+the loop can handle 64 MSRs per iteration, in a similar fashion to
+nested_vmx_prepare_msr_bitmap.
+
+This prepares for choosing, on a per-VM basis, whether to intercept
+the SPEC_CTRL and PRED_CMD MSRs.
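+
+For background, a sketch of the hardware layout being manipulated (per
+the Intel SDM; the helper below is illustrative, not from this patch):
+the MSR bitmap is a single 4K page with read-intercept bits for MSRs
+0x00000000-0x00001fff at offset 0x000 and for 0xc0000000-0xc0001fff at
+offset 0x400, with the corresponding write-intercept bits at 0x800 and
+0xc00. A clear bit lets the guest access the MSR without a VM exit:
+
+  static void msr_bitmap_disable_read_intercept(unsigned long *bitmap, u32 msr)
+  {
+          if (msr <= 0x1fff)
+                  __clear_bit(msr, bitmap + 0x000 / sizeof(long));
+          else if (msr >= 0xc0000000U && msr <= 0xc0001fffU)
+                  __clear_bit(msr & 0x1fff, bitmap + 0x400 / sizeof(long));
+  }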
+ +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Suggested-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 315 +++++++++++++++++++---------------------------------- + 1 file changed, 114 insertions(+), 201 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 6814355..c6a7563 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -110,6 +110,14 @@ static u64 __read_mostly host_xss; + static bool __read_mostly enable_pml = 1; + module_param_named(pml, enable_pml, bool, S_IRUGO); + ++#define MSR_TYPE_R 1 ++#define MSR_TYPE_W 2 ++#define MSR_TYPE_RW 3 ++ ++#define MSR_BITMAP_MODE_X2APIC 1 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2 ++#define MSR_BITMAP_MODE_LM 4 ++ + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL + + /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ +@@ -191,6 +199,7 @@ struct loaded_vmcs { + struct vmcs *shadow_vmcs; + int cpu; + int launched; ++ unsigned long *msr_bitmap; + struct list_head loaded_vmcss_on_cpu_link; + }; + +@@ -429,8 +438,6 @@ struct nested_vmx { + bool pi_pending; + u16 posted_intr_nv; + +- unsigned long *msr_bitmap; +- + struct hrtimer preemption_timer; + bool preemption_timer_expired; + +@@ -531,6 +538,7 @@ struct vcpu_vmx { + unsigned long host_rsp; + u8 fail; + bool nmi_known_unmasked; ++ u8 msr_bitmap_mode; + u32 exit_intr_info; + u32 idt_vectoring_info; + ulong rflags; +@@ -902,6 +910,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); + static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); + static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); + static int alloc_identity_pagetable(struct kvm *kvm); ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); + + static DEFINE_PER_CPU(struct vmcs *, vmxarea); + static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +@@ -921,12 +930,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + + static unsigned long *vmx_io_bitmap_a; + static unsigned long *vmx_io_bitmap_b; +-static unsigned long *vmx_msr_bitmap_legacy; +-static unsigned long *vmx_msr_bitmap_longmode; +-static unsigned long *vmx_msr_bitmap_legacy_x2apic; +-static unsigned long *vmx_msr_bitmap_longmode_x2apic; +-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; +-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; + static unsigned long *vmx_vmread_bitmap; + static unsigned long *vmx_vmwrite_bitmap; + +@@ -2517,36 +2520,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) + vmx->guest_msrs[from] = tmp; + } + +-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) +-{ +- unsigned long *msr_bitmap; +- +- if (is_guest_mode(vcpu)) +- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; +- else if (cpu_has_secondary_exec_ctrls() && +- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { +- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic; +- else +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic; +- } else { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; +- else +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; +- } +- } else { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode; +- else +- msr_bitmap = vmx_msr_bitmap_legacy; +- } +- +- 
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); +-} +- + /* + * Set up the vmcs to automatically save and restore system + * msrs. Don't touch the 64-bit msrs if the guest is in legacy +@@ -2587,7 +2560,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) + vmx->save_nmsrs = save_nmsrs; + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(&vmx->vcpu); ++ vmx_update_msr_bitmap(&vmx->vcpu); + } + + /* +@@ -3529,6 +3502,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) + loaded_vmcs_clear(loaded_vmcs); + free_vmcs(loaded_vmcs->vmcs); + loaded_vmcs->vmcs = NULL; ++ if (loaded_vmcs->msr_bitmap) ++ free_page((unsigned long)loaded_vmcs->msr_bitmap); + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); + } + +@@ -3545,7 +3520,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); ++ ++ if (cpu_has_vmx_msr_bitmap()) { ++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!loaded_vmcs->msr_bitmap) ++ goto out_vmcs; ++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); ++ } + return 0; ++ ++out_vmcs: ++ free_loaded_vmcs(loaded_vmcs); ++ return -ENOMEM; + } + + static void free_kvm_area(void) +@@ -4548,10 +4534,8 @@ static void free_vpid(int vpid) + spin_unlock(&vmx_vpid_lock); + } + +-#define MSR_TYPE_R 1 +-#define MSR_TYPE_W 2 +-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +- u32 msr, int type) ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) + { + int f = sizeof(unsigned long); + +@@ -4585,8 +4569,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + } + } + +-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +- u32 msr, int type) ++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) + { + int f = sizeof(unsigned long); + +@@ -4620,6 +4604,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + } + } + ++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type, bool value) ++{ ++ if (value) ++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); ++ else ++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); ++} ++ + /* + * If a msr is allowed by L0, we should check whether it is allowed by L1. + * The corresponding bit will be cleared unless both of L0 and L1 allow it. 
+@@ -4666,58 +4659,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, + } + } + +-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) ++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) + { +- if (!longmode_only) +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +- msr, MSR_TYPE_R | MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +- msr, MSR_TYPE_R | MSR_TYPE_W); +-} ++ u8 mode = 0; + +-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +-{ +- if (apicv_active) { +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_R); +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_R); +- } else { +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); ++ if (cpu_has_secondary_exec_ctrls() && ++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { ++ mode |= MSR_BITMAP_MODE_X2APIC; ++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) ++ mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } ++ ++ if (is_long_mode(vcpu)) ++ mode |= MSR_BITMAP_MODE_LM; ++ ++ return mode; + } + +-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) ++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) ++ ++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, ++ u8 mode) + { +- if (apicv_active) { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_R); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_R); +- } else { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++ ++ if (mode & MSR_BITMAP_MODE_X2APIC) { ++ /* ++ * TPR reads and writes can be virtualized even if virtual interrupt ++ * delivery is not in use. 
++ */ ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); ++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { ++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); ++ } + } + } + +-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) + { +- if (apicv_active) { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_W); +- } else { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_W); +- } ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; ++ u8 mode = vmx_msr_bitmap_mode(vcpu); ++ u8 changed = mode ^ vmx->msr_bitmap_mode; ++ ++ if (!changed) ++ return; ++ ++ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, ++ !(mode & MSR_BITMAP_MODE_LM)); ++ ++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) ++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); ++ ++ vmx->msr_bitmap_mode = mode; + } + + static bool vmx_get_enable_apicv(void) +@@ -4953,7 +4956,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) + } + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + } + + static u32 vmx_exec_control(struct vcpu_vmx *vmx) +@@ -5042,7 +5045,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + } + if (cpu_has_vmx_msr_bitmap()) +- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); ++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); + + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ + +@@ -6371,7 +6374,7 @@ static void wakeup_handler(void) + + static __init int hardware_setup(void) + { +- int r = -ENOMEM, i, msr; ++ int r = -ENOMEM, i; + + rdmsrl_safe(MSR_EFER, &host_efer); + +@@ -6386,41 +6389,13 @@ static __init int hardware_setup(void) + if (!vmx_io_bitmap_b) + goto out; + +- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy) +- goto out1; +- +- vmx_msr_bitmap_legacy_x2apic = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy_x2apic) +- goto out2; +- +- vmx_msr_bitmap_legacy_x2apic_apicv_inactive = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) +- goto out3; +- +- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode) +- goto out4; +- +- vmx_msr_bitmap_longmode_x2apic = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode_x2apic) +- goto out5; +- +- vmx_msr_bitmap_longmode_x2apic_apicv_inactive = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) +- goto out6; +- + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) +- goto out7; ++ goto out1; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) +- goto out8; ++ goto out2; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + 
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); +@@ -6434,12 +6409,9 @@ static __init int hardware_setup(void) + + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + +- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); +- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); +- + if (setup_vmcs_config(&vmcs_config) < 0) { + r = -EIO; +- goto out9; ++ goto out3; + } + + if (boot_cpu_has(X86_FEATURE_NX)) +@@ -6494,48 +6466,8 @@ static __init int hardware_setup(void) + kvm_tsc_scaling_ratio_frac_bits = 48; + } + +- vmx_disable_intercept_for_msr(MSR_FS_BASE, false); +- vmx_disable_intercept_for_msr(MSR_GS_BASE, false); +- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); +- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); +- +- memcpy(vmx_msr_bitmap_legacy_x2apic, +- vmx_msr_bitmap_legacy, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_longmode_x2apic, +- vmx_msr_bitmap_longmode, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- vmx_msr_bitmap_legacy, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- vmx_msr_bitmap_longmode, PAGE_SIZE); +- + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + +- /* +- * enable_apicv && kvm_vcpu_apicv_active() +- */ +- for (msr = 0x800; msr <= 0x8ff; msr++) +- vmx_disable_intercept_msr_read_x2apic(msr, true); +- +- /* TMCCT */ +- vmx_enable_intercept_msr_read_x2apic(0x839, true); +- /* TPR */ +- vmx_disable_intercept_msr_write_x2apic(0x808, true); +- /* EOI */ +- vmx_disable_intercept_msr_write_x2apic(0x80b, true); +- /* SELF-IPI */ +- vmx_disable_intercept_msr_write_x2apic(0x83f, true); +- +- /* +- * (enable_apicv && !kvm_vcpu_apicv_active()) || +- * !enable_apicv +- */ +- /* TPR */ +- vmx_disable_intercept_msr_read_x2apic(0x808, false); +- vmx_disable_intercept_msr_write_x2apic(0x808, false); +- + if (enable_ept) { + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, + (enable_ept_ad_bits) ? 
VMX_EPT_ACCESS_BIT : 0ull, +@@ -6581,22 +6513,10 @@ static __init int hardware_setup(void) + + return alloc_kvm_area(); + +-out9: +- free_page((unsigned long)vmx_vmwrite_bitmap); +-out8: +- free_page((unsigned long)vmx_vmread_bitmap); +-out7: +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); +-out6: +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +-out5: +- free_page((unsigned long)vmx_msr_bitmap_longmode); +-out4: +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); + out3: +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); ++ free_page((unsigned long)vmx_vmwrite_bitmap); + out2: +- free_page((unsigned long)vmx_msr_bitmap_legacy); ++ free_page((unsigned long)vmx_vmread_bitmap); + out1: + free_page((unsigned long)vmx_io_bitmap_b); + out: +@@ -6607,12 +6527,6 @@ static __init int hardware_setup(void) + + static __exit void hardware_unsetup(void) + { +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); +- free_page((unsigned long)vmx_msr_bitmap_legacy); +- free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); +@@ -6971,13 +6885,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + if (r < 0) + goto out_vmcs02; + +- if (cpu_has_vmx_msr_bitmap()) { +- vmx->nested.msr_bitmap = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx->nested.msr_bitmap) +- goto out_msr_bitmap; +- } +- + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + if (!vmx->nested.cached_vmcs12) + goto out_cached_vmcs12; +@@ -7007,9 +6914,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + kfree(vmx->nested.cached_vmcs12); + + out_cached_vmcs12: +- free_page((unsigned long)vmx->nested.msr_bitmap); +- +-out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + + out_vmcs02: +@@ -7088,10 +6992,6 @@ static void free_nested(struct vcpu_vmx *vmx) + vmx->nested.vmxon = false; + free_vpid(vmx->nested.vpid02); + nested_release_vmcs12(vmx); +- if (vmx->nested.msr_bitmap) { +- free_page((unsigned long)vmx->nested.msr_bitmap); +- vmx->nested.msr_bitmap = NULL; +- } + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); +@@ -8450,7 +8350,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) + } + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + } + + static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +@@ -9068,6 +8968,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) + { + int err; + struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ unsigned long *msr_bitmap; + int cpu; + + if (!vmx) +@@ -9108,6 +9009,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) + if (err < 0) + goto free_msrs; + ++ msr_bitmap = vmx->vmcs01.msr_bitmap; ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); ++ 
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); ++ vmx->msr_bitmap_mode = 0; ++ + vmx->loaded_vmcs = &vmx->vmcs01; + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); +@@ -9495,7 +9405,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + int msr; + struct page *page; + unsigned long *msr_bitmap_l1; +- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; ++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + + /* This shortcut is ok because we support only x2APIC MSRs so far. */ + if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) +@@ -10007,6 +9917,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); + ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); ++ + if (enable_vpid) { + /* + * There is no direct mapping between vpid02 and vpid12, the +@@ -10694,7 +10607,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + + if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, + vmcs12->vm_exit_msr_load_count)) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch new file mode 100644 index 00000000..876e4bd9 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch @@ -0,0 +1,65 @@ +From 2c1bc0d092e3885ee643c9d5755957a1297b5245 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 20 Apr 2018 14:23:36 +0200 +Subject: [PATCH 19/93] perf/x86/msr: Fix possible Spectre-v1 indexing in the + MSR driver + +commit 06ce6e9b6d6c09d4129c6e24a1314a395d816c10 upstream. + +> arch/x86/events/msr.c:178 msr_event_init() warn: potential spectre issue 'msr' (local cap) + +Userspace controls @attr, sanitize cfg (attr->config) before using it +to index an array. 
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/msr.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
+index 4bb3ec6..be0b196 100644
+--- a/arch/x86/events/msr.c
++++ b/arch/x86/events/msr.c
+@@ -1,4 +1,5 @@
+ #include <linux/perf_event.h>
++#include <linux/nospec.h>
+ #include <asm/intel-family.h>
+ 
+ enum perf_msr_id {
+@@ -136,9 +137,6 @@ static int msr_event_init(struct perf_event *event)
+ 	if (event->attr.type != event->pmu->type)
+ 		return -ENOENT;
+ 
+-	if (cfg >= PERF_MSR_EVENT_MAX)
+-		return -EINVAL;
+-
+ 	/* unsupported modes and filters */
+ 	if (event->attr.exclude_user   ||
+ 	    event->attr.exclude_kernel ||
+@@ -149,6 +147,11 @@ static int msr_event_init(struct perf_event *event)
+ 	    event->attr.sample_period) /* no sampling */
+ 		return -EINVAL;
+ 
++	if (cfg >= PERF_MSR_EVENT_MAX)
++		return -EINVAL;
++
++	cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
++
+ 	if (!msr[cfg].attr)
+ 		return -EINVAL;
+ 
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch
new file mode 100644
index 00000000..731a182a
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch
@@ -0,0 +1,352 @@
+From b70d7889c078c97d11ae6412760f3231fda324cd Mon Sep 17 00:00:00 2001
+From: Ashok Raj <ashok.raj@intel.com>
+Date: Thu, 1 Feb 2018 22:59:43 +0100
+Subject: [PATCH 20/33] KVM/x86: Add IBPB support
+
+(cherry picked from commit 15d45071523d89b3fb7372e2135fbd72f6af9506)
+
+The Indirect Branch Predictor Barrier (IBPB) is an indirect branch
+control mechanism. It keeps earlier branches from influencing
+later ones.
+
+Unlike IBRS and STIBP, IBPB does not define a new mode of operation.
+It's a command that ensures predicted branch targets aren't used after
+the barrier. Although IBRS and IBPB appear in the same CPUID
+enumeration, IBPB is very different.
+
+IBPB helps mitigate three potential attacks:
+
+* Mitigate guests from being attacked by other guests.
+  - This is addressed by issuing an IBPB when we do a guest switch.
+
+* Mitigate attacks from guest/ring3->host/ring3.
+  These would require an IBPB during a context switch in the host, or
+  after VMEXIT. The host process has two ways to mitigate:
+  - Either it can be compiled with retpoline
+  - If it's going through a context switch and has set !dumpable, then
+    there is an IBPB in that path.
+    (Tim's patch: https://patchwork.kernel.org/patch/10192871)
+  - The case where you return to Qemu after a VMEXIT might make Qemu
+    attackable from the guest when Qemu isn't compiled with retpoline.
+  Doing an IBPB on every VMEXIT has been reported to cause TSC
+  calibration problems in the guest.
+
+* Mitigate guest/ring0->host/ring0 attacks.
+  When the host kernel is using retpoline, it is safe against these
+  attacks. If the host kernel isn't using retpoline, we might need to do
+  an IBPB flush on every VMEXIT.
+
+Even when using retpoline for indirect calls, in certain conditions 'ret'
+can use the BTB on Skylake-era CPUs. There are other mitigations
+available like RSB stuffing/clearing.
+
+* IBPB is issued only for SVM during svm_free_vcpu().
+  VMX has a vmclear and SVM doesn't.  Follow the discussion here:
+  https://lkml.org/lkml/2018/1/15/146
+
+Refer to the following Intel documentation for more details on the
+enumeration and control of these features, and on the available
+mitigations:
+
+https://software.intel.com/en-us/side-channel-security-support
+
+[peterz: rebase and changelog rewrite]
+[karahmed: - rebase
+ - vmx: expose PRED_CMD if guest has it in CPUID
+ - svm: only pass through IBPB if guest has it in CPUID
+ - vmx: support !cpu_has_vmx_msr_bitmap()
+ - vmx: support nested]
+[dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
+        PRED_CMD is a write-only MSR]
+
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: kvm@vger.kernel.org
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
+Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 11 +++++++-
+ arch/x86/kvm/cpuid.h | 12 ++++++++
+ arch/x86/kvm/svm.c   | 28 +++++++++++++++++++
+ arch/x86/kvm/vmx.c   | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 4 files changed, 127 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index afa7bbb..42323be 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+ 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ 
++	/* cpuid 0x80000008.ebx */
++	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
++		F(IBPB);
++
+ 	/* cpuid 0xC0000001.edx */
+ 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
+ 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+@@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 		if (!g_phys_as)
+ 			g_phys_as = phys_as;
+ 		entry->eax = g_phys_as | (virt_as << 8);
+-		entry->ebx = entry->edx = 0;
++		entry->edx = 0;
++		/* IBPB isn't necessarily present in hardware cpuid */
++		if (boot_cpu_has(X86_FEATURE_IBPB))
++			entry->ebx |= F(IBPB);
++		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
++		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ 		break;
+ 	}
+ 	case 0x80000019:
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 35058c2..f4a2a1a 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -152,6 +152,18 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) + return best && (best->edx & bit(X86_FEATURE_RDTSCP)); + } + ++static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); ++ if (best && (best->ebx & bit(X86_FEATURE_IBPB))) ++ return true; ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); ++} ++ ++ + /* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 + */ +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 491f077..43e45b9 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -248,6 +248,7 @@ static const struct svm_direct_access_msrs { + { .index = MSR_CSTAR, .always = true }, + { .index = MSR_SYSCALL_MASK, .always = true }, + #endif ++ { .index = MSR_IA32_PRED_CMD, .always = false }, + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, + { .index = MSR_IA32_LASTINTFROMIP, .always = false }, +@@ -510,6 +511,7 @@ struct svm_cpu_data { + struct kvm_ldttss_desc *tss_desc; + + struct page *save_area; ++ struct vmcb *current_vmcb; + }; + + static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +@@ -1641,11 +1643,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) + __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, svm); ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So do a full IBPB now. ++ */ ++ indirect_branch_prediction_barrier(); + } + + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + { + struct vcpu_svm *svm = to_svm(vcpu); ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + int i; + + if (unlikely(cpu != vcpu->cpu)) { +@@ -1674,6 +1682,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + ++ if (sd->current_vmcb != svm->vmcb) { ++ sd->current_vmcb = svm->vmcb; ++ indirect_branch_prediction_barrier(); ++ } + avic_vcpu_load(vcpu, cpu); + } + +@@ -3587,6 +3599,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr); + break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr->host_initiated && ++ !guest_cpuid_has_ibpb(vcpu)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ if (is_guest_mode(vcpu)) ++ break; ++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); ++ break; + case MSR_STAR: + svm->vmcb->save.star = data; + break; +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index c6a7563..855df75 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -550,6 +550,7 @@ struct vcpu_vmx { + u64 msr_host_kernel_gs_base; + u64 msr_guest_kernel_gs_base; + #endif ++ + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; + /* +@@ -911,6 +912,8 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); + static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); + static int alloc_identity_pagetable(struct kvm *kvm); + static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type); + + 
static DEFINE_PER_CPU(struct vmcs *, vmxarea); + static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +@@ -1841,6 +1844,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) + vmcs_write32(EXCEPTION_BITMAP, eb); + } + ++/* ++ * Check if MSR is intercepted for L01 MSR bitmap. ++ */ ++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -2252,6 +2278,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; + vmcs_load(vmx->loaded_vmcs->vmcs); ++ indirect_branch_prediction_barrier(); + } + + if (!already_loaded) { +@@ -3048,6 +3075,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibpb(vcpu)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) +@@ -9406,9 +9460,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + struct page *page; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; ++ /* ++ * pred_cmd is trying to verify two things: ++ * ++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This ++ * ensures that we do not accidentally generate an L02 MSR bitmap ++ * from the L12 MSR bitmap that is too permissive. ++ * 2. That L1 or L2s have actually used the MSR. This avoids ++ * unnecessarily merging of the bitmap if the MSR is unused. This ++ * works properly because we only update the L01 MSR bitmap lazily. ++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only ++ * updated to reflect this when L1 (or its L2s) actually write to ++ * the MSR. ++ */ ++ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + +- /* This shortcut is ok because we support only x2APIC MSRs so far. 
*/ +- if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !pred_cmd) + return false; + + page = nested_get_page(vcpu, vmcs12->msr_bitmap); +@@ -9443,6 +9511,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + MSR_TYPE_W); + } + } ++ ++ if (pred_cmd) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ + kunmap(page); + nested_release_page_clean(page); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch new file mode 100644 index 00000000..c4c48d56 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch @@ -0,0 +1,59 @@ +From faf22307f64c353212c5c132f45f5e7414cea4bf Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 20 Apr 2018 14:08:58 +0200 +Subject: [PATCH 20/93] perf/x86: Fix possible Spectre-v1 indexing for + x86_pmu::event_map() + +commit 46b1b577229a091b137831becaa0fae8690ee15a upstream. + +> arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) +> arch/x86/events/intel/core.c:337 intel_pmu_event_map() warn: potential spectre issue 'intel_perfmon_event_map' +> arch/x86/events/intel/knc.c:122 knc_pmu_event_map() warn: potential spectre issue 'knc_perfmon_event_map' +> arch/x86/events/intel/p4.c:722 p4_pmu_event_map() warn: potential spectre issue 'p4_general_events' +> arch/x86/events/intel/p6.c:116 p6_pmu_event_map() warn: potential spectre issue 'p6_perfmon_event_map' +> arch/x86/events/amd/core.c:132 amd_pmu_event_map() warn: potential spectre issue 'amd_perfmon_event_map' + +Userspace controls @attr, sanitize @attr->config before passing it on +to x86_pmu::event_map(). 
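+
+For reference, the helper clamps the index without a conditional branch;
+given index and size, the logic is roughly the following, simplified from
+the generic include/linux/nospec.h implementation (not part of this
+patch):
+
+	/* mask is ~0UL when index < size, 0 otherwise -- branch-free */
+	unsigned long mask = ~(long)(index | (size - 1UL - index))
+						>> (BITS_PER_LONG - 1);
+	index &= mask;	/* an out-of-bounds index becomes 0 */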
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 6b955e3..d36ada3 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -26,6 +26,7 @@
+ #include <linux/cpu.h>
+ #include <linux/bitops.h>
+ #include <linux/device.h>
++#include <linux/nospec.h>
+ 
+ #include <asm/apic.h>
+ #include <asm/stacktrace.h>
+@@ -423,6 +424,8 @@ int x86_setup_perfctr(struct perf_event *event)
+ 	if (attr->config >= x86_pmu.max_events)
+ 		return -EINVAL;
+ 
++	attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
++
+ 	/*
+ 	 * The generic map:
+ 	 */
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch
new file mode 100644
index 00000000..538a1137
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch
@@ -0,0 +1,156 @@
+From dc7636423649302a329856f238df8820b9c7dc28 Mon Sep 17 00:00:00 2001
+From: KarimAllah Ahmed <karahmed@amazon.de>
+Date: Thu, 1 Feb 2018 22:59:44 +0100
+Subject: [PATCH 21/33] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES
+
+(cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd)
+
+Intel processors use the MSR_IA32_ARCH_CAPABILITIES MSR to indicate
+RDCL_NO (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default
+the contents will come directly from the hardware, but user-space can
+still override it.
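+
+A consumer of these bits looks roughly like the following sketch
+(illustrative only; ARCH_CAP_RDCL_NO and ARCH_CAP_IBRS_ALL are the real
+bit definitions from msr-index.h):
+
+	u64 caps = 0;
+	bool rdcl_no, ibrs_all;
+
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, caps);
+
+	rdcl_no  = !!(caps & ARCH_CAP_RDCL_NO);		/* bit 0: no Meltdown */
+	ibrs_all = !!(caps & ARCH_CAP_IBRS_ALL);	/* bit 1: enhanced IBRS */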
+ +[dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] + +Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Reviewed-by: Darren Kenny <darren.kenny@oracle.com> +Reviewed-by: Jim Mattson <jmattson@google.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Jun Nakajima <jun.nakajima@intel.com> +Cc: kvm@vger.kernel.org +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Asit Mallick <asit.k.mallick@intel.com> +Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Ashok Raj <ashok.raj@intel.com> +Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.c | 8 +++++++- + arch/x86/kvm/cpuid.h | 8 ++++++++ + arch/x86/kvm/vmx.c | 15 +++++++++++++++ + arch/x86/kvm/x86.c | 1 + + 4 files changed, 31 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 42323be..4d3555b 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + /* cpuid 7.0.ecx*/ + const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + ++ /* cpuid 7.0.edx*/ ++ const u32 kvm_cpuid_7_0_edx_x86_features = ++ F(ARCH_CAPABILITIES); ++ + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); + +@@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + /* PKU is not yet implemented for shadow paging. 
*/ + if (!tdp_enabled) + entry->ecx &= ~F(PKU); ++ entry->edx &= kvm_cpuid_7_0_edx_x86_features; ++ cpuid_mask(&entry->edx, CPUID_7_EDX); + } else { + entry->ebx = 0; + entry->ecx = 0; ++ entry->edx = 0; + } + entry->eax = 0; +- entry->edx = 0; + break; + } + case 9: +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index f4a2a1a..a69906c 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -163,6 +163,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + } + ++static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); ++} ++ + + /* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 855df75..d8e3c02 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -551,6 +551,8 @@ struct vcpu_vmx { + u64 msr_guest_kernel_gs_base; + #endif + ++ u64 arch_capabilities; ++ + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; + /* +@@ -2976,6 +2978,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + case MSR_IA32_TSC: + msr_info->data = guest_read_tsc(vcpu); + break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_arch_capabilities(vcpu)) ++ return 1; ++ msr_info->data = to_vmx(vcpu)->arch_capabilities; ++ break; + case MSR_IA32_SYSENTER_CS: + msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); + break; +@@ -3102,6 +3110,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated) ++ return 1; ++ vmx->arch_capabilities = data; ++ break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) +@@ -5173,6 +5186,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) + ++vmx->nmsrs; + } + ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); + + vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index abbb37a..d01742e 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { + #endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, ++ MSR_IA32_ARCH_CAPABILITIES + }; + + static unsigned num_msrs_to_save; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch new file mode 100644 index 00000000..39d81c71 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch @@ -0,0 +1,66 @@ +From 5fb8da20577a159d311db9c29e62dbb782529571 Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sun, 20 May 2018 20:51:10 +0100 +Subject: [PATCH 21/93] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running + under Xen + +commit def9331a12977770cc6132d79f8e6565871e8e38 upstream + +When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be 
set +on AMD cpus. + +This bug/feature bit is kind of special as it will be used very early +when switching threads. Setting the bit and clearing it a little bit +later leaves a critical window where things can go wrong. This time +window has enlarged a little bit by using setup_clear_cpu_cap() instead +of the hypervisor's set_cpu_features callback. It seems this larger +window now makes it rather easy to hit the problem. + +The proper solution is to never set the bit in case of Xen. + +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Juergen Gross <jgross@suse.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/amd.c | 5 +++-- + arch/x86/xen/enlighten.c | 4 +--- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index c375bc6..747f8a2 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -824,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + +- /* AMD CPUs don't reset SS attributes on SYSRET */ +- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ ++ if (!cpu_has(c, X86_FEATURE_XENPV)) ++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 5226379..8b97c87 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1968,10 +1968,8 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); + + static void xen_set_cpu_features(struct cpuinfo_x86 *c) + { +- if (xen_pv_domain()) { +- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ if (xen_pv_domain()) + set_cpu_cap(c, X86_FEATURE_XENPV); +- } + } + + static void xen_pin_vcpu(int cpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch new file mode 100644 index 00000000..9a833616 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch @@ -0,0 +1,305 @@ +From 3a5351279f63e7822bbfe5c0f4ee3d5a1a5bced1 Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed <karahmed@amazon.de> +Date: Thu, 1 Feb 2018 22:59:45 +0100 +Subject: [PATCH 22/33] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL + +(cherry picked from commit d28b387fb74da95d69d2615732f50cceb38e9a4d) + +[ Based on a patch from Ashok Raj <ashok.raj@intel.com> ] + +Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for +guests that will only mitigate Spectre V2 through IBRS+IBPB and will not +be using a retpoline+IBPB based approach. + +To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for +guests that do not actually use the MSR, only start saving and restoring +when a non-zero is written to it. + +No attempt is made to handle STIBP here, intentionally. Filtering STIBP +may be added in a future patch, which may require trapping all writes +if we don't want to pass it through directly to the guest. 
+ +[dwmw2: Clean up CPUID bits, save/restore manually, handle reset] + +Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Darren Kenny <darren.kenny@oracle.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Jim Mattson <jmattson@google.com> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Jun Nakajima <jun.nakajima@intel.com> +Cc: kvm@vger.kernel.org +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Asit Mallick <asit.k.mallick@intel.com> +Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Ashok Raj <ashok.raj@intel.com> +Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.c | 8 ++-- + arch/x86/kvm/cpuid.h | 11 ++++++ + arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++- + arch/x86/kvm/x86.c | 2 +- + 4 files changed, 118 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 4d3555b..bcebe84 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = +- F(IBPB); ++ F(IBPB) | F(IBRS); + + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = +@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = +- F(ARCH_CAPABILITIES); ++ F(SPEC_CTRL) | F(ARCH_CAPABILITIES); + + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); +@@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); + entry->edx = 0; +- /* IBPB isn't necessarily present in hardware cpuid */ ++ /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); ++ if (boot_cpu_has(X86_FEATURE_IBRS)) ++ entry->ebx |= F(IBRS); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + break; +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index a69906c..841e80d 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -163,6 +163,17 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + } + ++static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); ++ if (best && (best->ebx & bit(X86_FEATURE_IBRS))) ++ return true; ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); ++} ++ + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) + { + struct kvm_cpuid_entry2 *best; +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index d8e3c02..c564d03 100644 
+--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -552,6 +552,7 @@ struct vcpu_vmx { + #endif + + u64 arch_capabilities; ++ u64 spec_ctrl; + + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; +@@ -1847,6 +1848,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) + } + + /* ++ * Check if MSR is intercepted for currently loaded MSR bitmap. ++ */ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ + static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +@@ -2978,6 +3002,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + case MSR_IA32_TSC: + msr_info->data = guest_read_tsc(vcpu); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ msr_info->data = to_vmx(vcpu)->spec_ctrl; ++ break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) +@@ -3083,6 +3114,36 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ return 1; ++ ++ vmx->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. We update the vmcs01 here for L1 as well ++ * since it will end up touching the MSR anyway now. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_RW); ++ break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) +@@ -5216,6 +5277,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) + u64 cr0; + + vmx->rmode.vm86_active = 0; ++ vmx->spec_ctrl = 0; + + vmx->soft_vnmi_blocked = 0; + +@@ -8806,6 +8868,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + + vmx_arm_hv_timer(vcpu); + ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ if (vmx->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ + vmx->__launched = vmx->loaded_vmcs->launched; + asm( + /* Store host registers */ +@@ -8924,6 +8995,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + #endif + ); + ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. 
This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ ++ if (vmx->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +@@ -9476,7 +9568,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* +- * pred_cmd is trying to verify two things: ++ * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap +@@ -9489,9 +9581,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); ++ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && +- !pred_cmd) ++ !pred_cmd && !spec_ctrl) + return false; + + page = nested_get_page(vcpu, vmcs12->msr_bitmap); +@@ -9527,6 +9620,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + } + } + ++ if (spec_ctrl) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_R | MSR_TYPE_W); ++ + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index d01742e..d2ea523 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -975,7 +975,7 @@ static u32 msrs_to_save[] = { + #endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, +- MSR_IA32_ARCH_CAPABILITIES ++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES + }; + + static unsigned num_msrs_to_save; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch new file mode 100644 index 00000000..aef6dcc5 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-x86-nospec-Simplify-alternative_msr_write.patch @@ -0,0 +1,71 @@ +From 0ba8203bd88d5640bd6b062b09d3514d5787161d Mon Sep 17 00:00:00 2001 +From: Linus Torvalds <torvalds@linux-foundation.org> +Date: Tue, 1 May 2018 15:55:51 +0200 +Subject: [PATCH 22/93] x86/nospec: Simplify alternative_msr_write() + +commit 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f upstream + +The macro is not type safe and I did look for why that "g" constraint for +the asm doesn't work: it's because the asm is more fundamentally wrong. + +It does + + movl %[val], %%eax + +but "val" isn't a 32-bit value, so then gcc will pass it in a register, +and generate code like + + movl %rsi, %eax + +and gas will complain about a nonsensical 'mov' instruction (it's moving a +64-bit register to a 32-bit one). 
+ +Passing it through memory will just hide the real bug - gcc still thinks +the memory location is 64-bit, but the "movl" will only load the first 32 +bits and it all happens to work because x86 is little-endian. + +Convert it to a type safe inline function with a little trick which hands +the feature into the ALTERNATIVE macro. + +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index f928ad9..870acfc 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -241,15 +241,16 @@ static inline void vmexit_fill_RSB(void) + #endif + } + +-#define alternative_msr_write(_msr, _val, _feature) \ +- asm volatile(ALTERNATIVE("", \ +- "movl %[msr], %%ecx\n\t" \ +- "movl %[val], %%eax\n\t" \ +- "movl $0, %%edx\n\t" \ +- "wrmsr", \ +- _feature) \ +- : : [msr] "i" (_msr), [val] "i" (_val) \ +- : "eax", "ecx", "edx", "memory") ++static __always_inline ++void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) ++{ ++ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) ++ : : "c" (msr), ++ "a" (val), ++ "d" (val >> 32), ++ [feature] "i" (feature) ++ : "memory"); ++} + + static inline void indirect_branch_prediction_barrier(void) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch new file mode 100644 index 00000000..905134c7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch @@ -0,0 +1,192 @@ +From c8b2b4bc3e5eddb48f6eda57e9138a2ea2d39345 Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed <karahmed@amazon.de> +Date: Sat, 3 Feb 2018 15:56:23 +0100 +Subject: [PATCH 23/33] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL + +(cherry picked from commit b2ac58f90540e39324e7a29a7ad471407ae0bf48) + +[ Based on a patch from Paolo Bonzini <pbonzini@redhat.com> ] + +... basically doing exactly what we do for VMX: + +- Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) +- Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest + actually used it. 
+ +Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Darren Kenny <darren.kenny@oracle.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Cc: Andrea Arcangeli <aarcange@redhat.com> +Cc: Andi Kleen <ak@linux.intel.com> +Cc: Jun Nakajima <jun.nakajima@intel.com> +Cc: kvm@vger.kernel.org +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Asit Mallick <asit.k.mallick@intel.com> +Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> +Cc: Greg KH <gregkh@linuxfoundation.org> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Ashok Raj <ashok.raj@intel.com> +Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 88 insertions(+) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 43e45b9..4a36977 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -183,6 +183,8 @@ struct vcpu_svm { + u64 gs_base; + } host; + ++ u64 spec_ctrl; ++ + u32 *msrpm; + + ulong nmi_iret_rip; +@@ -248,6 +250,7 @@ static const struct svm_direct_access_msrs { + { .index = MSR_CSTAR, .always = true }, + { .index = MSR_SYSCALL_MASK, .always = true }, + #endif ++ { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, +@@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 index) + return false; + } + ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) ++{ ++ u8 bit_write; ++ unsigned long tmp; ++ u32 offset; ++ u32 *msrpm; ++ ++ msrpm = is_guest_mode(vcpu) ? 
to_svm(vcpu)->nested.msrpm: ++ to_svm(vcpu)->msrpm; ++ ++ offset = svm_msrpm_offset(msr); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ return !!test_bit(bit_write, &tmp); ++} ++ + static void set_msr_interception(u32 *msrpm, unsigned msr, + int read, int write) + { +@@ -1534,6 +1556,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) + u32 dummy; + u32 eax = 1; + ++ svm->spec_ctrl = 0; ++ + if (!init_event) { + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; +@@ -3515,6 +3539,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + case MSR_VM_CR: + msr_info->data = svm->nested.vm_cr_msr; + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ msr_info->data = svm->spec_ctrl; ++ break; + case MSR_IA32_UCODE_REV: + msr_info->data = 0x01000065; + break; +@@ -3599,6 +3630,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ return 1; ++ ++ svm->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_svm_vmrun_msrpm. ++ * We update the L1 MSR bit as well since it will end up ++ * touching the MSR anyway now. ++ */ ++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); ++ break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) +@@ -4842,6 +4900,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + + local_irq_enable(); + ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ if (svm->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ + asm volatile ( + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" +@@ -4934,6 +5001,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + #endif + ); + ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. 
++ */ ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ ++ if (svm->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch new file mode 100644 index 00000000..6d18a50c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch @@ -0,0 +1,75 @@ +From c3a018c5b5ae383b51700cd636995916fc8c1f61 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:16 -0400 +Subject: [PATCH 23/93] x86/bugs: Concentrate bug detection into a separate + function + +commit 4a28bfe3267b68e22c663ac26185aa16c9b879ef upstream + +Combine the various logic which goes through all those +x86_cpu_id matching structures in one function. + +Suggested-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/common.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 301bbd1..357c589 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -879,21 +879,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { + {} + }; + +-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) ++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + { + u64 ia32_cap = 0; + ++ if (x86_match_cpu(cpu_no_speculation)) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ + if (x86_match_cpu(cpu_no_meltdown)) +- return false; ++ return; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! 
*/
+	if (ia32_cap & ARCH_CAP_RDCL_NO)
+-		return false;
++		return;
+ 
+-	return true;
++	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ }
+ 
+ /*
+@@ -942,12 +948,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+-	if (!x86_match_cpu(cpu_no_speculation)) {
+-		if (cpu_vulnerable_to_meltdown(c))
+-			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+-		setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+-		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+-	}
++	cpu_set_bug_bits(c);
+ 
+ 	fpu__init_system(c);
+ 
+-- 
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch
new file mode 100644
index 00000000..8feed73a
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch
@@ -0,0 +1,100 @@
+From 36417bad8e288e64df1067207030c67304c26ee5 Mon Sep 17 00:00:00 2001
+From: Liran Alon <liran.alon@oracle.com>
+Date: Thu, 9 Nov 2017 20:27:20 +0200
+Subject: [PATCH 24/33] KVM: nVMX: Fix races when sending nested PI while dest
+ enters/leaves L2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream.
+
+Consider the following scenario:
+1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI
+to CPU B via the virtual posted-interrupt mechanism.
+2. CPU B is currently executing an L2 guest.
+3. vmx_deliver_nested_posted_interrupt() calls
+kvm_vcpu_trigger_posted_interrupt() which will note that
+vcpu->mode == IN_GUEST_MODE.
+4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR
+IPI, CPU B exits from L2 to L0 during event-delivery
+(valid IDT-vectoring-info).
+5. CPU A now sends the physical IPI. The IPI is received in the host and
+its handler (smp_kvm_posted_intr_nested_ipi()) does nothing.
+6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT,
+CPU B continues to run in L0 and reaches vcpu_enter_guest(). As
+KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume
+the L2 guest.
+7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but
+it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be
+consumed at the next L2 entry!
+
+Another scenario to consider:
+1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI
+to CPU B via the virtual posted-interrupt mechanism.
+2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(),
+CPU B is at L0 and is about to resume into L2. Further assume that it is
+in vcpu_enter_guest() after the check for KVM_REQ_EVENT.
+3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which
+will note that vcpu->mode != IN_GUEST_MODE. Therefore, it does nothing
+and returns false. Then, CPU A sets pi_pending=true and KVM_REQ_EVENT.
+4. Now CPU B continues and resumes into the L2 guest without processing
+the posted interrupt until the next L2 entry!
+
+To fix both issues, we just need to change
+vmx_deliver_nested_posted_interrupt() to set pi_pending=true and
+KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt().
+
+It will fix the first scenario by changing step (6) to note that
+KVM_REQ_EVENT and pi_pending=true and therefore process the
+nested posted-interrupt.
+
+It will fix the second scenario in two possible ways:
+1.
If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed +vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received +when CPU resumes into L2. +2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet +changed vcpu->mode to IN_GUEST_MODE, then after CPU B will change +vcpu->mode it will call kvm_request_pending() which will return true and +therefore force another round of vcpu_enter_guest() which will note that +KVM_REQ_EVENT and pi_pending=true and therefore process nested +posted-interrupt. + +Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") +Signed-off-by: Liran Alon <liran.alon@oracle.com> +Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com> +Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> +[Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT + and L2 executes HLT instruction. - Paolo] +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index c564d03..85078c7 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4944,14 +4944,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, + + if (is_guest_mode(vcpu) && + vector == vmx->nested.posted_intr_nv) { +- /* the PIR and ON have been set by L1. */ +- kvm_vcpu_trigger_posted_interrupt(vcpu); + /* + * If a posted intr is not recognized by hardware, + * we will accomplish it in the next vmentry. + */ + vmx->nested.pi_pending = true; + kvm_make_request(KVM_REQ_EVENT, vcpu); ++ /* the PIR and ON have been set by L1. */ ++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) ++ kvm_vcpu_kick(vcpu); + return 0; + } + return -1; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch new file mode 100644 index 00000000..b86011cc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch @@ -0,0 +1,92 @@ +From 2ea1e87e0557d4994d239cf75a12cd624d3c7ef9 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:17 -0400 +Subject: [PATCH 24/93] x86/bugs: Concentrate bug reporting into a separate + function + +commit d1059518b4789cabe34bb4b714d07e6089c82ca1 upstream + +Those SysFS functions have a similar preamble, as such make common +code to handle them. 
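[ Illustrative sketch, not part of the patch: the pattern here is to hoist
  the shared "Not affected" preamble into one helper keyed by the bug bit.
  The helper name show_one_bug() is invented for illustration; the real
  helper, cpu_show_common(), appears in the diff below.

      static ssize_t show_one_bug(char *buf, unsigned int bug,
                                  const char *mitigation)
      {
              if (!boot_cpu_has_bug(bug))             /* shared preamble */
                      return sprintf(buf, "Not affected\n");
              if (mitigation)                         /* per-bug body */
                      return sprintf(buf, "Mitigation: %s\n", mitigation);
              return sprintf(buf, "Vulnerable\n");
      }
]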
+ +Suggested-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 46 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 32 insertions(+), 14 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index b8b0b6e..4d9c5fe 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -313,30 +313,48 @@ static void __init spectre_v2_select_mitigation(void) + #undef pr_fmt + + #ifdef CONFIG_SYSFS +-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) ++ ++ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, ++ char *buf, unsigned int bug) + { +- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ if (!boot_cpu_has_bug(bug)) + return sprintf(buf, "Not affected\n"); +- if (boot_cpu_has(X86_FEATURE_KAISER)) +- return sprintf(buf, "Mitigation: PTI\n"); ++ ++ switch (bug) { ++ case X86_BUG_CPU_MELTDOWN: ++ if (boot_cpu_has(X86_FEATURE_KAISER)) ++ return sprintf(buf, "Mitigation: PTI\n"); ++ ++ break; ++ ++ case X86_BUG_SPECTRE_V1: ++ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); ++ ++ case X86_BUG_SPECTRE_V2: ++ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", ++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", ++ spectre_v2_module_string()); ++ ++ default: ++ break; ++ } ++ + return sprintf(buf, "Vulnerable\n"); + } + ++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); ++} ++ + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) + { +- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) +- return sprintf(buf, "Not affected\n"); +- return sprintf(buf, "Mitigation: __user pointer sanitization\n"); ++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); + } + + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) + { +- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) +- return sprintf(buf, "Not affected\n"); +- +- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", +- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? 
", IBRS_FW" : "", +- spectre_v2_module_string()); ++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); + } + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch new file mode 100644 index 00000000..eb633c9c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch @@ -0,0 +1,103 @@ +From 15ca5afe3e56a0f80151aa4b6f06233b39736a2e Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sat, 10 Feb 2018 23:39:24 +0000 +Subject: [PATCH 25/33] KVM/x86: Reduce retpoline performance impact in + slot_handle_level_range(), by always inlining iterator helper methods +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 928a4c39484281f8ca366f53a1db79330d058401 upstream. + +With retpoline, tight loops of "call this function for every XXX" are +very much pessimised by taking a prediction miss *every* time. This one +is by far the biggest contributor to the guest launch time with retpoline. + +By marking the iterator slot_handle_…() functions always_inline, we can +ensure that the indirect function call can be optimised away into a +direct call and it actually generates slightly smaller code because +some of the other conditionals can get optimised away too. + +Performance is now pretty close to what we see with nospectre_v2 on +the command line. + +Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> +Tested-by: Filippo Sironi <sironi@amazon.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Reviewed-by: Filippo Sironi <sironi@amazon.de> +Acked-by: Paolo Bonzini <pbonzini@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: arjan.van.de.ven@intel.com +Cc: dave.hansen@intel.com +Cc: jmattson@google.com +Cc: karahmed@amazon.de +Cc: kvm@vger.kernel.org +Cc: rkrcmar@redhat.com +Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/mmu.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index d9c7e98..ee4af7a 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -4636,7 +4636,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) + typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); + + /* The caller should hold mmu-lock before calling this function. 
*/
+-static bool
++static __always_inline bool
+ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, int start_level, int end_level,
+ gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+@@ -4666,7 +4666,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ return flush;
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, int start_level, int end_level,
+ bool lock_flush_tlb)
+@@ -4677,7 +4677,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4685,7 +4685,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4693,7 +4693,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch
new file mode 100644
index 00000000..da25f2fe
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch
@@ -0,0 +1,143 @@
+From f35005b1a8b68f66c980652ef5299cb422eb9123 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 25 Apr 2018 22:04:18 -0400
+Subject: [PATCH 25/93] x86/bugs: Read SPEC_CTRL MSR during boot and re-use
+ reserved bits
+
+commit 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 upstream
+
+The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all
+the other bits as reserved. The Intel SDM glossary defines reserved as
+implementation specific - aka unknown.
+
+As such, at bootup this must be taken into account and proper masking
+must be applied for the bits in use.
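[ Illustrative sketch, not part of the patch: capture the MSR once at boot
  and fold the captured value into every later write, so that reserved bits
  set by the implementation survive. capture_spec_ctrl() and
  spec_ctrl_write() are invented names; the real code below uses
  x86_spec_ctrl_base and x86_spec_ctrl_set().

      static u64 x86_spec_ctrl_base;  /* boot-time value, reserved bits included */

      static void __init capture_spec_ctrl(void)
      {
              if (boot_cpu_has(X86_FEATURE_IBRS))
                      rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
      }

      static void spec_ctrl_write(u64 val)
      {
              /* OR the preserved bits back in on every write */
              wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
      }
]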
+ +A copy of this document is available at +https://bugzilla.kernel.org/show_bug.cgi?id=199511 + +[ tglx: Made x86_spec_ctrl_base __ro_after_init ] + +Suggested-by: Jon Masters <jcm@redhat.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++---- + arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 870acfc..9ec3d4d 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -217,6 +217,17 @@ enum spectre_v2_mitigation { + SPECTRE_V2_IBRS, + }; + ++/* ++ * The Intel specification for the SPEC_CTRL MSR requires that we ++ * preserve any already set reserved bits at boot time (e.g. for ++ * future additions that this kernel is not currently aware of). ++ * We then set any additional mitigation bits that we want ++ * ourselves and always use this as the base for SPEC_CTRL. ++ * We also use this when handling guest entry/exit as below. ++ */ ++extern void x86_spec_ctrl_set(u64); ++extern u64 x86_spec_ctrl_get_default(void); ++ + extern char __indirect_thunk_start[]; + extern char __indirect_thunk_end[]; + +@@ -254,8 +265,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) + + static inline void indirect_branch_prediction_barrier(void) + { +- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, +- X86_FEATURE_USE_IBPB); ++ u64 val = PRED_CMD_IBPB; ++ ++ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + } + + /* +@@ -266,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ ++ u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ ++ \ + preempt_disable(); \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + X86_FEATURE_USE_IBRS_FW); \ + } while (0) + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ ++ u64 val = x86_spec_ctrl_get_default(); \ ++ \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ + } while (0) +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 4d9c5fe..6ff972a 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -27,6 +27,12 @@ + + static void __init spectre_v2_select_mitigation(void); + ++/* ++ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any ++ * writes to SPEC_CTRL contain whatever reserved bits have been set. ++ */ ++static u64 __ro_after_init x86_spec_ctrl_base; ++ + void __init check_bugs(void) + { + identify_boot_cpu(); +@@ -36,6 +42,13 @@ void __init check_bugs(void) + print_cpu_info(&boot_cpu_data); + } + ++ /* ++ * Read the SPEC_CTRL MSR to account for reserved bits which may ++ * have unknown values. 
++ */ ++ if (boot_cpu_has(X86_FEATURE_IBRS)) ++ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + +@@ -94,6 +107,21 @@ static const char *spectre_v2_strings[] = { + + static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + ++void x86_spec_ctrl_set(u64 val) ++{ ++ if (val & ~SPEC_CTRL_IBRS) ++ WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); ++ else ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); ++} ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); ++ ++u64 x86_spec_ctrl_get_default(void) ++{ ++ return x86_spec_ctrl_base; ++} ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); ++ + #ifdef RETPOLINE + static bool spectre_v2_bad_module; + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch new file mode 100644 index 00000000..38255613 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch @@ -0,0 +1,70 @@ +From 75a724909e81cd4612490d633ab269495377d332 Mon Sep 17 00:00:00 2001 +From: Wanpeng Li <wanpeng.li@hotmail.com> +Date: Wed, 13 Dec 2017 10:46:40 +0100 +Subject: [PATCH 26/33] KVM: x86: fix escape of guest dr6 to the host +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit efdab992813fb2ed825745625b83c05032e9cda2 upstream. + +syzkaller reported: + + WARNING: CPU: 0 PID: 12927 at arch/x86/kernel/traps.c:780 do_debug+0x222/0x250 + CPU: 0 PID: 12927 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #16 + RIP: 0010:do_debug+0x222/0x250 + Call Trace: + <#DB> + debug+0x3e/0x70 + RIP: 0010:copy_user_enhanced_fast_string+0x10/0x20 + </#DB> + _copy_from_user+0x5b/0x90 + SyS_timer_create+0x33/0x80 + entry_SYSCALL_64_fastpath+0x23/0x9a + +The testcase sets a watchpoint (with perf_event_open) on a buffer that is +passed to timer_create() as the struct sigevent argument. In timer_create(), +copy_from_user()'s rep movsb triggers the BP. The testcase also sets +the debug registers for the guest. + +However, KVM only restores host debug registers when the host has active +watchpoints, which triggers a race condition when running the testcase with +multiple threads. The guest's DR6.BS bit can escape to the host before +another thread invokes timer_create(), and do_debug() complains. + +The fix is to respect do_debug()'s dr6 invariant when leaving KVM. 
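[ Illustrative sketch, not part of the patch: the fix boils down to a
  single write on the vcpu-put path, so that a stale guest DR6 value
  (e.g. DR6.BS) can never be observed by the host's do_debug(), which
  expects DR6 to be cleared after it runs.

      /* when the vCPU is scheduled out: */
      set_debugreg(0, 6);     /* clear DR6, matching do_debug()'s invariant */
]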
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index d2ea523..af333e1 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2833,6 +2833,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ kvm_x86_ops->vcpu_put(vcpu);
+ kvm_put_guest_fpu(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
++ /*
++ * If userspace has set any breakpoints or watchpoints, dr6 is restored
++ * on every vmexit, but if not, we might have a stale dr6 from the
++ * guest. do_debug expects dr6 to be cleared after it runs, do the same.
++ */
++ set_debugreg(0, 6);
+ }
+
+ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch
new file mode 100644
index 00000000..d0e8ddcb
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch
@@ -0,0 +1,137 @@
+From d9dc73cbf12047f0d0e171366bfb962b3a592e6f Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 25 Apr 2018 22:04:19 -0400
+Subject: [PATCH 26/93] x86/bugs, KVM: Support the combination of guest and
+ host IBRS
+
+commit 5cf687548705412da47c9cec342fd952d71ed3d5 upstream
+
+A guest may modify the SPEC_CTRL MSR from the value used by the
+kernel. Since the kernel doesn't use IBRS, this means a value of zero is
+what is needed in the host.
+
+But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to
+the other bits as reserved, so the kernel should respect the boot-time
+SPEC_CTRL value and use that.
+
+This makes it possible to deal with future extensions to the SPEC_CTRL
+interface, if any.
+
+Note: This uses wrmsrl() instead of native_wrmsrl(). It does not make any
+difference, as paravirt will overwrite the callq *0xfff.. with the wrmsrl
+assembler code.
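[ Illustrative sketch, not part of the patch: the helpers added below are
  guarded compare-before-write MSR updates. The comparison matters because
  a WRMSR to SPEC_CTRL is expensive, and in the common case (neither host
  nor guest uses IBRS) both views are equal and no write is needed.

      void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
      {
              if (!boot_cpu_has(X86_FEATURE_IBRS))
                      return;         /* no SPEC_CTRL MSR on this CPU */
              if (x86_spec_ctrl_base != guest_spec_ctrl)
                      wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
      }

  The restore path is symmetric, writing x86_spec_ctrl_base back instead.
]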
+ +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 10 ++++++++++ + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++++++++ + arch/x86/kvm/svm.c | 6 ++---- + arch/x86/kvm/vmx.c | 6 ++---- + 4 files changed, 32 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 9ec3d4d..d1c2630 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -228,6 +228,16 @@ enum spectre_v2_mitigation { + extern void x86_spec_ctrl_set(u64); + extern u64 x86_spec_ctrl_get_default(void); + ++/* ++ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR ++ * the guest has, while on VMEXIT we restore the host view. This ++ * would be easier if SPEC_CTRL were architecturally maskable or ++ * shadowable for guests but this is not (currently) the case. ++ * Takes the guest view of SPEC_CTRL MSR as a parameter. ++ */ ++extern void x86_spec_ctrl_set_guest(u64); ++extern void x86_spec_ctrl_restore_host(u64); ++ + extern char __indirect_thunk_start[]; + extern char __indirect_thunk_end[]; + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 6ff972a..f5cad2f 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -122,6 +122,24 @@ u64 x86_spec_ctrl_get_default(void) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + ++void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) ++{ ++ if (!boot_cpu_has(X86_FEATURE_IBRS)) ++ return; ++ if (x86_spec_ctrl_base != guest_spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); ++} ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); ++ ++void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) ++{ ++ if (!boot_cpu_has(X86_FEATURE_IBRS)) ++ return; ++ if (x86_spec_ctrl_base != guest_spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++} ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); ++ + #ifdef RETPOLINE + static bool spectre_v2_bad_module; + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 8551a54..a07579f 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -4905,8 +4905,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ +- if (svm->spec_ctrl) +- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ x86_spec_ctrl_set_guest(svm->spec_ctrl); + + asm volatile ( + "push %%" _ASM_BP "; \n\t" +@@ -5018,8 +5017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + +- if (svm->spec_ctrl) +- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ x86_spec_ctrl_restore_host(svm->spec_ctrl); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 273313f..c386d13 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -8898,8 +8898,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. 
+ */
+- if (vmx->spec_ctrl)
+- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++ x86_spec_ctrl_set_guest(vmx->spec_ctrl);
+
+ vmx->__launched = vmx->loaded_vmcs->launched;
+ asm(
+@@ -9037,8 +9036,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+- if (vmx->spec_ctrl)
+- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++ x86_spec_ctrl_restore_host(vmx->spec_ctrl);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch
new file mode 100644
index 00000000..ef01a1cb
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch
@@ -0,0 +1,37 @@
+From 216ac4ef7d2da59cd2b3d6e34e559c7ef49a143d Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 19 Jul 2017 14:53:04 +0200
+Subject: [PATCH 27/33] x86: add MULTIUSER dependency for KVM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit c2ce3f5d89d57301e2756ac325fe2ebc33bfec30 upstream.
+
+KVM tries to select 'TASKSTATS', which has additional dependencies:
+
+warning: (KVM) selects TASKSTATS which has unmet direct dependencies (NET && MULTIUSER)
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
+index ab8e32f..66da97d 100644
+--- a/arch/x86/kvm/Kconfig
++++ b/arch/x86/kvm/Kconfig
+@@ -22,7 +22,7 @@ config KVM
+ depends on HAVE_KVM
+ depends on HIGH_RES_TIMERS
+ # for TASKSTATS/TASK_DELAY_ACCT:
+- depends on NET
++ depends on NET && MULTIUSER
+ select PREEMPT_NOTIFIERS
+ select MMU_NOTIFIER
+ select ANON_INODES
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch
new file mode 100644
index 00000000..c058dd8f
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch
@@ -0,0 +1,148 @@
+From a24af5ff013ee664d221b6b4d4933f8317f4facb Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 25 Apr 2018 22:04:20 -0400
+Subject: [PATCH 27/93] x86/bugs: Expose /sys/../spec_store_bypass
+
+commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream
+
+Add the sysfs file for the new vulnerability. It does not do much except
+show the word 'Vulnerable' for recent x86 cores.
+
+Intel cores prior to family 6 are known not to be vulnerable, and so are
+some Atoms and some Xeon Phi.
+
+It assumes that older Cyrix, Centaur, etc. cores are immune.
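[ Illustrative sketch, not part of the patch: "known not vulnerable" is
  implemented as a standard x86_match_cpu() whitelist -- a table of
  {vendor, family, model} entries terminated by an empty sentinel,
  consulted before forcing the bug bit. not_vulnerable[] is an invented
  name; the real table, cpu_no_spec_store_bypass[], is in the diff below.

      static const struct x86_cpu_id not_vulnerable[] = {
              { X86_VENDOR_INTEL, 5, },       /* all family-5 Intel parts */
              { X86_VENDOR_ANY,   4, },       /* any 486-class CPU */
              {}                              /* sentinel */
      };

      static void __init check_ssb_sketch(void)
      {
              if (!x86_match_cpu(not_vulnerable))
                      setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
      }
]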
+ +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/bugs.c | 5 +++++ + arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++ + drivers/base/cpu.c | 8 ++++++++ + include/linux/cpu.h | 2 ++ + 6 files changed, 40 insertions(+) + +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index dfd56ec..6d75a9c 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -355,6 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 ++ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + Date: January 2018 + Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> + Description: Information about CPU vulnerabilities +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index a248531..a688adb 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -335,5 +335,6 @@ + #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ + #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ + #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ ++#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index f5cad2f..64e17a9 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -403,4 +403,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c + { + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); + } ++ ++ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); ++} + #endif +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 357c589..4f1050a 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -879,10 +879,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { + {} + }; + ++static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, ++ { X86_VENDOR_INTEL, 6, 
INTEL_FAM6_XEON_PHI_KNM }, ++ { X86_VENDOR_CENTAUR, 5, }, ++ { X86_VENDOR_INTEL, 5, }, ++ { X86_VENDOR_NSC, 5, }, ++ { X86_VENDOR_ANY, 4, }, ++ {} ++}; ++ + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + { + u64 ia32_cap = 0; + ++ if (!x86_match_cpu(cpu_no_spec_store_bypass)) ++ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); ++ + if (x86_match_cpu(cpu_no_speculation)) + return; + +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index 56b6c85..cbb1cc6 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -519,14 +519,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, + return sprintf(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); ++static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, ++ &dev_attr_spec_store_bypass.attr, + NULL + }; + +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 2f475ad..917829b 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -50,6 +50,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_spec_store_bypass(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch new file mode 100644 index 00000000..5c62ba8b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch @@ -0,0 +1,41 @@ +From 7e8b0d6af232b1d642960ca4fb026a70bfaf1206 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann <arnd@arndb.de> +Date: Wed, 4 Oct 2017 12:28:18 +0200 +Subject: [PATCH 28/33] KVM: add X86_LOCAL_APIC dependency +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit e42eef4ba38806b18c4a74f0c276fb2e0b548173 upstream. + +The rework of the posted interrupt handling broke building without +support for the local APIC: + +ERROR: "boot_cpu_physical_apicid" [arch/x86/kvm/kvm-intel.ko] undefined! + +That configuration is probably not particularly useful anyway, so +we can avoid the randconfig failures by adding a Kconfig dependency. 
+ +Fixes: 8b306e2f3c41 ("KVM: VMX: avoid double list add with VT-d posted interrupts") +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig +index 66da97d..9150e09 100644 +--- a/arch/x86/kvm/Kconfig ++++ b/arch/x86/kvm/Kconfig +@@ -23,6 +23,7 @@ config KVM + depends on HIGH_RES_TIMERS + # for TASKSTATS/TASK_DELAY_ACCT: + depends on NET && MULTIUSER ++ depends on X86_LOCAL_APIC + select PREEMPT_NOTIFIERS + select MMU_NOTIFIER + select ANON_INODES +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch new file mode 100644 index 00000000..19e234c1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch @@ -0,0 +1,36 @@ +From 516277f549be576a1146ab20f22ab17393a2c53c Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Sat, 28 Apr 2018 22:34:17 +0200 +Subject: [PATCH 28/93] x86/cpufeatures: Add X86_FEATURE_RDS + +commit 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 upstream + +Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU +supports Reduced Data Speculation. + +[ tglx: Split it out from a later patch ] + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index a688adb..0c05c6c 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -306,6 +306,7 @@ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ ++#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ + + /* + * BUG word(s) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch new file mode 100644 index 00000000..b50d5453 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch @@ -0,0 +1,105 @@ +From 8e13680f134458dd1b0529ccb636ae5895fa8a4d Mon Sep 17 00:00:00 2001 +From: Wanpeng Li <wanpeng.li@hotmail.com> +Date: Thu, 14 Sep 2017 03:54:16 -0700 +Subject: [PATCH 29/33] KVM: async_pf: Fix #DF due to inject "Page not Present" + and "Page Ready" exceptions simultaneously +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 9a6e7c39810e4a8bc7fc95056cefb40583fe07ef upstream. + +qemu-system-x86-8600 [004] d..1 7205.687530: kvm_entry: vcpu 2 +qemu-system-x86-8600 [004] .... 
7205.687532: kvm_exit: reason EXCEPTION_NMI rip 0xffffffffa921297d info ffffeb2c0e44e018 80000b0e
+qemu-system-x86-8600 [004] .... 7205.687532: kvm_page_fault: address ffffeb2c0e44e018 error_code 0
+qemu-system-x86-8600 [004] .... 7205.687620: kvm_try_async_get_page: gva = 0xffffeb2c0e44e018, gfn = 0x427e4e
+qemu-system-x86-8600 [004] .N.. 7205.687628: kvm_async_pf_not_present: token 0x8b002 gva 0xffffeb2c0e44e018
+ kworker/4:2-7814 [004] .... 7205.687655: kvm_async_pf_completed: gva 0xffffeb2c0e44e018 address 0x7fcc30c4e000
+qemu-system-x86-8600 [004] .... 7205.687703: kvm_async_pf_ready: token 0x8b002 gva 0xffffeb2c0e44e018
+qemu-system-x86-8600 [004] d..1 7205.687711: kvm_entry: vcpu 2
+
+After running some memory-intensive workload in the guest, I caught the
+kworker completing the GUP too quickly and queueing a "Page Ready" #PF
+exception after the "Page not Present" exception but before the next
+vmentry, as in the trace above, which results in a #DF injected into the
+guest.
+
+This patch fixes it by clearing the queue for "Page not Present" if "Page
+Ready" occurs before the next vmentry, since the GUP has already got the
+required page and the shadow page table has already been fixed by the
+"Page Ready" handler.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Fixes: 7c90705bf2a3 ("KVM: Inject asynchronous page fault into a PV guest if page is swapped out.")
+[Changed indentation and added clearing of injected. - Radim]
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+[port from upstream v4.14-rc1, Don't assign to kvm_queued_exception::injected or
+ x86_exception::async_page_fault]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 34 ++++++++++++++++++++++++++--------
+ 1 file changed, 26 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index af333e1..9f0f7e2 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8370,6 +8370,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+ sizeof(val));
+ }
+
++static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
++{
++
++ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
++ sizeof(u32));
++}
++
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+@@ -8396,6 +8403,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+ struct x86_exception fault;
++ u32 val;
+
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
+ if (work->wakeup_all)
+@@ -8403,14 +8411,24 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ else
+ kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+
+- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
+- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+- fault.vector = PF_VECTOR;
+- fault.error_code_valid = true;
+- fault.error_code = 0;
+- fault.nested_page_fault = false;
+- fault.address = work->arch.token;
+- kvm_inject_page_fault(vcpu, &fault);
++ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
++ !apf_get_user(vcpu, &val)) {
++ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
++ vcpu->arch.exception.pending &&
++ vcpu->arch.exception.nr == PF_VECTOR &&
++ !apf_put_user(vcpu, 0)) {
++ vcpu->arch.exception.pending = false;
++ vcpu->arch.exception.nr = 0;
++ vcpu->arch.exception.has_error_code = false;
++ vcpu->arch.exception.error_code = 0;
++ } else if (!apf_put_user(vcpu,
KVM_PV_REASON_PAGE_READY)) { ++ fault.vector = PF_VECTOR; ++ fault.error_code_valid = true; ++ fault.error_code = 0; ++ fault.nested_page_fault = false; ++ fault.address = work->arch.token; ++ kvm_inject_page_fault(vcpu, &fault); ++ } + } + vcpu->arch.apf.halted = false; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch new file mode 100644 index 00000000..15084ab2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch @@ -0,0 +1,272 @@ +From b3c238b8a317093dd74e635d553271f2c56cb8c3 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:21 -0400 +Subject: [PATCH 29/93] x86/bugs: Provide boot parameters for the + spec_store_bypass_disable mitigation + +commit 24f7fc83b9204d20f878c57cb77d261ae825e033 upstream + +Contemporary high performance processors use a common industry-wide +optimization known as "Speculative Store Bypass" in which loads from +addresses to which a recent store has occurred may (speculatively) see an +older value. Intel refers to this feature as "Memory Disambiguation" which +is part of their "Smart Memory Access" capability. + +Memory Disambiguation can expose a cache side-channel attack against such +speculatively read values. An attacker can create exploit code that allows +them to read memory outside of a sandbox environment (for example, +malicious JavaScript in a web page), or to perform more complex attacks +against code running within the same privilege level, e.g. via the stack. + +As a first step to mitigate against such attacks, provide two boot command +line control knobs: + + nospec_store_bypass_disable + spec_store_bypass_disable=[off,auto,on] + +By default affected x86 processors will power on with Speculative +Store Bypass enabled. Hence the provided kernel parameters are written +from the point of view of whether to enable a mitigation or not. +The parameters are as follows: + + - auto - Kernel detects whether your CPU model contains an implementation + of Speculative Store Bypass and picks the most appropriate + mitigation. + + - on - disable Speculative Store Bypass + - off - enable Speculative Store Bypass + +[ tglx: Reordered the checks so that the whole evaluation is not done + when the CPU does not support RDS ] + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 33 +++++++++++ + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/nospec-branch.h | 6 ++ + arch/x86/kernel/cpu/bugs.c | 103 +++++++++++++++++++++++++++++++++++ + 4 files changed, 143 insertions(+) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 4b438e4..348ca9d 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2686,6 +2686,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + allow data leaks with this option, which is equivalent + to spectre_v2=off. 
+ ++ nospec_store_bypass_disable ++ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability ++ + noxsave [BUGS=X86] Disables x86 extended register state save + and restore using xsave. The kernel will fallback to + enabling legacy floating-point and sse state. +@@ -3962,6 +3965,36 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + Not specifying this option is equivalent to + spectre_v2=auto. + ++ spec_store_bypass_disable= ++ [HW] Control Speculative Store Bypass (SSB) Disable mitigation ++ (Speculative Store Bypass vulnerability) ++ ++ Certain CPUs are vulnerable to an exploit against a ++ a common industry wide performance optimization known ++ as "Speculative Store Bypass" in which recent stores ++ to the same memory location may not be observed by ++ later loads during speculative execution. The idea ++ is that such stores are unlikely and that they can ++ be detected prior to instruction retirement at the ++ end of a particular speculation execution window. ++ ++ In vulnerable processors, the speculatively forwarded ++ store can be used in a cache side channel attack, for ++ example to read memory to which the attacker does not ++ directly have access (e.g. inside sandboxed code). ++ ++ This parameter controls whether the Speculative Store ++ Bypass optimization is used. ++ ++ on - Unconditionally disable Speculative Store Bypass ++ off - Unconditionally enable Speculative Store Bypass ++ auto - Kernel detects whether the CPU model contains an ++ implementation of Speculative Store Bypass and ++ picks the most appropriate mitigation ++ ++ Not specifying this option is equivalent to ++ spec_store_bypass_disable=auto. ++ + spia_io_base= [HW,MTD] + spia_fio_base= + spia_pedr= +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 0c05c6c..013f3de 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -204,6 +204,7 @@ + + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ ++#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index d1c2630..7b9eacf 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -238,6 +238,12 @@ extern u64 x86_spec_ctrl_get_default(void); + extern void x86_spec_ctrl_set_guest(u64); + extern void x86_spec_ctrl_restore_host(u64); + ++/* The Speculative Store Bypass disable variants */ ++enum ssb_mitigation { ++ SPEC_STORE_BYPASS_NONE, ++ SPEC_STORE_BYPASS_DISABLE, ++}; ++ + extern char __indirect_thunk_start[]; + extern char __indirect_thunk_end[]; + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 64e17a9..75146d9 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -26,6 +26,7 @@ + #include <asm/intel-family.h> + + static void __init spectre_v2_select_mitigation(void); ++static void __init ssb_select_mitigation(void); + + /* + * Our boot-time value of the SPEC_CTRL MSR. 
We read it once so that any +@@ -52,6 +53,12 @@ void __init check_bugs(void) + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + ++ /* ++ * Select proper mitigation for any exposure to the Speculative Store ++ * Bypass vulnerability. ++ */ ++ ssb_select_mitigation(); ++ + #ifdef CONFIG_X86_32 + /* + * Check whether we are able to run this kernel safely on SMP. +@@ -357,6 +364,99 @@ static void __init spectre_v2_select_mitigation(void) + } + + #undef pr_fmt ++#define pr_fmt(fmt) "Speculative Store Bypass: " fmt ++ ++static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; ++ ++/* The kernel command line selection */ ++enum ssb_mitigation_cmd { ++ SPEC_STORE_BYPASS_CMD_NONE, ++ SPEC_STORE_BYPASS_CMD_AUTO, ++ SPEC_STORE_BYPASS_CMD_ON, ++}; ++ ++static const char *ssb_strings[] = { ++ [SPEC_STORE_BYPASS_NONE] = "Vulnerable", ++ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" ++}; ++ ++static const struct { ++ const char *option; ++ enum ssb_mitigation_cmd cmd; ++} ssb_mitigation_options[] = { ++ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ ++ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ ++ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ ++}; ++ ++static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) ++{ ++ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; ++ char arg[20]; ++ int ret, i; ++ ++ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { ++ return SPEC_STORE_BYPASS_CMD_NONE; ++ } else { ++ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", ++ arg, sizeof(arg)); ++ if (ret < 0) ++ return SPEC_STORE_BYPASS_CMD_AUTO; ++ ++ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { ++ if (!match_option(arg, ret, ssb_mitigation_options[i].option)) ++ continue; ++ ++ cmd = ssb_mitigation_options[i].cmd; ++ break; ++ } ++ ++ if (i >= ARRAY_SIZE(ssb_mitigation_options)) { ++ pr_err("unknown option (%s). Switching to AUTO select\n", arg); ++ return SPEC_STORE_BYPASS_CMD_AUTO; ++ } ++ } ++ ++ return cmd; ++} ++ ++static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) ++{ ++ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; ++ enum ssb_mitigation_cmd cmd; ++ ++ if (!boot_cpu_has(X86_FEATURE_RDS)) ++ return mode; ++ ++ cmd = ssb_parse_cmdline(); ++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && ++ (cmd == SPEC_STORE_BYPASS_CMD_NONE || ++ cmd == SPEC_STORE_BYPASS_CMD_AUTO)) ++ return mode; ++ ++ switch (cmd) { ++ case SPEC_STORE_BYPASS_CMD_AUTO: ++ case SPEC_STORE_BYPASS_CMD_ON: ++ mode = SPEC_STORE_BYPASS_DISABLE; ++ break; ++ case SPEC_STORE_BYPASS_CMD_NONE: ++ break; ++ } ++ ++ if (mode != SPEC_STORE_BYPASS_NONE) ++ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); ++ return mode; ++} ++ ++static void ssb_select_mitigation() ++{ ++ ssb_mode = __ssb_select_mitigation(); ++ ++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ pr_info("%s\n", ssb_strings[ssb_mode]); ++} ++ ++#undef pr_fmt + + #ifdef CONFIG_SYSFS + +@@ -382,6 +482,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? 
", IBRS_FW" : "", + spectre_v2_module_string()); + ++ case X86_BUG_SPEC_STORE_BYPASS: ++ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); ++ + default: + break; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch new file mode 100644 index 00000000..fefa3aac --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch @@ -0,0 +1,57 @@ +From 9d11f29130341345dee37007dd76b9c4e83956a9 Mon Sep 17 00:00:00 2001 +From: Jan Dakinevich <jan.dakinevich@gmail.com> +Date: Fri, 23 Feb 2018 11:42:17 +0100 +Subject: [PATCH 30/33] KVM: VMX: clean up declaration of VPID/EPT invalidation + types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 63f3ac48133a19110c8a3666028dbd9b1bf3dcb3 upstream + +- Remove VMX_EPT_EXTENT_INDIVIDUAL_ADDR, since there is no such type of + EPT invalidation + + - Add missing VPID types names + +Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com> +Tested-by: Ladi Prosek <lprosek@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +[jwang: port to 4.4] +Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/vmx.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h +index a002b07..6899cf1 100644 +--- a/arch/x86/include/asm/vmx.h ++++ b/arch/x86/include/asm/vmx.h +@@ -399,10 +399,11 @@ enum vmcs_field { + #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2) + + #define VMX_NR_VPIDS (1 << 16) ++#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0 + #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 + #define VMX_VPID_EXTENT_ALL_CONTEXT 2 ++#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3 + +-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 + #define VMX_EPT_EXTENT_CONTEXT 1 + #define VMX_EPT_EXTENT_GLOBAL 2 + #define VMX_EPT_EXTENT_SHIFT 24 +@@ -419,8 +420,10 @@ enum vmcs_field { + #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) + + #define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ ++#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */ + #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ + #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ ++#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ + + #define VMX_EPT_DEFAULT_GAW 3 + #define VMX_EPT_MAX_GAW 0x4 +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch new file mode 100644 index 00000000..d4c39c90 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch @@ -0,0 +1,183 @@ +From 58645a84abdc201b048cc16d3e1e500884ca452b Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:22 -0400 +Subject: [PATCH 30/93] x86/bugs/intel: Set proper CPU features and setup RDS + +commit 772439717dbf703b39990be58d8d4e3e4ad0598a upstream + +Intel CPUs expose methods to: + + - Detect whether RDS capability is available via CPUID.7.0.EDX[31], + + - The SPEC_CTRL 
MSR(0x48), bit 2 set to enable RDS.
+
+ - MSR_IA32_ARCH_CAPABILITIES, Bit(4): no need to enable RDS.
+
+With that in mind, if spec_store_bypass_disable=[auto,on] is selected, set
+the SPEC_CTRL MSR at boot time to enable RDS if the platform requires it.
+
+Note that this does not fix the KVM case where the SPEC_CTRL is exposed to
+guests which can muck with it; see the patch titled:
+ KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS.
+
+And for the firmware (IBRS to be set), see the patch titled:
+ x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits
+
+[ tglx: Disentangled it from the Intel implementation and kept the call order ]
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 6 ++++++
+ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++--
+ arch/x86/kernel/cpu/common.c | 10 ++++++----
+ arch/x86/kernel/cpu/cpu.h | 3 +++
+ arch/x86/kernel/cpu/intel.c | 1 +
+ 5 files changed, 44 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 0e4da8e..9f014c1 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -40,6 +40,7 @@
+ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+@@ -61,6 +62,11 @@
+ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+ #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
++#define ARCH_CAP_RDS_NO (1 << 4) /*
++ * Not susceptible to Speculative Store Bypass
++ * attack, so no Reduced Data Speculation control
++ * required.
++ */
+
+ #define MSR_IA32_BBL_CR_CTL 0x00000119
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 75146d9..7dd16f4 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -116,7 +116,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+ void x86_spec_ctrl_set(u64 val)
+ {
+- if (val & ~SPEC_CTRL_IBRS)
++ if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS))
+ WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
+ else
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
+@@ -443,8 +443,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
+ break;
+ }
+
+- if (mode != SPEC_STORE_BYPASS_NONE)
++ /*
++ * We have three CPU feature flags that are in play here:
++ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
++ * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass
++ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
++ */
++ if (mode != SPEC_STORE_BYPASS_NONE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
++ /*
++ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
++ * a completely different MSR and bit dependent on family.
++ */ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ x86_spec_ctrl_base |= SPEC_CTRL_RDS; ++ x86_spec_ctrl_set(SPEC_CTRL_RDS); ++ break; ++ case X86_VENDOR_AMD: ++ break; ++ } ++ } ++ + return mode; + } + +@@ -458,6 +478,12 @@ static void ssb_select_mitigation() + + #undef pr_fmt + ++void x86_spec_ctrl_setup_ap(void) ++{ ++ if (boot_cpu_has(X86_FEATURE_IBRS)) ++ x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); ++} ++ + #ifdef CONFIG_SYSFS + + ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4f1050a..ab6b3ad 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -903,7 +903,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + { + u64 ia32_cap = 0; + +- if (!x86_match_cpu(cpu_no_spec_store_bypass)) ++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ if (!x86_match_cpu(cpu_no_spec_store_bypass) && ++ !(ia32_cap & ARCH_CAP_RDS_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_speculation)) +@@ -915,9 +919,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + if (x86_match_cpu(cpu_no_meltdown)) + return; + +- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) +- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); +- + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return; +@@ -1339,6 +1340,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) + #endif + mtrr_ap_init(); + validate_apic_and_package_id(c); ++ x86_spec_ctrl_setup_ap(); + } + + struct msr_range { +diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h +index 2584265..3b19d82 100644 +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], + + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); ++ ++extern void x86_spec_ctrl_setup_ap(void); ++ + #endif /* ARCH_X86_CPU_H */ +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 8fb1d65..f15aea6 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -154,6 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); ++ setup_clear_cpu_cap(X86_FEATURE_RDS); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch new file mode 100644 index 00000000..e96f0d9b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch @@ -0,0 +1,102 @@ +From 1d5388c0b1e6eef66d7999451bb22cddf4cc5546 Mon Sep 17 00:00:00 2001 +From: Jan Dakinevich <jan.dakinevich@gmail.com> +Date: Fri, 23 Feb 2018 11:42:18 +0100 +Subject: [PATCH 31/33] KVM: nVMX: invvpid handling improvements +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit bcdde302b8268ef7dbc4ddbdaffb5b44eafe9a1e upstream + + - Expose all invalidation types to the L1 + + - Reject invvpid instruction, if L1 passed zero vpid value to single + context invalidations + +Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com> +Tested-by: Ladi 
Prosek <lprosek@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +[jwang: port to 4.4] +Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 36 ++++++++++++++++++++++++------------ + 1 file changed, 24 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 85078c7..f6c0568 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -142,6 +142,12 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); + + #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 + ++#define VMX_VPID_EXTENT_SUPPORTED_MASK \ ++ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) ++ + /* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * ple_gap: upper bound on the amount of time between two successive +@@ -2836,8 +2842,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) + */ + if (enable_vpid) + vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | +- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | +- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; ++ VMX_VPID_EXTENT_SUPPORTED_MASK; + else + vmx->nested.nested_vmx_vpid_caps = 0; + +@@ -7671,7 +7676,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); + +- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; ++ types = (vmx->nested.nested_vmx_vpid_caps & ++ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; + + if (type >= 32 || !(types & (1 << type))) { + nested_vmx_failValid(vcpu, +@@ -7693,21 +7699,27 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) + } + + switch (type) { ++ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: + case VMX_VPID_EXTENT_SINGLE_CONTEXT: +- /* +- * Old versions of KVM use the single-context version so we +- * have to support it; just treat it the same as all-context. 
+- */ ++ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: ++ if (!vpid) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ skip_emulated_instruction(vcpu); ++ return 1; ++ } ++ break; + case VMX_VPID_EXTENT_ALL_CONTEXT: +- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); +- nested_vmx_succeed(vcpu); + break; + default: +- /* Trap individual address invalidation invvpid calls */ +- BUG_ON(1); +- break; ++ WARN_ON_ONCE(1); ++ skip_emulated_instruction(vcpu); ++ return 1; + } + ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02); ++ nested_vmx_succeed(vcpu); ++ + skip_emulated_instruction(vcpu); + return 1; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch new file mode 100644 index 00000000..ff00b421 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch @@ -0,0 +1,70 @@ +From 9b78406df0ca3d21903d71f41b64a793dad76cfc Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:23 -0400 +Subject: [PATCH 31/93] x86/bugs: Whitelist allowed SPEC_CTRL MSR values + +commit 1115a859f33276fe8afb31c60cf9d8e657872558 upstream + +Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the +future (or in fact use different MSRs for the same functionality). + +As such a run-time mechanism is required to whitelist the appropriate MSR +values. + +[ tglx: Made the variable __ro_after_init ] + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 7dd16f4..b92c469 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -34,6 +34,12 @@ static void __init ssb_select_mitigation(void); + */ + static u64 __ro_after_init x86_spec_ctrl_base; + ++/* ++ * The vendor and possibly platform specific bits which can be modified in ++ * x86_spec_ctrl_base. 
++ */ ++static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; ++ + void __init check_bugs(void) + { + identify_boot_cpu(); +@@ -116,7 +122,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + + void x86_spec_ctrl_set(u64 val) + { +- if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) ++ if (val & x86_spec_ctrl_mask) + WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); + else + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); +@@ -458,6 +464,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_RDS; ++ x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; + x86_spec_ctrl_set(SPEC_CTRL_RDS); + break; + case X86_VENDOR_AMD: +@@ -481,7 +488,7 @@ static void ssb_select_mitigation() + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_IBRS)) +- x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); ++ x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + } + + #ifdef CONFIG_SYSFS +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch new file mode 100644 index 00000000..4f0b4222 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch @@ -0,0 +1,105 @@ +From 0ebeae5f6b25b48c0559950e2b7c2f0a1ffd641c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Thu, 22 Feb 2018 16:43:17 +0100 +Subject: [PATCH 32/33] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit ecb586bd29c99fb4de599dec388658e74388daad upstream. + +Having a paravirt indirect call in the IBRS restore path is not a +good idea, since we are trying to protect from speculative execution +of bogus indirect branch targets. It is also slower, so use +native_wrmsrl() on the vmentry path too. + +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Reviewed-by: Jim Mattson <jmattson@google.com> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: KarimAllah Ahmed <karahmed@amazon.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d +Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 7 ++++--- + arch/x86/kvm/vmx.c | 7 ++++--- + 2 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 4a36977..8d33396 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -44,6 +44,7 @@ + #include <asm/debugreg.h> + #include <asm/kvm_para.h> + #include <asm/irq_remapping.h> ++#include <asm/microcode.h> + #include <asm/nospec-branch.h> + + #include <asm/virtext.h> +@@ -4907,7 +4908,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + * being speculatively taken. 
+ */ + if (svm->spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + asm volatile ( + "push %%" _ASM_BP "; \n\t" +@@ -5017,10 +5018,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) +- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + if (svm->spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index f6c0568..aa2684a 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -49,6 +49,7 @@ + #include <asm/kexec.h> + #include <asm/apic.h> + #include <asm/irq_remapping.h> ++#include <asm/microcode.h> + #include <asm/nospec-branch.h> + + #include "trace.h" +@@ -8888,7 +8889,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + * being speculatively taken. + */ + if (vmx->spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + vmx->__launched = vmx->loaded_vmcs->launched; + asm( +@@ -9024,10 +9025,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) +- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + if (vmx->spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch new file mode 100644 index 00000000..a79d655d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch @@ -0,0 +1,200 @@ +From 5066a8fdb740b1c31a315ea7da3a58c8208b15eb Mon Sep 17 00:00:00 2001 +From: David Woodhouse <dwmw@amazon.co.uk> +Date: Sun, 20 May 2018 20:52:05 +0100 +Subject: [PATCH 32/93] x86/bugs/AMD: Add support to disable RDS on + Fam[15,16,17]h if requested + +commit 764f3c21588a059cd783c6ba0734d4db2d72822d upstream + +AMD does not need the Speculative Store Bypass mitigation to be enabled. + +The parameters for this are already available and can be done via MSR +C001_1020. Each family uses a different bit in that MSR for this. 
+ +[ tglx: Expose the bit mask via a variable and move the actual MSR fiddling + into the bugs code as that's the right thing to do and also required + to prepare for dynamic enable/disable ] + +Suggested-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/nospec-branch.h | 4 ++++ + arch/x86/kernel/cpu/amd.c | 26 ++++++++++++++++++++++++++ + arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++++++++++++- + arch/x86/kernel/cpu/common.c | 4 ++++ + 5 files changed, 61 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 013f3de..8797069 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -205,6 +205,7 @@ + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ + #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ ++#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 7b9eacf..3a1541c 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -244,6 +244,10 @@ enum ssb_mitigation { + SPEC_STORE_BYPASS_DISABLE, + }; + ++/* AMD specific Speculative Store Bypass MSR data */ ++extern u64 x86_amd_ls_cfg_base; ++extern u64 x86_amd_ls_cfg_rds_mask; ++ + extern char __indirect_thunk_start[]; + extern char __indirect_thunk_end[]; + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 747f8a2..7551d9ad 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -9,6 +9,7 @@ + #include <asm/processor.h> + #include <asm/apic.h> + #include <asm/cpu.h> ++#include <asm/nospec-branch.h> + #include <asm/smp.h> + #include <asm/pci-direct.h> + #include <asm/delay.h> +@@ -542,6 +543,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + rdmsrl(MSR_FAM10H_NODE_ID, value); + nodes_per_socket = ((value >> 3) & 7) + 1; + } ++ ++ if (c->x86 >= 0x15 && c->x86 <= 0x17) { ++ unsigned int bit; ++ ++ switch (c->x86) { ++ case 0x15: bit = 54; break; ++ case 0x16: bit = 33; break; ++ case 0x17: bit = 10; break; ++ default: return; ++ } ++ /* ++ * Try to cache the base value so further operations can ++ * avoid RMW. If that faults, do not enable RDS. ++ */ ++ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { ++ setup_force_cpu_cap(X86_FEATURE_RDS); ++ setup_force_cpu_cap(X86_FEATURE_AMD_RDS); ++ x86_amd_ls_cfg_rds_mask = 1ULL << bit; ++ } ++ } + } + + static void early_init_amd(struct cpuinfo_x86 *c) +@@ -827,6 +848,11 @@ static void init_amd(struct cpuinfo_x86 *c) + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. 
*/ + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ ++ if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { ++ set_cpu_cap(c, X86_FEATURE_RDS); ++ set_cpu_cap(c, X86_FEATURE_AMD_RDS); ++ } + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index b92c469..b3696cc 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -40,6 +40,13 @@ static u64 __ro_after_init x86_spec_ctrl_base; + */ + static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; + ++/* ++ * AMD specific MSR info for Speculative Store Bypass control. ++ * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). ++ */ ++u64 __ro_after_init x86_amd_ls_cfg_base; ++u64 __ro_after_init x86_amd_ls_cfg_rds_mask; ++ + void __init check_bugs(void) + { + identify_boot_cpu(); +@@ -51,7 +58,8 @@ void __init check_bugs(void) + + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may +- * have unknown values. ++ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD ++ * init code as it is not enumerated and depends on the family. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +@@ -153,6 +161,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + ++static void x86_amd_rds_enable(void) ++{ ++ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; ++ ++ if (boot_cpu_has(X86_FEATURE_AMD_RDS)) ++ wrmsrl(MSR_AMD64_LS_CFG, msrval); ++} ++ + #ifdef RETPOLINE + static bool spectre_v2_bad_module; + +@@ -442,6 +458,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: ++ /* ++ * AMD platforms by default don't need SSB mitigation. 
++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; +@@ -468,6 +489,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + x86_spec_ctrl_set(SPEC_CTRL_RDS); + break; + case X86_VENDOR_AMD: ++ x86_amd_rds_enable(); + break; + } + } +@@ -489,6 +511,9 @@ void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_IBRS)) + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); ++ ++ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) ++ x86_amd_rds_enable(); + } + + #ifdef CONFIG_SYSFS +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index ab6b3ad..beb1da8 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -895,6 +895,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, ++ { X86_VENDOR_AMD, 0x12, }, ++ { X86_VENDOR_AMD, 0x11, }, ++ { X86_VENDOR_AMD, 0x10, }, ++ { X86_VENDOR_AMD, 0xf, }, + { X86_VENDOR_ANY, 4, }, + {} + }; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch new file mode 100644 index 00000000..95086730 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch @@ -0,0 +1,65 @@ +From 885a241a441e144391884136534657f8502b2a48 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Thu, 22 Feb 2018 16:43:18 +0100 +Subject: [PATCH 33/33] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by + marking the RDMSR path as unlikely() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 946fbbc13dce68902f64515b610eeb2a6c3d7a64 upstream. + +vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving +branch hints to the compiler can actually make a substantial cycle +difference by keeping the fast path contiguous in memory. + +With this optimization, the retpoline-guest/retpoline-host case is +about 50 cycles faster. + +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Reviewed-by: Jim Mattson <jmattson@google.com> +Cc: David Woodhouse <dwmw@amazon.co.uk> +Cc: KarimAllah Ahmed <karahmed@amazon.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 2 +- + arch/x86/kvm/vmx.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 8d33396..b82bb66 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -5017,7 +5017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. 
+ */ +- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + if (svm->spec_ctrl) +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index aa2684a..3c3558b 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -9024,7 +9024,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ +- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + if (vmx->spec_ctrl) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch new file mode 100644 index 00000000..743d2a90 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch @@ -0,0 +1,120 @@ +From 8f8f17abbbabcff7ebf353b62bbcfb414f83d77e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 25 Apr 2018 22:04:25 -0400 +Subject: [PATCH 33/93] x86/KVM/VMX: Expose SPEC_CTRL Bit(2) to the guest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit da39556f66f5cfe8f9c989206974f1cb16ca5d7c upstream + +Expose the CPUID.7.EDX[31] bit to the guest, and also guard against various +combinations of SPEC_CTRL MSR values. + +The handling of the MSR (to take into account the host value of SPEC_CTRL +Bit(2)) is taken care of in patch: + + KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> + +[dwmw2: Handle 4.9 guest CPUID differences, rename + guest_cpu_has_ibrs() → guest_cpu_has_spec_ctrl()] +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.c | 2 +- + arch/x86/kvm/cpuid.h | 4 ++-- + arch/x86/kvm/svm.c | 4 ++-- + arch/x86/kvm/vmx.c | 6 +++--- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index bcebe84..237e926 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = +- F(SPEC_CTRL) | F(ARCH_CAPABILITIES); ++ F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); + + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 841e80d..39dd457 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -163,7 +163,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + } + +-static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) ++static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) + { + struct kvm_cpuid_entry2 *best; + +@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 
7, 0); +- return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); ++ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); + } + + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index a07579f..43736dd 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -3540,7 +3540,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && +- !guest_cpuid_has_ibrs(vcpu)) ++ !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; +@@ -3631,7 +3631,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && +- !guest_cpuid_has_ibrs(vcpu)) ++ !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index c386d13..3210add 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3017,7 +3017,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && +- !guest_cpuid_has_ibrs(vcpu)) ++ !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; +@@ -3129,11 +3129,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && +- !guest_cpuid_has_ibrs(vcpu)) ++ !guest_cpuid_has_spec_ctrl(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ +- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) + return 1; + + vmx->spec_ctrl = data; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch new file mode 100644 index 00000000..36224a56 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch @@ -0,0 +1,141 @@ +From b8380a76b18fa5522368b50c284530fc6e1b1992 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 29 Apr 2018 15:01:37 +0200 +Subject: [PATCH 34/93] x86/speculation: Create spec-ctrl.h to avoid include + hell + +commit 28a2775217b17208811fa43a9e96bd1fdf417b86 upstream + +Having everything in nospec-branch.h creates a hell of dependencies when +adding the prctl based switching mechanism. Move everything which is not +required in nospec-branch.h to spec-ctrl.h and fix up the includes in the +relevant files. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 14 -------------- + arch/x86/include/asm/spec-ctrl.h | 21 +++++++++++++++++++++ + arch/x86/kernel/cpu/amd.c | 2 +- + arch/x86/kernel/cpu/bugs.c | 2 +- + arch/x86/kvm/svm.c | 2 +- + arch/x86/kvm/vmx.c | 2 +- + 6 files changed, 25 insertions(+), 18 deletions(-) + create mode 100644 arch/x86/include/asm/spec-ctrl.h + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 3a1541c..1119f14 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -228,26 +228,12 @@ enum spectre_v2_mitigation { + extern void x86_spec_ctrl_set(u64); + extern u64 x86_spec_ctrl_get_default(void); + +-/* +- * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR +- * the guest has, while on VMEXIT we restore the host view. This +- * would be easier if SPEC_CTRL were architecturally maskable or +- * shadowable for guests but this is not (currently) the case. +- * Takes the guest view of SPEC_CTRL MSR as a parameter. +- */ +-extern void x86_spec_ctrl_set_guest(u64); +-extern void x86_spec_ctrl_restore_host(u64); +- + /* The Speculative Store Bypass disable variants */ + enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + }; + +-/* AMD specific Speculative Store Bypass MSR data */ +-extern u64 x86_amd_ls_cfg_base; +-extern u64 x86_amd_ls_cfg_rds_mask; +- + extern char __indirect_thunk_start[]; + extern char __indirect_thunk_end[]; + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +new file mode 100644 +index 0000000..3ad6442 +--- /dev/null ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _ASM_X86_SPECCTRL_H_ ++#define _ASM_X86_SPECCTRL_H_ ++ ++#include <asm/nospec-branch.h> ++ ++/* ++ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR ++ * the guest has, while on VMEXIT we restore the host view. This ++ * would be easier if SPEC_CTRL were architecturally maskable or ++ * shadowable for guests but this is not (currently) the case. ++ * Takes the guest view of SPEC_CTRL MSR as a parameter. 
++ */ ++extern void x86_spec_ctrl_set_guest(u64); ++extern void x86_spec_ctrl_restore_host(u64); ++ ++/* AMD specific Speculative Store Bypass MSR data */ ++extern u64 x86_amd_ls_cfg_base; ++extern u64 x86_amd_ls_cfg_rds_mask; ++ ++#endif +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 7551d9ad..a176c81 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -9,7 +9,7 @@ + #include <asm/processor.h> + #include <asm/apic.h> + #include <asm/cpu.h> +-#include <asm/nospec-branch.h> ++#include <asm/spec-ctrl.h> + #include <asm/smp.h> + #include <asm/pci-direct.h> + #include <asm/delay.h> +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index b3696cc..46d01fd 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -12,7 +12,7 @@ + #include <linux/cpu.h> + #include <linux/module.h> + +-#include <asm/nospec-branch.h> ++#include <asm/spec-ctrl.h> + #include <asm/cmdline.h> + #include <asm/bugs.h> + #include <asm/processor.h> +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 43736dd..47779f5 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -45,7 +45,7 @@ + #include <asm/kvm_para.h> + #include <asm/irq_remapping.h> + #include <asm/microcode.h> +-#include <asm/nospec-branch.h> ++#include <asm/spec-ctrl.h> + + #include <asm/virtext.h> + #include "trace.h" +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 3210add..17199dc 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -50,7 +50,7 @@ + #include <asm/apic.h> + #include <asm/irq_remapping.h> + #include <asm/microcode.h> +-#include <asm/nospec-branch.h> ++#include <asm/spec-ctrl.h> + + #include "trace.h" + #include "pmu.h" +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch new file mode 100644 index 00000000..bcbf8f92 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch @@ -0,0 +1,125 @@ +From ac5c35e60743b4260df777cc4ac1e877c2999b1d Mon Sep 17 00:00:00 2001 +From: Kyle Huey <me@kylehuey.com> +Date: Tue, 14 Feb 2017 00:11:02 -0800 +Subject: [PATCH 35/93] x86/process: Optimize TIF checks in __switch_to_xtra() + +commit af8b3cd3934ec60f4c2a420d19a9d416554f140b upstream + +Help the compiler to avoid reevaluating the thread flags for each checked +bit by reordering the bit checks and providing an explicit xor for +evaluation. + +With default defconfigs for each arch, + +x86_64: arch/x86/kernel/process.o +text data bss dec hex +3056 8577 16 11649 2d81 Before +3024 8577 16 11617 2d61 After + +i386: arch/x86/kernel/process.o +text data bss dec hex +2957 8673 8 11638 2d76 Before +2925 8673 8 11606 2d56 After + +Originally-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Kyle Huey <khuey@kylehuey.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Andy Lutomirski <luto@kernel.org> +Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +[dwmw2: backported to make TIF_RDS handling simpler. + No deferred TR reload.] 
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/process.c | 54 +++++++++++++++++++++++++++-------------------- + 1 file changed, 31 insertions(+), 23 deletions(-) + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index a55b320..0e1999e 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -192,48 +192,56 @@ int set_tsc_mode(unsigned int val) + return 0; + } + ++static inline void switch_to_bitmap(struct tss_struct *tss, ++ struct thread_struct *prev, ++ struct thread_struct *next, ++ unsigned long tifp, unsigned long tifn) ++{ ++ if (tifn & _TIF_IO_BITMAP) { ++ /* ++ * Copy the relevant range of the IO bitmap. ++ * Normally this is 128 bytes or less: ++ */ ++ memcpy(tss->io_bitmap, next->io_bitmap_ptr, ++ max(prev->io_bitmap_max, next->io_bitmap_max)); ++ } else if (tifp & _TIF_IO_BITMAP) { ++ /* ++ * Clear any possible leftover bits: ++ */ ++ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); ++ } ++} ++ + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) + { + struct thread_struct *prev, *next; ++ unsigned long tifp, tifn; + + prev = &prev_p->thread; + next = &next_p->thread; + +- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ +- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { ++ tifn = READ_ONCE(task_thread_info(next_p)->flags); ++ tifp = READ_ONCE(task_thread_info(prev_p)->flags); ++ switch_to_bitmap(tss, prev, next, tifp, tifn); ++ ++ propagate_user_return_notify(prev_p, next_p); ++ ++ if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; +- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) ++ if (tifn & _TIF_BLOCKSTEP) + debugctl |= DEBUGCTLMSR_BTF; +- + update_debugctlmsr(debugctl); + } + +- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ +- test_tsk_thread_flag(next_p, TIF_NOTSC)) { +- /* prev and next are different */ +- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) ++ if ((tifp ^ tifn) & _TIF_NOTSC) { ++ if (tifn & _TIF_NOTSC) + hard_disable_TSC(); + else + hard_enable_TSC(); + } +- +- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { +- /* +- * Copy the relevant range of the IO bitmap. +- * Normally this is 128 bytes or less: +- */ +- memcpy(tss->io_bitmap, next->io_bitmap_ptr, +- max(prev->io_bitmap_max, next->io_bitmap_max)); +- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { +- /* +- * Clear any possible leftover bits: +- */ +- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); +- } +- propagate_user_return_notify(prev_p, next_p); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch new file mode 100644 index 00000000..9fd2ab23 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch @@ -0,0 +1,84 @@ +From 19f795b97249d2e81ea918644577ab9669704c28 Mon Sep 17 00:00:00 2001 +From: Kyle Huey <me@kylehuey.com> +Date: Tue, 14 Feb 2017 00:11:03 -0800 +Subject: [PATCH 36/93] x86/process: Correct and optimize TIF_BLOCKSTEP switch + +commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream + +The debug control MSR is "highly magical" as the blockstep bit can be +cleared by hardware under not well documented circumstances. 
+ +So a task switch relying on the bit set by the previous task (according to +the previous tasks thread flags) can trip over this and not update the flag +for the next task. + +To fix this its required to handle DEBUGCTLMSR_BTF when either the previous +or the next or both tasks have the TIF_BLOCKSTEP flag set. + +While at it avoid branching within the TIF_BLOCKSTEP case and evaluating +boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR. + +x86_64: arch/x86/kernel/process.o +text data bss dec hex +3024 8577 16 11617 2d61 Before +3008 8577 16 11601 2d51 After + +i386: No change + +[ tglx: Made the shift value explicit, use a local variable to make the +code readable and massaged changelog] + +Originally-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Kyle Huey <khuey@kylehuey.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Andy Lutomirski <luto@kernel.org> +Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kernel/process.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 9f014c1..4027c33 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -141,6 +141,7 @@ + + /* DEBUGCTLMSR bits (others vary by model): */ + #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ ++#define DEBUGCTLMSR_BTF_SHIFT 1 + #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ + #define DEBUGCTLMSR_TR (1UL << 6) + #define DEBUGCTLMSR_BTS (1UL << 7) +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 0e1999e..496eef6 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -227,13 +227,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + + propagate_user_return_notify(prev_p, next_p); + +- if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { +- unsigned long debugctl = get_debugctlmsr(); ++ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && ++ arch_has_block_step()) { ++ unsigned long debugctl, msk; + ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~DEBUGCTLMSR_BTF; +- if (tifn & _TIF_BLOCKSTEP) +- debugctl |= DEBUGCTLMSR_BTF; +- update_debugctlmsr(debugctl); ++ msk = tifn & _TIF_BLOCKSTEP; ++ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + } + + if ((tifp ^ tifn) & _TIF_NOTSC) { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch new file mode 100644 index 00000000..e5a210ab --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0037-x86-process-Optimize-TIF_NOTSC-switch.patch @@ -0,0 +1,112 @@ +From b72b69b9696975c9279441e4998ceca506280dec Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue, 14 Feb 2017 00:11:04 -0800 +Subject: [PATCH 37/93] x86/process: Optimize TIF_NOTSC switch + +commit 5a920155e388ec22a22e0532fb695b9215c9b34d upstream + +Provide and use a toggle helper instead of doing it with a branch. 
+ +x86_64: arch/x86/kernel/process.o +text data bss dec hex +3008 8577 16 11601 2d51 Before +2976 8577 16 11569 2d31 After + +i386: arch/x86/kernel/process.o +text data bss dec hex +2925 8673 8 11606 2d56 Before +2893 8673 8 11574 2d36 After + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Andy Lutomirski <luto@kernel.org> +Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tlbflush.h | 10 ++++++++++ + arch/x86/kernel/process.c | 22 ++++------------------ + 2 files changed, 14 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 99185a0..686a58d 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask) + } + } + ++static inline void cr4_toggle_bits(unsigned long mask) ++{ ++ unsigned long cr4; ++ ++ cr4 = this_cpu_read(cpu_tlbstate.cr4); ++ cr4 ^= mask; ++ this_cpu_write(cpu_tlbstate.cr4, cr4); ++ __write_cr4(cr4); ++} ++ + /* Read the CR4 shadow. */ + static inline unsigned long cr4_read_shadow(void) + { +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 496eef6..b7e3822 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -134,11 +134,6 @@ void flush_thread(void) + fpu__clear(&tsk->thread.fpu); + } + +-static void hard_disable_TSC(void) +-{ +- cr4_set_bits(X86_CR4_TSD); +-} +- + void disable_TSC(void) + { + preempt_disable(); +@@ -147,15 +142,10 @@ void disable_TSC(void) + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ +- hard_disable_TSC(); ++ cr4_set_bits(X86_CR4_TSD); + preempt_enable(); + } + +-static void hard_enable_TSC(void) +-{ +- cr4_clear_bits(X86_CR4_TSD); +-} +- + static void enable_TSC(void) + { + preempt_disable(); +@@ -164,7 +154,7 @@ static void enable_TSC(void) + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ +- hard_enable_TSC(); ++ cr4_clear_bits(X86_CR4_TSD); + preempt_enable(); + } + +@@ -238,12 +228,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + } + +- if ((tifp ^ tifn) & _TIF_NOTSC) { +- if (tifn & _TIF_NOTSC) +- hard_disable_TSC(); +- else +- hard_enable_TSC(); +- } ++ if ((tifp ^ tifn) & _TIF_NOTSC) ++ cr4_toggle_bits(X86_CR4_TSD); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch new file mode 100644 index 00000000..86badf1b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch @@ -0,0 +1,229 @@ +From 4cac5cffd142a19a03aceb9037302e10fe04d566 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 29 Apr 2018 15:21:42 +0200 +Subject: [PATCH 38/93] x86/process: Allow runtime control of Speculative Store + Bypass + +commit 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c upstream + +The Speculative Store Bypass vulnerability can be mitigated with the +Reduced Data Speculation (RDS) feature. 
To allow finer grained control of +this eventually expensive mitigation a per task mitigation control is +required. + +Add a new TIF_RDS flag and put it into the group of TIF flags which are +evaluated for mismatch in switch_to(). If these bits differ in the previous +and the next task, then the slow path function __switch_to_xtra() is +invoked. Implement the TIF_RDS dependent mitigation control in the slow +path. + +If the prctl for controlling Speculative Store Bypass is disabled or no +task uses the prctl then there is no overhead in the switch_to() fast +path. + +Update the KVM related speculation control functions to take TID_RDS into +account as well. + +Based on a patch from Tim Chen. Completely rewritten. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/msr-index.h | 3 ++- + arch/x86/include/asm/spec-ctrl.h | 17 +++++++++++++++++ + arch/x86/include/asm/thread_info.h | 6 ++++-- + arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++++----- + arch/x86/kernel/process.c | 22 ++++++++++++++++++++++ + 5 files changed, 66 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 4027c33..7ad3ed9 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -40,7 +40,8 @@ + #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ + #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ + #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +-#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ ++#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ ++#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 3ad6442..45ef00a 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -2,6 +2,7 @@ + #ifndef _ASM_X86_SPECCTRL_H_ + #define _ASM_X86_SPECCTRL_H_ + ++#include <linux/thread_info.h> + #include <asm/nospec-branch.h> + + /* +@@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64); + extern u64 x86_amd_ls_cfg_base; + extern u64 x86_amd_ls_cfg_rds_mask; + ++/* The Intel SPEC CTRL MSR base value cache */ ++extern u64 x86_spec_ctrl_base; ++ ++static inline u64 rds_tif_to_spec_ctrl(u64 tifn) ++{ ++ BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); ++ return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); ++} ++ ++static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) ++{ ++ return (tifn & _TIF_RDS) ? 
x86_amd_ls_cfg_rds_mask : 0ULL; ++} ++ ++extern void speculative_store_bypass_update(void); ++ + #endif +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 89978b9..661afac 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -83,6 +83,7 @@ struct thread_info { + #define TIF_SIGPENDING 2 /* signal pending */ + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ + #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ ++#define TIF_RDS 5 /* Reduced data speculation */ + #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ +@@ -104,8 +105,9 @@ struct thread_info { + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) ++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) ++#define _TIF_RDS (1 << TIF_RDS) + #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) +@@ -139,7 +141,7 @@ struct thread_info { + + /* flags to check in __switch_to() */ + #define _TIF_WORK_CTXSW \ +- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) ++ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) + + #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 46d01fd..4f09576 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -32,7 +32,7 @@ static void __init ssb_select_mitigation(void); + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. 
+ */ +-static u64 __ro_after_init x86_spec_ctrl_base; ++u64 __ro_after_init x86_spec_ctrl_base; + + /* + * The vendor and possibly platform specific bits which can be modified in +@@ -139,25 +139,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); + + u64 x86_spec_ctrl_get_default(void) + { +- return x86_spec_ctrl_base; ++ u64 msrval = x86_spec_ctrl_base; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ return msrval; + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + + void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) + { ++ u64 host = x86_spec_ctrl_base; ++ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; +- if (x86_spec_ctrl_base != guest_spec_ctrl) ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ ++ if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); + + void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) + { ++ u64 host = x86_spec_ctrl_base; ++ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; +- if (x86_spec_ctrl_base != guest_spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ ++ if (host != guest_spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, host); + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index b7e3822..9c48e18 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -33,6 +33,7 @@ + #include <asm/mce.h> + #include <asm/vm86.h> + #include <asm/switch_to.h> ++#include <asm/spec-ctrl.h> + + /* + * per-CPU TSS segments. 
Threads are completely 'soft' on Linux, +@@ -202,6 +203,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss, + } + } + ++static __always_inline void __speculative_store_bypass_update(unsigned long tifn) ++{ ++ u64 msr; ++ ++ if (static_cpu_has(X86_FEATURE_AMD_RDS)) { ++ msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ } else { ++ msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ } ++} ++ ++void speculative_store_bypass_update(void) ++{ ++ __speculative_store_bypass_update(current_thread_info()->flags); ++} ++ + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) + { +@@ -230,6 +249,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); ++ ++ if ((tifp ^ tifn) & _TIF_RDS) ++ __speculative_store_bypass_update(tifn); + } + + /* +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch new file mode 100644 index 00000000..d1cb5dcd --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch @@ -0,0 +1,222 @@ +From 3495e18cce0a77cb974173998dfecbf22c9df984 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 29 Apr 2018 15:26:40 +0200 +Subject: [PATCH 39/93] x86/speculation: Add prctl for Speculative Store Bypass + mitigation + +commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream + +Add prctl based control for Speculative Store Bypass mitigation and make it +the default mitigation for Intel and AMD. + +Andi Kleen provided the following rationale (slightly redacted): + + There are multiple levels of impact of Speculative Store Bypass: + + 1) JITed sandbox. + It cannot invoke system calls, but can do PRIME+PROBE and may have call + interfaces to other code + + 2) Native code process. + No protection inside the process at this level. + + 3) Kernel. + + 4) Between processes. + + The prctl tries to protect against case (1) doing attacks. + + If the untrusted code can do random system calls then control is already + lost in a much worse way. So there needs to be system call protection in + some way (using a JIT not allowing them or seccomp). Or rather if the + process can subvert its environment somehow to do the prctl it can already + execute arbitrary code, which is much worse than SSB. + + To put it differently, the point of the prctl is to not allow JITed code + to read data it shouldn't read from its JITed sandbox. If it already has + escaped its sandbox then it can already read everything it wants in its + address space, and do much worse. + + The ability to control Speculative Store Bypass allows to enable the + protection selectively without affecting overall system performance. + +Based on an initial patch from Tim Chen. Completely rewritten. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 6 ++- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 83 +++++++++++++++++++++++++++++++----- + 3 files changed, 79 insertions(+), 11 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 348ca9d..80811df 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -3990,7 +3990,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and +- picks the most appropriate mitigation ++ picks the most appropriate mitigation. ++ prctl - Control Speculative Store Bypass per thread ++ via prctl. Speculative Store Bypass is enabled ++ for a process by default. The state of the control ++ is inherited on fork. + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 1119f14..71ad014 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -232,6 +232,7 @@ extern u64 x86_spec_ctrl_get_default(void); + enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, ++ SPEC_STORE_BYPASS_PRCTL, + }; + + extern char __indirect_thunk_start[]; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 4f09576..b7d9adf 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -11,6 +11,8 @@ + #include <linux/utsname.h> + #include <linux/cpu.h> + #include <linux/module.h> ++#include <linux/nospec.h> ++#include <linux/prctl.h> + + #include <asm/spec-ctrl.h> + #include <asm/cmdline.h> +@@ -411,20 +413,23 @@ enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, ++ SPEC_STORE_BYPASS_CMD_PRCTL, + }; + + static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", +- [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" ++ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", ++ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" + }; + + static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; + } ssb_mitigation_options[] = { +- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ +- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ +- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ ++ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ ++ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ ++ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ ++ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + }; + + static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +@@ -474,14 +479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: +- /* +- * AMD platforms by default don't need SSB mitigation. 
+- */ +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) +- break; ++ /* Choose prctl as the default mode */ ++ mode = SPEC_STORE_BYPASS_PRCTL; ++ break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; ++ case SPEC_STORE_BYPASS_CMD_PRCTL: ++ mode = SPEC_STORE_BYPASS_PRCTL; ++ break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } +@@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ +- if (mode != SPEC_STORE_BYPASS_NONE) { ++ if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses +@@ -523,6 +529,63 @@ static void ssb_select_mitigation() + + #undef pr_fmt + ++static int ssb_prctl_set(unsigned long ctrl) ++{ ++ bool rds = !!test_tsk_thread_flag(current, TIF_RDS); ++ ++ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) ++ return -ENXIO; ++ ++ if (ctrl == PR_SPEC_ENABLE) ++ clear_tsk_thread_flag(current, TIF_RDS); ++ else ++ set_tsk_thread_flag(current, TIF_RDS); ++ ++ if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) ++ speculative_store_bypass_update(); ++ ++ return 0; ++} ++ ++static int ssb_prctl_get(void) ++{ ++ switch (ssb_mode) { ++ case SPEC_STORE_BYPASS_DISABLE: ++ return PR_SPEC_DISABLE; ++ case SPEC_STORE_BYPASS_PRCTL: ++ if (test_tsk_thread_flag(current, TIF_RDS)) ++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE; ++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE; ++ default: ++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ return PR_SPEC_ENABLE; ++ return PR_SPEC_NOT_AFFECTED; ++ } ++} ++ ++int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) ++{ ++ if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) ++ return -ERANGE; ++ ++ switch (which) { ++ case PR_SPEC_STORE_BYPASS: ++ return ssb_prctl_set(ctrl); ++ default: ++ return -ENODEV; ++ } ++} ++ ++int arch_prctl_spec_ctrl_get(unsigned long which) ++{ ++ switch (which) { ++ case PR_SPEC_STORE_BYPASS: ++ return ssb_prctl_get(); ++ default: ++ return -ENODEV; ++ } ++} ++ + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_IBRS)) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch new file mode 100644 index 00000000..973e9188 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0040-nospec-Move-array_index_nospec-parameter-checking-in.patch @@ -0,0 +1,92 @@ +From a3cb1b4823957921fa7a58e51bc8ee3e880bf1c5 Mon Sep 17 00:00:00 2001 +From: Will Deacon <will.deacon@arm.com> +Date: Mon, 5 Feb 2018 14:16:06 +0000 +Subject: [PATCH 40/93] nospec: Move array_index_nospec() parameter checking + into separate macro + +commit 8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 upstream. + +For architectures providing their own implementation of +array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to +complain about out-of-range parameters using WARN_ON() results in a mess +of mutually-dependent include files. + +Rather than unpick the dependencies, simply have the core code in nospec.h +perform the checking for us. 
+ +Signed-off-by: Will Deacon <will.deacon@arm.com> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.deacon@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/nospec.h | 36 +++++++++++++++++++++--------------- + 1 file changed, 21 insertions(+), 15 deletions(-) + +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index b99bced..fbc98e2 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -20,20 +20,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) + { + /* +- * Warn developers about inappropriate array_index_nospec() usage. +- * +- * Even if the CPU speculates past the WARN_ONCE branch, the +- * sign bit of @index is taken into account when generating the +- * mask. +- * +- * This warning is compiled out when the compiler can infer that +- * @index and @size are less than LONG_MAX. +- */ +- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, +- "array_index_nospec() limited to range of [0, LONG_MAX]\n")) +- return 0; +- +- /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. +@@ -44,6 +30,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + #endif + + /* ++ * Warn developers about inappropriate array_index_nospec() usage. ++ * ++ * Even if the CPU speculates past the WARN_ONCE branch, the ++ * sign bit of @index is taken into account when generating the ++ * mask. ++ * ++ * This warning is compiled out when the compiler can infer that ++ * @index and @size are less than LONG_MAX. ++ */ ++#define array_index_mask_nospec_check(index, size) \ ++({ \ ++ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ ++ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ ++ _mask = 0; \ ++ else \ ++ _mask = array_index_mask_nospec(index, size); \ ++ _mask; \ ++}) ++ ++/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: +@@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + ({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ +- unsigned long _mask = array_index_mask_nospec(_i, _s); \ ++ unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch new file mode 100644 index 00000000..48dd7bd7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0041-nospec-Allow-index-argument-to-have-const-qualified-.patch @@ -0,0 +1,68 @@ +From 0f31ea4b42fd0a593d539e2278b1baa35a31a122 Mon Sep 17 00:00:00 2001 +From: Rasmus Villemoes <linux@rasmusvillemoes.dk> +Date: Fri, 16 Feb 2018 13:20:48 -0800 +Subject: [PATCH 41/93] nospec: Allow index argument to have const-qualified + type + +commit b98c6a160a057d5686a8c54c79cc6c8c94a7d0c8 upstream. 
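Background for the change described below: array_index_nospec() copies its argument with "typeof(index) _i = (index);", so an index with const-qualified type made the old trailing "_i &= _mask;" fail to compile. Ending the statement expression with a cast instead lets callers pass const indices directly, as in this hedged sketch (NR_SLOTS and table[] are hypothetical):

    #include <linux/errno.h>
    #include <linux/nospec.h>

    #define NR_SLOTS 16              /* hypothetical */
    static long table[NR_SLOTS];     /* hypothetical */

    static long read_slot(const unsigned long slot)
    {
            if (slot >= NR_SLOTS)
                    return -EINVAL;
            /* Compiles even though slot is const-qualified, since the
             * macro no longer assigns back to its local copy. */
            return table[array_index_nospec(slot, NR_SLOTS)];
    }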
+ +The last expression in a statement expression need not be a bare +variable, quoting gcc docs + + The last thing in the compound statement should be an expression + followed by a semicolon; the value of this subexpression serves as the + value of the entire construct. + +and we already use that in e.g. the min/max macros which end with a +ternary expression. + +This way, we can allow index to have const-qualified type, which will in +some cases avoid the need for introducing a local copy of index of +non-const qualified type. That, in turn, can prevent readers not +familiar with the internals of array_index_nospec from wondering about +the seemingly redundant extra variable, and I think that's worthwhile +considering how confusing the whole _nospec business is. + +The expression _i&_mask has type unsigned long (since that is the type +of _mask, and the BUILD_BUG_ONs guarantee that _i will get promoted to +that), so in order not to change the type of the whole expression, add +a cast back to typeof(_i). + +Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Acked-by: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Will Deacon <will.deacon@arm.com> +Cc: linux-arch@vger.kernel.org +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/151881604837.17395.10812767547837568328.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/nospec.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index fbc98e2..132e3f5 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -72,7 +72,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ +- _i &= _mask; \ +- _i; \ ++ (typeof(_i)) (_i & _mask); \ + }) + #endif /* _LINUX_NOSPEC_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch new file mode 100644 index 00000000..d74a2ba7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0042-nospec-Kill-array_index_nospec_mask_check.patch @@ -0,0 +1,85 @@ +From ae4a53f80d78b49ff776956f133cb59344aa10e9 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Fri, 16 Feb 2018 13:20:42 -0800 +Subject: [PATCH 42/93] nospec: Kill array_index_nospec_mask_check() + +commit 1d91c1d2c80cb70e2e553845e278b87a960c04da upstream. + +There are multiple problems with the dynamic sanity checking in +array_index_nospec_mask_check(): + +* It causes unnecessary overhead in the 32-bit case since integer sized + @index values will no longer cause the check to be compiled away like + in the 64-bit case. 
+ +* In the 32-bit case it may trigger with user controllable input when + the expectation is that should only trigger during development of new + kernel enabling. + +* The macro reuses the input parameter in multiple locations which is + broken if someone passes an expression like 'index++' to + array_index_nospec(). + +Reported-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Will Deacon <will.deacon@arm.com> +Cc: linux-arch@vger.kernel.org +Link: http://lkml.kernel.org/r/151881604278.17395.6605847763178076520.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/nospec.h | 22 +--------------------- + 1 file changed, 1 insertion(+), 21 deletions(-) + +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index 132e3f5..172a19d 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -30,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + #endif + + /* +- * Warn developers about inappropriate array_index_nospec() usage. +- * +- * Even if the CPU speculates past the WARN_ONCE branch, the +- * sign bit of @index is taken into account when generating the +- * mask. +- * +- * This warning is compiled out when the compiler can infer that +- * @index and @size are less than LONG_MAX. +- */ +-#define array_index_mask_nospec_check(index, size) \ +-({ \ +- if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ +- "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ +- _mask = 0; \ +- else \ +- _mask = array_index_mask_nospec(index, size); \ +- _mask; \ +-}) +- +-/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: +@@ -67,7 +47,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + ({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ +- unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ ++ unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch new file mode 100644 index 00000000..33ce3dd7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0043-nospec-Include-asm-barrier.h-dependency.patch @@ -0,0 +1,51 @@ +From 3997af07cbe06033b93bffe163982e30f86d4ac7 Mon Sep 17 00:00:00 2001 +From: Dan Williams <dan.j.williams@intel.com> +Date: Fri, 16 Feb 2018 13:20:54 -0800 +Subject: [PATCH 43/93] nospec: Include <asm/barrier.h> dependency + +commit eb6174f6d1be16b19cfa43dac296bfed003ce1a6 upstream. + +The nospec.h header expects the per-architecture header file +<asm/barrier.h> to optionally define array_index_mask_nospec(). Include +that dependency to prevent inadvertent fallback to the default +array_index_mask_nospec() implementation. 
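For reference, the generic fallback mentioned here computes the mask purely arithmetically, with no serializing instruction; as upstream defines it, it is essentially:

    static inline unsigned long array_index_mask_nospec(unsigned long index,
                                                        unsigned long size)
    {
            /* The sign bit of (index | (size - 1 - index)) is set iff
             * index >= size (given both are <= LONG_MAX); inverting and
             * sign-extending it yields ~0UL in range, 0 out of range. */
            return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
    }

Architectures such as arm64 that can speculate on data values override this in <asm/barrier.h>, which is exactly the definition the missing include could silently bypass.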
+ +The default implementation may not provide a full mitigation +on architectures that perform data value speculation. + +Reported-by: Christian Borntraeger <borntraeger@de.ibm.com> +Signed-off-by: Dan Williams <dan.j.williams@intel.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Dave Hansen <dave.hansen@linux.intel.com> +Cc: David Woodhouse <dwmw2@infradead.org> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Will Deacon <will.deacon@arm.com> +Cc: linux-arch@vger.kernel.org +Link: http://lkml.kernel.org/r/151881605404.17395.1341935530792574707.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/nospec.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index 172a19d..e791ebc 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -5,6 +5,7 @@ + + #ifndef _LINUX_NOSPEC_H + #define _LINUX_NOSPEC_H ++#include <asm/barrier.h> + + /** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch new file mode 100644 index 00000000..1baf848c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0044-prctl-Add-speculation-control-prctls.patch @@ -0,0 +1,239 @@ +From 93715f38b4419faa4f84a9bb536f11d89c5c7427 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 29 Apr 2018 15:20:11 +0200 +Subject: [PATCH 44/93] prctl: Add speculation control prctls + +commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream + +Add two new prctls to control aspects of speculation related vulnerabilites +and their mitigations to provide finer grained control over performance +impacting mitigations. + +PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +which is selected with arg2 of prctl(2). The return value uses bit 0-2 with +the following meaning: + +Bit Define Description +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled + +If all bits are 0 the CPU is not affected by the speculation misfeature. + +If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation +misfeature will fail. + +PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which +is selected by arg2 of prctl(2) per task. arg3 is used to hand in the +control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. + +The common return values are: + +EINVAL prctl is not implemented by the architecture or the unused prctl() + arguments are not 0 +ENODEV arg2 is selecting a not supported speculation misfeature + +PR_SET_SPECULATION_CTRL has these additional return values: + +ERANGE arg3 is incorrect, i.e. 
it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE +ENXIO prctl control of the selected speculation misfeature is disabled + +The first supported controlable speculation misfeature is +PR_SPEC_STORE_BYPASS. Add the define so this can be shared between +architectures. + +Based on an initial patch from Tim Chen and mostly rewritten. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Ingo Molnar <mingo@kernel.org> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/spec_ctrl.txt | 86 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/nospec.h | 5 +++ + include/uapi/linux/prctl.h | 11 ++++++ + kernel/sys.c | 22 ++++++++++++ + 4 files changed, 124 insertions(+) + create mode 100644 Documentation/spec_ctrl.txt + +diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt +new file mode 100644 +index 0000000..ddbebcd +--- /dev/null ++++ b/Documentation/spec_ctrl.txt +@@ -0,0 +1,86 @@ ++=================== ++Speculation Control ++=================== ++ ++Quite some CPUs have speculation related misfeatures which are in fact ++vulnerabilites causing data leaks in various forms even accross privilege ++domains. ++ ++The kernel provides mitigation for such vulnerabilities in various ++forms. Some of these mitigations are compile time configurable and some on ++the kernel command line. ++ ++There is also a class of mitigations which are very expensive, but they can ++be restricted to a certain set of processes or tasks in controlled ++environments. The mechanism to control these mitigations is via ++:manpage:`prctl(2)`. ++ ++There are two prctl options which are related to this: ++ ++ * PR_GET_SPECULATION_CTRL ++ ++ * PR_SET_SPECULATION_CTRL ++ ++PR_GET_SPECULATION_CTRL ++----------------------- ++ ++PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature ++which is selected with arg2 of prctl(2). The return value uses bits 0-2 with ++the following meaning: ++ ++==== ================ =================================================== ++Bit Define Description ++==== ================ =================================================== ++0 PR_SPEC_PRCTL Mitigation can be controlled per task by ++ PR_SET_SPECULATION_CTRL ++1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is ++ disabled ++2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is ++ enabled ++==== ================ =================================================== ++ ++If all bits are 0 the CPU is not affected by the speculation misfeature. ++ ++If PR_SPEC_PRCTL is set, then the per task control of the mitigation is ++available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation ++misfeature will fail. ++ ++PR_SET_SPECULATION_CTRL ++----------------------- ++PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which ++is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand ++in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. 
++ ++Common error codes ++------------------ ++======= ================================================================= ++Value Meaning ++======= ================================================================= ++EINVAL The prctl is not implemented by the architecture or unused ++ prctl(2) arguments are not 0 ++ ++ENODEV arg2 is selecting a not supported speculation misfeature ++======= ================================================================= ++ ++PR_SET_SPECULATION_CTRL error codes ++----------------------------------- ++======= ================================================================= ++Value Meaning ++======= ================================================================= ++0 Success ++ ++ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor ++ PR_SPEC_DISABLE ++ ++ENXIO Control of the selected speculation misfeature is not possible. ++ See PR_GET_SPECULATION_CTRL. ++======= ================================================================= ++ ++Speculation misfeature controls ++------------------------------- ++- PR_SPEC_STORE_BYPASS: Speculative Store Bypass ++ ++ Invocations: ++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index e791ebc..700bb8a 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + \ + (typeof(_i)) (_i & _mask); \ + }) ++ ++/* Speculation control prctl */ ++int arch_prctl_spec_ctrl_get(unsigned long which); ++int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); ++ + #endif /* _LINUX_NOSPEC_H */ +diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h +index a8d0759..3b316be 100644 +--- a/include/uapi/linux/prctl.h ++++ b/include/uapi/linux/prctl.h +@@ -197,4 +197,15 @@ struct prctl_mm_map { + # define PR_CAP_AMBIENT_LOWER 3 + # define PR_CAP_AMBIENT_CLEAR_ALL 4 + ++/* Per task speculation control */ ++#define PR_GET_SPECULATION_CTRL 52 ++#define PR_SET_SPECULATION_CTRL 53 ++/* Speculation control variants */ ++# define PR_SPEC_STORE_BYPASS 0 ++/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ ++# define PR_SPEC_NOT_AFFECTED 0 ++# define PR_SPEC_PRCTL (1UL << 0) ++# define PR_SPEC_ENABLE (1UL << 1) ++# define PR_SPEC_DISABLE (1UL << 2) ++ + #endif /* _LINUX_PRCTL_H */ +diff --git a/kernel/sys.c b/kernel/sys.c +index 89d5be4..312c985 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -53,6 +53,8 @@ + #include <linux/uidgid.h> + #include <linux/cred.h> + ++#include <linux/nospec.h> ++ + #include <linux/kmsg_dump.h> + /* Move somewhere else to avoid recompiling? 
*/ + #include <generated/utsrelease.h> +@@ -2072,6 +2074,16 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) + } + #endif + ++int __weak arch_prctl_spec_ctrl_get(unsigned long which) ++{ ++ return -EINVAL; ++} ++ ++int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) ++{ ++ return -EINVAL; ++} ++ + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + unsigned long, arg4, unsigned long, arg5) + { +@@ -2270,6 +2282,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + case PR_GET_FP_MODE: + error = GET_FP_MODE(me); + break; ++ case PR_GET_SPECULATION_CTRL: ++ if (arg3 || arg4 || arg5) ++ return -EINVAL; ++ error = arch_prctl_spec_ctrl_get(arg2); ++ break; ++ case PR_SET_SPECULATION_CTRL: ++ if (arg4 || arg5) ++ return -EINVAL; ++ error = arch_prctl_spec_ctrl_set(arg2, arg3); ++ break; + default: + error = -EINVAL; + break; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch new file mode 100644 index 00000000..5c1e6d48 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0045-nospec-Allow-getting-setting-on-non-current-task.patch @@ -0,0 +1,162 @@ +From e2a9a40a2a4fbebc999eacc678c2af449db5af11 Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Tue, 1 May 2018 15:19:04 -0700 +Subject: [PATCH 45/93] nospec: Allow getting/setting on non-current task + +commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream + +Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than +current. + +This is needed both for /proc/$pid/status queries and for seccomp (since +thread-syncing can trigger seccomp in non-current threads). + +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++----------- + include/linux/nospec.h | 7 +++++-- + kernel/sys.c | 9 +++++---- + 3 files changed, 26 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index b7d9adf..3760931 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -529,31 +529,35 @@ static void ssb_select_mitigation() + + #undef pr_fmt + +-static int ssb_prctl_set(unsigned long ctrl) ++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + { +- bool rds = !!test_tsk_thread_flag(current, TIF_RDS); ++ bool rds = !!test_tsk_thread_flag(task, TIF_RDS); + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + + if (ctrl == PR_SPEC_ENABLE) +- clear_tsk_thread_flag(current, TIF_RDS); ++ clear_tsk_thread_flag(task, TIF_RDS); + else +- set_tsk_thread_flag(current, TIF_RDS); ++ set_tsk_thread_flag(task, TIF_RDS); + +- if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) ++ /* ++ * If being set on non-current task, delay setting the CPU ++ * mitigation until it is next scheduled. 
++ */ ++ if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) + speculative_store_bypass_update(); + + return 0; + } + +-static int ssb_prctl_get(void) ++static int ssb_prctl_get(struct task_struct *task) + { + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: +- if (test_tsk_thread_flag(current, TIF_RDS)) ++ if (test_tsk_thread_flag(task, TIF_RDS)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: +@@ -563,24 +567,25 @@ static int ssb_prctl_get(void) + } + } + +-int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) ++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, ++ unsigned long ctrl) + { + if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) + return -ERANGE; + + switch (which) { + case PR_SPEC_STORE_BYPASS: +- return ssb_prctl_set(ctrl); ++ return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } + } + +-int arch_prctl_spec_ctrl_get(unsigned long which) ++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + { + switch (which) { + case PR_SPEC_STORE_BYPASS: +- return ssb_prctl_get(); ++ return ssb_prctl_get(task); + default: + return -ENODEV; + } +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index 700bb8a..a908c95 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -7,6 +7,8 @@ + #define _LINUX_NOSPEC_H + #include <asm/barrier.h> + ++struct task_struct; ++ + /** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index +@@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + }) + + /* Speculation control prctl */ +-int arch_prctl_spec_ctrl_get(unsigned long which); +-int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); ++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); ++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, ++ unsigned long ctrl); + + #endif /* _LINUX_NOSPEC_H */ +diff --git a/kernel/sys.c b/kernel/sys.c +index 312c985..143cd63 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -2074,12 +2074,13 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) + } + #endif + +-int __weak arch_prctl_spec_ctrl_get(unsigned long which) ++int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) + { + return -EINVAL; + } + +-int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) ++int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, ++ unsigned long ctrl) + { + return -EINVAL; + } +@@ -2285,12 +2286,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + case PR_GET_SPECULATION_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; +- error = arch_prctl_spec_ctrl_get(arg2); ++ error = arch_prctl_spec_ctrl_get(me, arg2); + break; + case PR_SET_SPECULATION_CTRL: + if (arg4 || arg5) + return -EINVAL; +- error = arch_prctl_spec_ctrl_set(arg2, arg3); ++ error = arch_prctl_spec_ctrl_set(me, arg2, arg3); + break; + default: + error = -EINVAL; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch new file mode 100644 index 00000000..f2e083bc --- /dev/null +++ 
b/common/recipes-kernel/linux/linux-yocto-4.9.21/0046-x86-bugs-Make-boot-modes-__ro_after_init.patch @@ -0,0 +1,43 @@ +From 6dbf11655572182e63051b8ef4e61a07fb4901c0 Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Thu, 3 May 2018 15:03:30 -0700 +Subject: [PATCH 46/93] x86/bugs: Make boot modes __ro_after_init + +commit f9544b2b076ca90d887c5ae5d74fab4c21bb7c13 upstream + +There's no reason for these to be changed after boot. + +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 3760931..65114d2 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -128,7 +128,8 @@ static const char *spectre_v2_strings[] = { + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; ++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = ++ SPECTRE_V2_NONE; + + void x86_spec_ctrl_set(u64 val) + { +@@ -406,7 +407,7 @@ static void __init spectre_v2_select_mitigation(void) + #undef pr_fmt + #define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +-static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; ++static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; + + /* The kernel command line selection */ + enum ssb_mitigation_cmd { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch new file mode 100644 index 00000000..6f74166c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch @@ -0,0 +1,77 @@ +From 10f154142e83fdb4e9d107e0f72b01864e69e108 Mon Sep 17 00:00:00 2001 +From: John Ogness <john.ogness@linutronix.de> +Date: Thu, 14 Sep 2017 11:42:17 +0200 +Subject: [PATCH 47/93] fs/proc: Report eip/esp in /prod/PID/stat for + coredumping + +commit fd7d56270b526ca3ed0c224362e3c64a0f86687a upstream. + +Commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in +/proc/PID/stat") stopped reporting eip/esp because it is +racy and dangerous for executing tasks. The comment adds: + + As far as I know, there are no use programs that make any + material use of these fields, so just get rid of them. + +However, existing userspace core-dump-handler applications (for +example, minicoredumper) are using these fields since they +provide an excellent cross-platform interface to these valuable +pointers. So that commit introduced a user space visible +regression. + +Partially revert the change and make the readout possible for +tasks with the proper permissions and only if the target task +has the PF_DUMPCORE flag set. 
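The consumer side this re-enables looks roughly like the following hedged sketch: a core-dump handler sampling the saved stack and instruction pointers from fields 29 (kstkesp) and 30 (kstkeip) of /proc/<pid>/stat, per proc(5). With this patch those fields read back non-zero only when the reader is permitted and the target has PF_DUMPCORE set:

    #include <stdio.h>
    #include <sys/types.h>

    /* Illustrative only; assumes comm contains no whitespace. */
    static int read_dump_regs(pid_t pid, unsigned long *esp, unsigned long *eip)
    {
            char path[64];
            FILE *f;
            int n;

            snprintf(path, sizeof(path), "/proc/%d/stat", (int)pid);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            /* Skip fields 1-28, then read kstkesp and kstkeip. */
            n = fscanf(f, "%*d %*s %*c %*d %*d %*d %*d %*d "
                          "%*u %*u %*u %*u %*u %*u %*u "
                          "%*d %*d %*d %*d %*d %*d "
                          "%*u %*u %*d %*u %*u %*u %*u %lu %lu",
                       esp, eip);
            fclose(f);
            return n == 2 ? 0 : -1;
    }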
+ +Fixes: 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in> /proc/PID/stat") +Reported-by: Marco Felsch <marco.felsch@preh.de> +Signed-off-by: John Ogness <john.ogness@linutronix.de> +Reviewed-by: Andy Lutomirski <luto@kernel.org> +Cc: Tycho Andersen <tycho.andersen@canonical.com> +Cc: Kees Cook <keescook@chromium.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Al Viro <viro@zeniv.linux.org.uk> +Cc: Linux API <linux-api@vger.kernel.org> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/87poatfwg6.fsf@linutronix.de +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/proc/array.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 81818ad..c932ec4 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -60,6 +60,7 @@ + #include <linux/tty.h> + #include <linux/string.h> + #include <linux/mman.h> ++#include <linux/sched.h> + #include <linux/proc_fs.h> + #include <linux/ioport.h> + #include <linux/uaccess.h> +@@ -416,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + * esp and eip are intentionally zeroed out. There is no + * non-racy way to read them without freezing the task. + * Programs that need reliable values can use ptrace(2). ++ * ++ * The only exception is if the task is core dumping because ++ * a program is not able to use ptrace(2) in that case. It is ++ * safe because the task has stopped executing permanently. + */ ++ if (permitted && (task->flags & PF_DUMPCORE)) { ++ eip = KSTK_EIP(task); ++ esp = KSTK_ESP(task); ++ } + } + + get_task_comm(tcomm, task); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch new file mode 100644 index 00000000..30c8de50 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0048-proc-fix-coredump-vs-read-proc-stat-race.patch @@ -0,0 +1,105 @@ +From 476f6e1404b0b16c48ae53249ffb362a16bf376c Mon Sep 17 00:00:00 2001 +From: Alexey Dobriyan <adobriyan@gmail.com> +Date: Thu, 18 Jan 2018 16:34:05 -0800 +Subject: [PATCH 48/93] proc: fix coredump vs read /proc/*/stat race + +commit 8bb2ee192e482c5d500df9f2b1b26a560bd3026f upstream. + +do_task_stat() accesses IP and SP of a task without bumping reference +count of a stack (which became an entity with independent lifetime at +some point). 
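In short, the fix in the hunk at the end of this patch takes a reference on the task stack before dereferencing it and drops it afterwards; when try_get_task_stack() fails because the stack is already gone, eip/esp simply stay zero:

    if (permitted && (task->flags & PF_DUMPCORE)) {
            if (try_get_task_stack(task)) {  /* stack may already be freed */
                    eip = KSTK_EIP(task);
                    esp = KSTK_ESP(task);
                    put_task_stack(task);    /* drop the stack reference */
            }
    }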
+ +Steps to reproduce: + + #include <stdio.h> + #include <sys/types.h> + #include <sys/stat.h> + #include <fcntl.h> + #include <sys/time.h> + #include <sys/resource.h> + #include <unistd.h> + #include <sys/wait.h> + + int main(void) + { + setrlimit(RLIMIT_CORE, &(struct rlimit){}); + + while (1) { + char buf[64]; + char buf2[4096]; + pid_t pid; + int fd; + + pid = fork(); + if (pid == 0) { + *(volatile int *)0 = 0; + } + + snprintf(buf, sizeof(buf), "/proc/%u/stat", pid); + fd = open(buf, O_RDONLY); + read(fd, buf2, sizeof(buf2)); + close(fd); + + waitpid(pid, NULL, 0); + } + return 0; + } + + BUG: unable to handle kernel paging request at 0000000000003fd8 + IP: do_task_stat+0x8b4/0xaf0 + PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0 + Oops: 0000 [#1] PREEMPT SMP PTI + CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 + RIP: 0010:do_task_stat+0x8b4/0xaf0 + Call Trace: + proc_single_show+0x43/0x70 + seq_read+0xe6/0x3b0 + __vfs_read+0x1e/0x120 + vfs_read+0x84/0x110 + SyS_read+0x3d/0xa0 + entry_SYSCALL_64_fastpath+0x13/0x6c + RIP: 0033:0x7f4d7928cba0 + RSP: 002b:00007ffddb245158 EFLAGS: 00000246 + Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24 + RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8 + CR2: 0000000000003fd8 + +John Ogness said: for my tests I added an else case to verify that the +race is hit and correctly mitigated. + +Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2 +Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> +Reported-by: "Kohli, Gaurav" <gkohli@codeaurora.org> +Tested-by: John Ogness <john.ogness@linutronix.de> +Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> +Cc: Ingo Molnar <mingo@elte.hu> +Cc: Oleg Nesterov <oleg@redhat.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/proc/array.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index c932ec4..794b52a 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -423,8 +423,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + * safe because the task has stopped executing permanently. 
+ */ + if (permitted && (task->flags & PF_DUMPCORE)) { +- eip = KSTK_EIP(task); +- esp = KSTK_ESP(task); ++ if (try_get_task_stack(task)) { ++ eip = KSTK_EIP(task); ++ esp = KSTK_ESP(task); ++ put_task_stack(task); ++ } + } + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch new file mode 100644 index 00000000..4c1c8184 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0049-proc-Provide-details-on-speculation-flaw-mitigations.patch @@ -0,0 +1,64 @@ +From a59a45de2d39c0e4f789ab2f05dc4b675ebc7914 Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Tue, 1 May 2018 15:31:45 -0700 +Subject: [PATCH 49/93] proc: Provide details on speculation flaw mitigations + +commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 upstream + +As done with seccomp and no_new_privs, also show speculation flaw +mitigation state in /proc/$pid/status. + +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/proc/array.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 794b52a..64f3f20 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -80,6 +80,7 @@ + #include <linux/delayacct.h> + #include <linux/seq_file.h> + #include <linux/pid_namespace.h> ++#include <linux/prctl.h> + #include <linux/ptrace.h> + #include <linux/tracehook.h> + #include <linux/string_helpers.h> +@@ -345,8 +346,29 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) + { + #ifdef CONFIG_SECCOMP + seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); +- seq_putc(m, '\n'); + #endif ++ seq_printf(m, "\nSpeculation Store Bypass:\t"); ++ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { ++ case -EINVAL: ++ seq_printf(m, "unknown"); ++ break; ++ case PR_SPEC_NOT_AFFECTED: ++ seq_printf(m, "not vulnerable"); ++ break; ++ case PR_SPEC_PRCTL | PR_SPEC_DISABLE: ++ seq_printf(m, "thread mitigated"); ++ break; ++ case PR_SPEC_PRCTL | PR_SPEC_ENABLE: ++ seq_printf(m, "thread vulnerable"); ++ break; ++ case PR_SPEC_DISABLE: ++ seq_printf(m, "globally mitigated"); ++ break; ++ default: ++ seq_printf(m, "vulnerable"); ++ break; ++ } ++ seq_putc(m, '\n'); + } + + static inline void task_context_switch_counts(struct seq_file *m, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch new file mode 100644 index 00000000..acdc260b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0050-prctl-Add-force-disable-speculation.patch @@ -0,0 +1,218 @@ +From 6eca73ee80c5d8b6f8c3d294b3f97b7c8da67791 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 3 May 2018 22:09:15 +0200 +Subject: [PATCH 50/93] prctl: Add force disable speculation + +commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream + +For certain use cases it is desired to enforce mitigations so they cannot +be undone afterwards. That's important for loader stubs which want to +prevent a child from disabling the mitigation again. Will also be used for +seccomp(). 
The extra state preserving of the prctl state for SSB is a +preparatory step for EBPF dymanic speculation control. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++------------- + arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++---------- + fs/proc/array.c | 3 +++ + include/linux/sched.h | 9 +++++++++ + include/uapi/linux/prctl.h | 1 + + 5 files changed, 59 insertions(+), 23 deletions(-) + +diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt +index ddbebcd..1b3690d 100644 +--- a/Documentation/spec_ctrl.txt ++++ b/Documentation/spec_ctrl.txt +@@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL + ----------------------- + + PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +-which is selected with arg2 of prctl(2). The return value uses bits 0-2 with ++which is selected with arg2 of prctl(2). The return value uses bits 0-3 with + the following meaning: + +-==== ================ =================================================== +-Bit Define Description +-==== ================ =================================================== +-0 PR_SPEC_PRCTL Mitigation can be controlled per task by +- PR_SET_SPECULATION_CTRL +-1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is +- disabled +-2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is +- enabled +-==== ================ =================================================== ++==== ===================== =================================================== ++Bit Define Description ++==== ===================== =================================================== ++0 PR_SPEC_PRCTL Mitigation can be controlled per task by ++ PR_SET_SPECULATION_CTRL ++1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is ++ disabled ++2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is ++ enabled ++3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A ++ subsequent prctl(..., PR_SPEC_ENABLE) will fail. ++==== ===================== =================================================== + + If all bits are 0 the CPU is not affected by the speculation misfeature. + +@@ -47,9 +49,11 @@ misfeature will fail. + + PR_SET_SPECULATION_CTRL + ----------------------- ++ + PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which + is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand +-in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. ++in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or ++PR_SPEC_FORCE_DISABLE. + + Common error codes + ------------------ +@@ -70,10 +74,13 @@ Value Meaning + 0 Success + + ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor +- PR_SPEC_DISABLE ++ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE + + ENXIO Control of the selected speculation misfeature is not possible. + See PR_GET_SPECULATION_CTRL. ++ ++EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller ++ tried to enable it again. 
+ ======= ================================================================= + + Speculation misfeature controls +@@ -84,3 +91,4 @@ Speculation misfeature controls + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); ++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 65114d2..fdbd8e5 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -532,21 +532,37 @@ static void ssb_select_mitigation() + + static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + { +- bool rds = !!test_tsk_thread_flag(task, TIF_RDS); ++ bool update; + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + +- if (ctrl == PR_SPEC_ENABLE) +- clear_tsk_thread_flag(task, TIF_RDS); +- else +- set_tsk_thread_flag(task, TIF_RDS); ++ switch (ctrl) { ++ case PR_SPEC_ENABLE: ++ /* If speculation is force disabled, enable is not allowed */ ++ if (task_spec_ssb_force_disable(task)) ++ return -EPERM; ++ task_clear_spec_ssb_disable(task); ++ update = test_and_clear_tsk_thread_flag(task, TIF_RDS); ++ break; ++ case PR_SPEC_DISABLE: ++ task_set_spec_ssb_disable(task); ++ update = !test_and_set_tsk_thread_flag(task, TIF_RDS); ++ break; ++ case PR_SPEC_FORCE_DISABLE: ++ task_set_spec_ssb_disable(task); ++ task_set_spec_ssb_force_disable(task); ++ update = !test_and_set_tsk_thread_flag(task, TIF_RDS); ++ break; ++ default: ++ return -ERANGE; ++ } + + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ +- if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) ++ if (task == current && update) + speculative_store_bypass_update(); + + return 0; +@@ -558,7 +574,9 @@ static int ssb_prctl_get(struct task_struct *task) + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: +- if (test_tsk_thread_flag(task, TIF_RDS)) ++ if (task_spec_ssb_force_disable(task)) ++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; ++ if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: +@@ -571,9 +589,6 @@ static int ssb_prctl_get(struct task_struct *task) + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) + { +- if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) +- return -ERANGE; +- + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 64f3f20..3e37195 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -355,6 +355,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; ++ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: ++ seq_printf(m, "thread force mitigated"); ++ break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 75d9a57..8e127a3 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2335,6 +2335,8 @@ static inline void memalloc_noio_restore(unsigned int flags) + #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ + #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ + #define 
PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ ++#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ ++#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ + + + #define TASK_PFA_TEST(name, func) \ +@@ -2361,6 +2363,13 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) + TASK_PFA_TEST(LMK_WAITING, lmk_waiting) + TASK_PFA_SET(LMK_WAITING, lmk_waiting) + ++TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) ++ ++TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++ + /* + * task->jobctl flags + */ +diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h +index 3b316be..64776b7 100644 +--- a/include/uapi/linux/prctl.h ++++ b/include/uapi/linux/prctl.h +@@ -207,5 +207,6 @@ struct prctl_mm_map { + # define PR_SPEC_PRCTL (1UL << 0) + # define PR_SPEC_ENABLE (1UL << 1) + # define PR_SPEC_DISABLE (1UL << 2) ++# define PR_SPEC_FORCE_DISABLE (1UL << 3) + + #endif /* _LINUX_PRCTL_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch new file mode 100644 index 00000000..7361acf3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch @@ -0,0 +1,94 @@ +From 687c8baff48fb1849f5c2e8fdaeb2ff565f6554b Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov <oleg@redhat.com> +Date: Wed, 27 Sep 2017 09:25:30 -0600 +Subject: [PATCH 51/93] seccomp: fix the usage of get/put_seccomp_filter() in + seccomp_get_filter() + +commit 66a733ea6b611aecf0119514d2dddab5f9d6c01e upstream. + +As Chris explains, get_seccomp_filter() and put_seccomp_filter() can end +up using different filters. Once we drop ->siglock it is possible for +task->seccomp.filter to have been replaced by SECCOMP_FILTER_FLAG_TSYNC. + +Fixes: f8e529ed941b ("seccomp, ptrace: add support for dumping seccomp filters") +Reported-by: Chris Salls <chrissalls5@gmail.com> +Signed-off-by: Oleg Nesterov <oleg@redhat.com> +[tycho: add __get_seccomp_filter vs. open coding refcount_inc()] +Signed-off-by: Tycho Andersen <tycho@docker.com> +[kees: tweak commit log] +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/seccomp.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/kernel/seccomp.c b/kernel/seccomp.c +index 0db7c8a..af182a6 100644 +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -457,14 +457,19 @@ static long seccomp_attach_filter(unsigned int flags, + return 0; + } + ++void __get_seccomp_filter(struct seccomp_filter *filter) ++{ ++ /* Reference count is bounded by the number of total processes. */ ++ atomic_inc(&filter->usage); ++} ++ + /* get_seccomp_filter - increments the reference count of the filter on @tsk */ + void get_seccomp_filter(struct task_struct *tsk) + { + struct seccomp_filter *orig = tsk->seccomp.filter; + if (!orig) + return; +- /* Reference count is bounded by the number of total processes. 
*/ +- atomic_inc(&orig->usage); ++ __get_seccomp_filter(orig); + } + + static inline void seccomp_filter_free(struct seccomp_filter *filter) +@@ -475,10 +480,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter) + } + } + +-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ +-void put_seccomp_filter(struct task_struct *tsk) ++static void __put_seccomp_filter(struct seccomp_filter *orig) + { +- struct seccomp_filter *orig = tsk->seccomp.filter; + /* Clean up single-reference branches iteratively. */ + while (orig && atomic_dec_and_test(&orig->usage)) { + struct seccomp_filter *freeme = orig; +@@ -487,6 +490,12 @@ void put_seccomp_filter(struct task_struct *tsk) + } + } + ++/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ ++void put_seccomp_filter(struct task_struct *tsk) ++{ ++ __put_seccomp_filter(tsk->seccomp.filter); ++} ++ + /** + * seccomp_send_sigsys - signals the task to allow in-process syscall emulation + * @syscall: syscall number to send to userland +@@ -892,13 +901,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, + if (!data) + goto out; + +- get_seccomp_filter(task); ++ __get_seccomp_filter(filter); + spin_unlock_irq(&task->sighand->siglock); + + if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) + ret = -EFAULT; + +- put_seccomp_filter(task); ++ __put_seccomp_filter(filter); + return ret; + + out: +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch new file mode 100644 index 00000000..85ed7f13 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0052-seccomp-Enable-speculation-flaw-mitigations.patch @@ -0,0 +1,64 @@ +From 6afc277e9b6b9bf8bb4c8c2e4641a021f9d709e2 Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Tue, 1 May 2018 15:07:31 -0700 +Subject: [PATCH 52/93] seccomp: Enable speculation flaw mitigations + +commit 5c3070890d06ff82eecb808d02d2ca39169533ef upstream + +When speculation flaw mitigations are opt-in (via prctl), using seccomp +will automatically opt-in to these protections, since using seccomp +indicates at least some level of sandboxing is desired. + +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/seccomp.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/kernel/seccomp.c b/kernel/seccomp.c +index af182a6..1d3078b 100644 +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -16,6 +16,8 @@ + #include <linux/atomic.h> + #include <linux/audit.h> + #include <linux/compat.h> ++#include <linux/nospec.h> ++#include <linux/prctl.h> + #include <linux/sched.h> + #include <linux/seccomp.h> + #include <linux/slab.h> +@@ -214,6 +216,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) + return true; + } + ++/* ++ * If a given speculation mitigation is opt-in (prctl()-controlled), ++ * select it, by disabling speculation (enabling mitigation). 
++ */ ++static inline void spec_mitigate(struct task_struct *task, ++ unsigned long which) ++{ ++ int state = arch_prctl_spec_ctrl_get(task, which); ++ ++ if (state > 0 && (state & PR_SPEC_PRCTL)) ++ arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); ++} ++ + static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode) + { +@@ -225,6 +240,8 @@ static inline void seccomp_assign_mode(struct task_struct *task, + * filter) is set. + */ + smp_mb__before_atomic(); ++ /* Assume seccomp processes want speculation flaw mitigation. */ ++ spec_mitigate(task, PR_SPEC_STORE_BYPASS); + set_tsk_thread_flag(task, TIF_SECCOMP); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch new file mode 100644 index 00000000..a7a60b69 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch @@ -0,0 +1,33 @@ +From 62722a97a6aeb1ebba9b749068ed6e9eaecceb37 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 4 May 2018 09:40:03 +0200 +Subject: [PATCH 53/93] seccomp: Use PR_SPEC_FORCE_DISABLE + +commit b849a812f7eb92e96d1c8239b06581b2cfd8b275 upstream + +Use PR_SPEC_FORCE_DISABLE in seccomp() because seccomp does not allow to +widen restrictions. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/seccomp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/seccomp.c b/kernel/seccomp.c +index 1d3078b..a0bd6ea 100644 +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -226,7 +226,7 @@ static inline void spec_mitigate(struct task_struct *task, + int state = arch_prctl_spec_ctrl_get(task, which); + + if (state > 0 && (state & PR_SPEC_PRCTL)) +- arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); ++ arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); + } + + static inline void seccomp_assign_mode(struct task_struct *task, +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch new file mode 100644 index 00000000..17012902 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch @@ -0,0 +1,222 @@ +From ed34265c5f460b645a0669079fbc6ad094c83c96 Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Thu, 3 May 2018 14:56:12 -0700 +Subject: [PATCH 54/93] seccomp: Add filter flag to opt-out of SSB mitigation + +commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream + +If a seccomp user is not interested in Speculative Store Bypass mitigation +by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when +adding filters. 
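A hedged userspace sketch of opting out on a kernel carrying this patch: install an allow-everything classic BPF filter, but pass SECCOMP_FILTER_FLAG_SPEC_ALLOW so the implicit PR_SPEC_FORCE_DISABLE is skipped:

    #include <linux/filter.h>
    #include <linux/seccomp.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
    #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
    #endif

    static int filter_without_ssb_mitigation(void)
    {
            struct sock_filter insns[] = {
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
            };
            struct sock_fprog prog = {
                    .len = sizeof(insns) / sizeof(insns[0]),
                    .filter = insns,
            };

            /* Required unless the caller has CAP_SYS_ADMIN. */
            if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                    return -1;
            /* SPEC_ALLOW leaves Speculative Store Bypass enabled. */
            return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
                           SECCOMP_FILTER_FLAG_SPEC_ALLOW, &prog);
    }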
+ +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/seccomp.h | 3 +- + include/uapi/linux/seccomp.h | 4 +- + kernel/seccomp.c | 19 ++++--- + tools/testing/selftests/seccomp/seccomp_bpf.c | 78 ++++++++++++++++++++++++++- + 4 files changed, 93 insertions(+), 11 deletions(-) + +diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h +index ecc296c..50c460a 100644 +--- a/include/linux/seccomp.h ++++ b/include/linux/seccomp.h +@@ -3,7 +3,8 @@ + + #include <uapi/linux/seccomp.h> + +-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) ++#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ ++ SECCOMP_FILTER_FLAG_SPEC_ALLOW) + + #ifdef CONFIG_SECCOMP + +diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h +index 0f238a4..e4acb61 100644 +--- a/include/uapi/linux/seccomp.h ++++ b/include/uapi/linux/seccomp.h +@@ -15,7 +15,9 @@ + #define SECCOMP_SET_MODE_FILTER 1 + + /* Valid flags for SECCOMP_SET_MODE_FILTER */ +-#define SECCOMP_FILTER_FLAG_TSYNC 1 ++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) ++/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */ ++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) + + /* + * All BPF programs must return a 32-bit value. +diff --git a/kernel/seccomp.c b/kernel/seccomp.c +index a0bd6ea..62a60e7 100644 +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -230,7 +230,8 @@ static inline void spec_mitigate(struct task_struct *task, + } + + static inline void seccomp_assign_mode(struct task_struct *task, +- unsigned long seccomp_mode) ++ unsigned long seccomp_mode, ++ unsigned long flags) + { + assert_spin_locked(&task->sighand->siglock); + +@@ -240,8 +241,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, + * filter) is set. + */ + smp_mb__before_atomic(); +- /* Assume seccomp processes want speculation flaw mitigation. */ +- spec_mitigate(task, PR_SPEC_STORE_BYPASS); ++ /* Assume default seccomp processes want spec flaw mitigation. */ ++ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) ++ spec_mitigate(task, PR_SPEC_STORE_BYPASS); + set_tsk_thread_flag(task, TIF_SECCOMP); + } + +@@ -309,7 +311,7 @@ static inline pid_t seccomp_can_sync_threads(void) + * without dropping the locks. + * + */ +-static inline void seccomp_sync_threads(void) ++static inline void seccomp_sync_threads(unsigned long flags) + { + struct task_struct *thread, *caller; + +@@ -350,7 +352,8 @@ static inline void seccomp_sync_threads(void) + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) +- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); ++ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, ++ flags); + } + } + +@@ -469,7 +472,7 @@ static long seccomp_attach_filter(unsigned int flags, + + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) +- seccomp_sync_threads(); ++ seccomp_sync_threads(flags); + + return 0; + } +@@ -729,7 +732,7 @@ static long seccomp_set_mode_strict(void) + #ifdef TIF_NOTSC + disable_TSC(); + #endif +- seccomp_assign_mode(current, seccomp_mode); ++ seccomp_assign_mode(current, seccomp_mode, 0); + ret = 0; + + out: +@@ -787,7 +790,7 @@ static long seccomp_set_mode_filter(unsigned int flags, + /* Do not free the successfully attached filter. 
*/ + prepared = NULL; + +- seccomp_assign_mode(current, seccomp_mode); ++ seccomp_assign_mode(current, seccomp_mode, flags); + out: + spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) +diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c +index 03f1fa4..3362f11 100644 +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -1684,7 +1684,11 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) + #endif + + #ifndef SECCOMP_FILTER_FLAG_TSYNC +-#define SECCOMP_FILTER_FLAG_TSYNC 1 ++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) ++#endif ++ ++#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW ++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) + #endif + + #ifndef seccomp +@@ -1783,6 +1787,78 @@ TEST(seccomp_syscall_mode_lock) + } + } + ++/* ++ * Test detection of known and unknown filter flags. Userspace needs to be able ++ * to check if a filter flag is supported by the current kernel and a good way ++ * of doing that is by attempting to enter filter mode, with the flag bit in ++ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates ++ * that the flag is valid and EINVAL indicates that the flag is invalid. ++ */ ++TEST(detect_seccomp_filter_flags) ++{ ++ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, ++ SECCOMP_FILTER_FLAG_SPEC_ALLOW }; ++ unsigned int flag, all_flags; ++ int i; ++ long ret; ++ ++ /* Test detection of known-good filter flags */ ++ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { ++ int bits = 0; ++ ++ flag = flags[i]; ++ /* Make sure the flag is a single bit! */ ++ while (flag) { ++ if (flag & 0x1) ++ bits ++; ++ flag >>= 1; ++ } ++ ASSERT_EQ(1, bits); ++ flag = flags[i]; ++ ++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ++ ASSERT_NE(ENOSYS, errno) { ++ TH_LOG("Kernel does not support seccomp syscall!"); ++ } ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EFAULT, errno) { ++ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", ++ flag); ++ } ++ ++ all_flags |= flag; ++ } ++ ++ /* Test detection of all known-good filter flags */ ++ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EFAULT, errno) { ++ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", ++ all_flags); ++ } ++ ++ /* Test detection of an unknown filter flag */ ++ flag = -1; ++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EINVAL, errno) { ++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", ++ flag); ++ } ++ ++ /* ++ * Test detection of an unknown filter flag that may simply need to be ++ * added to this test ++ */ ++ flag = flags[ARRAY_SIZE(flags) - 1] << 1; ++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EINVAL, errno) { ++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! 
Does a new flag need to be added to this test?", ++ flag); ++ } ++} ++ + TEST(TSYNC_first) + { + struct sock_filter filter[] = { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch new file mode 100644 index 00000000..ca98b862 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0055-seccomp-Move-speculation-migitation-control-to-arch-.patch @@ -0,0 +1,121 @@ +From 2a4ae48837c977605ea36a01ed63fa8638e4c881 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 4 May 2018 15:12:06 +0200 +Subject: [PATCH 55/93] seccomp: Move speculation migitation control to arch + code + +commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream + +The migitation control is simpler to implement in architecture code as it +avoids the extra function call to check the mode. Aside of that having an +explicit seccomp enabled mode in the architecture mitigations would require +even more workarounds. + +Move it into architecture code and provide a weak function in the seccomp +code. Remove the 'which' argument as this allows the architecture to decide +which mitigations are relevant for seccomp. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++----------- + include/linux/nospec.h | 2 ++ + kernel/seccomp.c | 15 ++------------- + 3 files changed, 22 insertions(+), 24 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index fdbd8e5..131617d 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -568,6 +568,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + return 0; + } + ++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, ++ unsigned long ctrl) ++{ ++ switch (which) { ++ case PR_SPEC_STORE_BYPASS: ++ return ssb_prctl_set(task, ctrl); ++ default: ++ return -ENODEV; ++ } ++} ++ ++#ifdef CONFIG_SECCOMP ++void arch_seccomp_spec_mitigate(struct task_struct *task) ++{ ++ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); ++} ++#endif ++ + static int ssb_prctl_get(struct task_struct *task) + { + switch (ssb_mode) { +@@ -586,17 +604,6 @@ static int ssb_prctl_get(struct task_struct *task) + } + } + +-int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, +- unsigned long ctrl) +-{ +- switch (which) { +- case PR_SPEC_STORE_BYPASS: +- return ssb_prctl_set(task, ctrl); +- default: +- return -ENODEV; +- } +-} +- + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + { + switch (which) { +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index a908c95..0c5ef54 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); ++/* Speculation control for seccomp enforced mitigation */ ++void arch_seccomp_spec_mitigate(struct task_struct *task); + + #endif /* _LINUX_NOSPEC_H */ +diff --git a/kernel/seccomp.c b/kernel/seccomp.c +index 62a60e7..3975856 100644 +--- a/kernel/seccomp.c ++++ 
b/kernel/seccomp.c +@@ -216,18 +216,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) + return true; + } + +-/* +- * If a given speculation mitigation is opt-in (prctl()-controlled), +- * select it, by disabling speculation (enabling mitigation). +- */ +-static inline void spec_mitigate(struct task_struct *task, +- unsigned long which) +-{ +- int state = arch_prctl_spec_ctrl_get(task, which); +- +- if (state > 0 && (state & PR_SPEC_PRCTL)) +- arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); +-} ++void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } + + static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode, +@@ -243,7 +232,7 @@ static inline void seccomp_assign_mode(struct task_struct *task, + smp_mb__before_atomic(); + /* Assume default seccomp processes want spec flaw mitigation. */ + if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) +- spec_mitigate(task, PR_SPEC_STORE_BYPASS); ++ arch_seccomp_spec_mitigate(task); + set_tsk_thread_flag(task, TIF_SECCOMP); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch new file mode 100644 index 00000000..21edf610 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch @@ -0,0 +1,166 @@ +From c9379df089e45eab50820798e3e98aee3b1e5adf Mon Sep 17 00:00:00 2001 +From: Kees Cook <keescook@chromium.org> +Date: Thu, 3 May 2018 14:37:54 -0700 +Subject: [PATCH 56/93] x86/speculation: Make "seccomp" the default mode for + Speculative Store Bypass + +commit f21b53b20c754021935ea43364dbf53778eeba32 upstream + +Unless explicitly opted out of, anything running under seccomp will have +SSB mitigations enabled. Choosing the "prctl" mode will disable this. + +[ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ] + +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + Documentation/kernel-parameters.txt | 26 +++++++++++++++++--------- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 32 +++++++++++++++++++++++--------- + 3 files changed, 41 insertions(+), 18 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 80811df..2c5df33 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -3986,19 +3986,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + This parameter controls whether the Speculative Store + Bypass optimization is used. + +- on - Unconditionally disable Speculative Store Bypass +- off - Unconditionally enable Speculative Store Bypass +- auto - Kernel detects whether the CPU model contains an +- implementation of Speculative Store Bypass and +- picks the most appropriate mitigation. +- prctl - Control Speculative Store Bypass per thread +- via prctl. Speculative Store Bypass is enabled +- for a process by default. The state of the control +- is inherited on fork. 
++ on - Unconditionally disable Speculative Store Bypass ++ off - Unconditionally enable Speculative Store Bypass ++ auto - Kernel detects whether the CPU model contains an ++ implementation of Speculative Store Bypass and ++ picks the most appropriate mitigation. If the ++ CPU is not vulnerable, "off" is selected. If the ++ CPU is vulnerable the default mitigation is ++ architecture and Kconfig dependent. See below. ++ prctl - Control Speculative Store Bypass per thread ++ via prctl. Speculative Store Bypass is enabled ++ for a process by default. The state of the control ++ is inherited on fork. ++ seccomp - Same as "prctl" above, but all seccomp threads ++ will disable SSB unless they explicitly opt out. + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + ++ Default mitigations: ++ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" ++ + spia_io_base= [HW,MTD] + spia_fio_base= + spia_pedr= +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 71ad014..328ea3c 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -233,6 +233,7 @@ enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, ++ SPEC_STORE_BYPASS_SECCOMP, + }; + + extern char __indirect_thunk_start[]; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 131617d..9a3bb65 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -415,22 +415,25 @@ enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, ++ SPEC_STORE_BYPASS_CMD_SECCOMP, + }; + + static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", +- [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" ++ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", ++ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", + }; + + static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; + } ssb_mitigation_options[] = { +- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ +- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ +- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +- { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ ++ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ ++ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ ++ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ ++ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ ++ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ + }; + + static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +@@ -480,8 +483,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: +- /* Choose prctl as the default mode */ +- mode = SPEC_STORE_BYPASS_PRCTL; ++ case SPEC_STORE_BYPASS_CMD_SECCOMP: ++ /* ++ * Choose prctl+seccomp as the default mode if seccomp is ++ * enabled. 
++ */ ++ if (IS_ENABLED(CONFIG_SECCOMP)) ++ mode = SPEC_STORE_BYPASS_SECCOMP; ++ else ++ mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; +@@ -529,12 +539,14 @@ static void ssb_select_mitigation() + } + + #undef pr_fmt ++#define pr_fmt(fmt) "Speculation prctl: " fmt + + static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + { + bool update; + +- if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) ++ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ++ ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; + + switch (ctrl) { +@@ -582,7 +594,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + #ifdef CONFIG_SECCOMP + void arch_seccomp_spec_mitigate(struct task_struct *task) + { +- ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); ++ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ++ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + } + #endif + +@@ -591,6 +604,7 @@ static int ssb_prctl_get(struct task_struct *task) + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; ++ case SPEC_STORE_BYPASS_SECCOMP: + case SPEC_STORE_BYPASS_PRCTL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch new file mode 100644 index 00000000..189588aa --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0057-x86-bugs-Rename-_RDS-to-_SSBD.patch @@ -0,0 +1,405 @@ +From 4bb9a717246aa3019a3d97904e29c4da0bfc37f9 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 9 May 2018 21:41:38 +0200 +Subject: [PATCH 57/93] x86/bugs: Rename _RDS to _SSBD + +commit 9f65fb29374ee37856dbad847b4e121aab72b510 upstream + +Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2] +as SSBD (Speculative Store Bypass Disable). + +Hence changing it. + +It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name +is going to be. Following the rename it would be SSBD_NO but that rolls out +to Speculative Store Bypass Disable No. + +Also fixed the missing space in X86_FEATURE_AMD_SSBD. 
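+
+For reference (an illustrative sketch, not part of this rename), the
+per-task state that the TIF_SSBD flag ultimately backs is exposed
+through the speculation prctl added earlier in this series; a thread
+can probe for the control and force the mitigation like so:
+
+	#include <sys/prctl.h>
+
+	int state = prctl(PR_GET_SPECULATION_CTRL,
+			  PR_SPEC_STORE_BYPASS, 0, 0, 0);
+	if (state > 0 && (state & PR_SPEC_PRCTL))
+		/* Permanently disable speculative store bypass. */
+		prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
+		      PR_SPEC_FORCE_DISABLE, 0, 0);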
+ +[ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ] + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 4 ++-- + arch/x86/include/asm/msr-index.h | 10 +++++----- + arch/x86/include/asm/spec-ctrl.h | 12 ++++++------ + arch/x86/include/asm/thread_info.h | 6 +++--- + arch/x86/kernel/cpu/amd.c | 14 +++++++------- + arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++------------------ + arch/x86/kernel/cpu/common.c | 2 +- + arch/x86/kernel/cpu/intel.c | 2 +- + arch/x86/kernel/process.c | 8 ++++---- + arch/x86/kvm/cpuid.c | 2 +- + arch/x86/kvm/cpuid.h | 2 +- + arch/x86/kvm/vmx.c | 2 +- + 12 files changed, 50 insertions(+), 50 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 8797069..0ed8ea5 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -205,7 +205,7 @@ + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ + #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +-#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ ++#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +@@ -308,7 +308,7 @@ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +-#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ ++#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ + + /* + * BUG word(s) +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 7ad3ed9..0145a0b 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -40,8 +40,8 @@ + #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ + #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ + #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +-#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ +-#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ ++#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ ++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +@@ -63,10 +63,10 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +-#define ARCH_CAP_RDS_NO (1 << 4) /* ++#define ARCH_CAP_SSBD_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass +- * attack, so no Reduced Data Speculation control +- * required. ++ * attack, so no Speculative Store Bypass ++ * control required. 
+ */ + + #define MSR_IA32_BBL_CR_CTL 0x00000119 +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 45ef00a..dc21209 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64); + + /* AMD specific Speculative Store Bypass MSR data */ + extern u64 x86_amd_ls_cfg_base; +-extern u64 x86_amd_ls_cfg_rds_mask; ++extern u64 x86_amd_ls_cfg_ssbd_mask; + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; + +-static inline u64 rds_tif_to_spec_ctrl(u64 tifn) ++static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) + { +- BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); +- return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); ++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); ++ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); + } + +-static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) ++static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) + { +- return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; ++ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; + } + + extern void speculative_store_bypass_update(void); +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 661afac..2d8788a 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -83,7 +83,7 @@ struct thread_info { + #define TIF_SIGPENDING 2 /* signal pending */ + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ + #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +-#define TIF_RDS 5 /* Reduced data speculation */ ++#define TIF_SSBD 5 /* Reduced data speculation */ + #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ +@@ -107,7 +107,7 @@ struct thread_info { + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) + #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +-#define _TIF_RDS (1 << TIF_RDS) ++#define _TIF_SSBD (1 << TIF_SSBD) + #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) +@@ -141,7 +141,7 @@ struct thread_info { + + /* flags to check in __switch_to() */ + #define _TIF_WORK_CTXSW \ +- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) ++ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + + #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index a176c81..acb2fcc 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -555,12 +555,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + } + /* + * Try to cache the base value so further operations can +- * avoid RMW. If that faults, do not enable RDS. ++ * avoid RMW. If that faults, do not enable SSBD. 
+ */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { +- setup_force_cpu_cap(X86_FEATURE_RDS); +- setup_force_cpu_cap(X86_FEATURE_AMD_RDS); +- x86_amd_ls_cfg_rds_mask = 1ULL << bit; ++ setup_force_cpu_cap(X86_FEATURE_SSBD); ++ setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); ++ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } + } +@@ -849,9 +849,9 @@ static void init_amd(struct cpuinfo_x86 *c) + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + +- if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { +- set_cpu_cap(c, X86_FEATURE_RDS); +- set_cpu_cap(c, X86_FEATURE_AMD_RDS); ++ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ set_cpu_cap(c, X86_FEATURE_AMD_SSBD); + } + } + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 9a3bb65..ae6f9ba 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -44,10 +44,10 @@ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; + + /* + * AMD specific MSR info for Speculative Store Bypass control. +- * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). ++ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ + u64 __ro_after_init x86_amd_ls_cfg_base; +-u64 __ro_after_init x86_amd_ls_cfg_rds_mask; ++u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; + + void __init check_bugs(void) + { +@@ -145,7 +145,7 @@ u64 x86_spec_ctrl_get_default(void) + u64 msrval = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) +- msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + return msrval; + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); +@@ -158,7 +158,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) + return; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) +- host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); +@@ -173,18 +173,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) + return; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) +- host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); ++ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, host); + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + +-static void x86_amd_rds_enable(void) ++static void x86_amd_ssb_disable(void) + { +- u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; ++ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + +- if (boot_cpu_has(X86_FEATURE_AMD_RDS)) ++ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); + } + +@@ -472,7 +472,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + +- if (!boot_cpu_has(X86_FEATURE_RDS)) ++ if (!boot_cpu_has(X86_FEATURE_SSBD)) + return mode; + + cmd = ssb_parse_cmdline(); +@@ -506,7 +506,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 
+- * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass ++ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode == SPEC_STORE_BYPASS_DISABLE) { +@@ -517,12 +517,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: +- x86_spec_ctrl_base |= SPEC_CTRL_RDS; +- x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; +- x86_spec_ctrl_set(SPEC_CTRL_RDS); ++ x86_spec_ctrl_base |= SPEC_CTRL_SSBD; ++ x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; ++ x86_spec_ctrl_set(SPEC_CTRL_SSBD); + break; + case X86_VENDOR_AMD: +- x86_amd_rds_enable(); ++ x86_amd_ssb_disable(); + break; + } + } +@@ -555,16 +555,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); +- update = test_and_clear_tsk_thread_flag(task, TIF_RDS); ++ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_RDS); ++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); +- update = !test_and_set_tsk_thread_flag(task, TIF_RDS); ++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; +@@ -634,7 +634,7 @@ void x86_spec_ctrl_setup_ap(void) + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) +- x86_amd_rds_enable(); ++ x86_amd_ssb_disable(); + } + + #ifdef CONFIG_SYSFS +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index beb1da8..d0dd736 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -911,7 +911,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && +- !(ia32_cap & ARCH_CAP_RDS_NO)) ++ !(ia32_cap & ARCH_CAP_SSBD_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_speculation)) +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index f15aea6..047adaa 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -154,7 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); +- setup_clear_cpu_cap(X86_FEATURE_RDS); ++ setup_clear_cpu_cap(X86_FEATURE_SSBD); + } + + /* +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 9c48e18..c344230 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -207,11 +207,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn + { + u64 msr; + +- if (static_cpu_has(X86_FEATURE_AMD_RDS)) { +- msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); ++ if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { +- msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); ++ msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + } +@@ -250,7 +250,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct 
task_struct *next_p, + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); + +- if ((tifp ^ tifn) & _TIF_RDS) ++ if ((tifp ^ tifn) & _TIF_SSBD) + __speculative_store_bypass_update(tifn); + } + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 237e926..db95637 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = +- F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); ++ F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); + + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 39dd457..72551c5 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); +- return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); ++ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); + } + + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 17199dc..c7df5c4 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3133,7 +3133,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ +- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + return 1; + + vmx->spec_ctrl = data; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch new file mode 100644 index 00000000..f24bec49 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch @@ -0,0 +1,35 @@ +From 1372f3493fdf1eaaeb82c4f3770a38aad5541f3b Mon Sep 17 00:00:00 2001 +From: Jiri Kosina <jkosina@suse.cz> +Date: Thu, 10 May 2018 22:47:18 +0200 +Subject: [PATCH 58/93] x86/bugs: Fix __ssb_select_mitigation() return type + +commit d66d8ff3d21667b41eddbe86b35ab411e40d8c5f upstream + +__ssb_select_mitigation() returns one of the members of enum ssb_mitigation, +not ssb_mitigation_cmd; fix the prototype to reflect that. 
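+
+The mismatch compiled cleanly because C converts between enum types
+implicitly; a minimal sketch of the trap (illustrative only, with
+made-up enumerator names):
+
+	enum ssb_mitigation     { MODE_NONE };
+	enum ssb_mitigation_cmd { CMD_NONE };
+
+	/* Declared to return the command enum... */
+	static enum ssb_mitigation_cmd pick(void)
+	{
+		/* ...yet returns a member of the other enum: no
+		 * diagnostic with default warning flags, so only
+		 * review catches it. */
+		return MODE_NONE;
+	}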
+ +Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") +Signed-off-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index ae6f9ba..c7b4d11 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -467,7 +467,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) + return cmd; + } + +-static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) ++static enum ssb_mitigation __init __ssb_select_mitigation(void) + { + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch new file mode 100644 index 00000000..5dc616b5 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0059-x86-bugs-Make-cpu_show_common-static.patch @@ -0,0 +1,34 @@ +From 3c2ec124e35d5a74d3ed660095591290dc1d549b Mon Sep 17 00:00:00 2001 +From: Jiri Kosina <jkosina@suse.cz> +Date: Thu, 10 May 2018 22:47:32 +0200 +Subject: [PATCH 59/93] x86/bugs: Make cpu_show_common() static + +commit 7bb4d366cba992904bffa4820d24e70a3de93e76 upstream + +cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so +make it static. + +Signed-off-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index c7b4d11..8187642 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void) + + #ifdef CONFIG_SYSFS + +-ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, ++static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { + if (!boot_cpu_has_bug(bug)) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch new file mode 100644 index 00000000..ef9f4216 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch @@ -0,0 +1,42 @@ +From 947d5d98fb1328a22a8b502f8ce6f8e5657a5ec7 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Fri, 11 May 2018 16:50:35 -0400 +Subject: [PATCH 60/93] x86/bugs: Fix the parameters alignment and missing void + +commit ffed645e3be0e32f8e9ab068d257aee8d0fe8eec upstream + +Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static") +Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: 
Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 8187642..4f8c88e 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -530,7 +530,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + return mode; + } + +-static void ssb_select_mitigation() ++static void ssb_select_mitigation(void) + { + ssb_mode = __ssb_select_mitigation(); + +@@ -640,7 +640,7 @@ void x86_spec_ctrl_setup_ap(void) + #ifdef CONFIG_SYSFS + + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, +- char *buf, unsigned int bug) ++ char *buf, unsigned int bug) + { + if (!boot_cpu_has_bug(bug)) + return sprintf(buf, "Not affected\n"); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch new file mode 100644 index 00000000..1f830819 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch @@ -0,0 +1,42 @@ +From 76eefada90172bd111371bd2669a50eec64a3b0f Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Sun, 13 May 2018 17:33:57 -0400 +Subject: [PATCH 61/93] x86/cpu: Make alternative_msr_write work for 32-bit + code + +commit 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b upstream + +Cast val and (val >> 32) to (u32), so that they fit in a +general-purpose register in both 32-bit and 64-bit code. + +[ tglx: Made it u32 instead of uintptr_t ] + +Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") +Signed-off-by: Jim Mattson <jmattson@google.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 328ea3c..bc258e6 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -265,8 +265,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) + { + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), +- "a" (val), +- "d" (val >> 32), ++ "a" ((u32)val), ++ "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch new file mode 100644 index 00000000..75caec43 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch @@ -0,0 +1,70 @@ +From 21d2555ad333e693fc6859bff2a60b9b24de8d99 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 11 May 2018 15:21:01 +0200 +Subject: [PATCH 62/93] KVM: SVM: Move spec control call after restore of GS + +commit 15e6c22fd8e5a42c5ed6d487b7c9fe44c2517765 upstream + +svm_vcpu_run() invokes x86_spec_ctrl_restore_host() 
after VMEXIT, but +before the host GS is restored. x86_spec_ctrl_restore_host() uses 'current' +to determine the host SSBD state of the thread. 'current' is GS based, but +host GS is not yet restored and the access causes a triple fault. + +Move the call after the host GS restore. + +Fixes: 885f82bfbc6f x86/process: Allow runtime control of Speculative Store Bypass +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Acked-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/svm.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 47779f5..9991462 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -4999,6 +4999,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + #endif + ); + ++ /* Eliminate branch target predictions from guest mode */ ++ vmexit_fill_RSB(); ++ ++#ifdef CONFIG_X86_64 ++ wrmsrl(MSR_GS_BASE, svm->host.gs_base); ++#else ++ loadsegment(fs, svm->host.fs); ++#ifndef CONFIG_X86_32_LAZY_GS ++ loadsegment(gs, svm->host.gs); ++#endif ++#endif ++ + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and +@@ -5019,18 +5031,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + + x86_spec_ctrl_restore_host(svm->spec_ctrl); + +- /* Eliminate branch target predictions from guest mode */ +- vmexit_fill_RSB(); +- +-#ifdef CONFIG_X86_64 +- wrmsrl(MSR_GS_BASE, svm->host.gs_base); +-#else +- loadsegment(fs, svm->host.fs); +-#ifndef CONFIG_X86_32_LAZY_GS +- loadsegment(gs, svm->host.gs); +-#endif +-#endif +- + reload_tss(vcpu); + + local_irq_disable(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch new file mode 100644 index 00000000..a004c9a0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch @@ -0,0 +1,156 @@ +From 471e61fb50a8b552bf18db27c7ff9808182008dd Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Wed, 2 May 2018 18:15:14 +0200 +Subject: [PATCH 63/93] x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit e7c587da125291db39ddf1f49b18e5970adbac17 upstream + +Intel and AMD have different CPUID bits hence for those use synthetic bits +which get set on the respective vendor's in init_speculation_control(). So +that debacles like what the commit message of + + c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") + +talks about don't happen anymore. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Tested-by: Jörg Otte <jrg.otte@gmail.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: "Kirill A. 
Shutemov" <kirill.shutemov@linux.intel.com> +Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 12 ++++++++---- + arch/x86/kernel/cpu/common.c | 14 ++++++++++---- + arch/x86/kvm/cpuid.c | 10 +++++----- + arch/x86/kvm/cpuid.h | 4 ++-- + 4 files changed, 25 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 0ed8ea5..059437a 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -205,7 +205,10 @@ + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ + #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +-#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ ++#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ ++#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ ++#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ ++#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +@@ -263,9 +266,9 @@ + /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ + #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ ++#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ ++#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +@@ -301,6 +304,7 @@ + #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ + #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + ++ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ + #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index d0dd736..67bfa3c 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -725,17 +725,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. +- * +- * We use the AMD bits in 0x8000_0008 EBX as the generic hardware +- * features, which are visible in /proc/cpuinfo and used by the +- * kernel. So set those accordingly from the Intel bits. 
+ */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } ++ + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBPB)) ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) ++ set_cpu_cap(c, X86_FEATURE_STIBP); + } + + void get_cpu_cap(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index db95637..4ccdfbe 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = +- F(IBPB) | F(IBRS); ++ F(AMD_IBPB) | F(AMD_IBRS); + + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = +@@ -619,10 +619,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + entry->eax = g_phys_as | (virt_as << 8); + entry->edx = 0; + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ +- if (boot_cpu_has(X86_FEATURE_IBPB)) +- entry->ebx |= F(IBPB); +- if (boot_cpu_has(X86_FEATURE_IBRS)) +- entry->ebx |= F(IBRS); ++ if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) ++ entry->ebx |= F(AMD_IBPB); ++ if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) ++ entry->ebx |= F(AMD_IBRS); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + break; +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 72551c5..410070c 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -157,7 +157,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); +- if (best && (best->ebx & bit(X86_FEATURE_IBPB))) ++ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +@@ -168,7 +168,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); +- if (best && (best->ebx & bit(X86_FEATURE_IBRS))) ++ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch new file mode 100644 index 00000000..b84bc768 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch @@ -0,0 +1,155 @@ +From 7731d9040d16874cb3fe11f52c4a238ab3fd658d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 10 May 2018 19:13:18 +0200 +Subject: [PATCH 64/93] x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration + from IBRS + +commit 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 upstream + +The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on +Intel and implied by IBRS or STIBP support on AMD. 
That's just confusing +and in case an AMD CPU has IBRS not supported because the underlying +problem has been fixed but has another bit valid in the SPEC_CTRL MSR, +the thing falls apart. + +Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the +availability on both Intel and AMD. + +While at it replace the boot_cpu_has() checks with static_cpu_has() where +possible. This prevents late microcode loading from exposing SPEC_CTRL, but +late loading is already very limited as it does not reevaluate the +mitigation options and other bits and pieces. Having static_cpu_has() is +the simplest and least fragile solution. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 ++ + arch/x86/kernel/cpu/bugs.c | 18 +++++++++++------- + arch/x86/kernel/cpu/common.c | 9 +++++++-- + arch/x86/kernel/cpu/intel.c | 1 + + 4 files changed, 21 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 059437a..ca0f33f 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -197,6 +197,8 @@ + #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + ++#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ ++ + #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + + /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 4f8c88e..59649310 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -63,7 +63,7 @@ void __init check_bugs(void) + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ +- if (boot_cpu_has(X86_FEATURE_IBRS)) ++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* Select the proper spectre mitigation before patching alternatives */ +@@ -144,7 +144,7 @@ u64 x86_spec_ctrl_get_default(void) + { + u64 msrval = x86_spec_ctrl_base; + +- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + return msrval; + } +@@ -154,10 +154,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) + { + u64 host = x86_spec_ctrl_base; + +- if (!boot_cpu_has(X86_FEATURE_IBRS)) ++ /* Is MSR_SPEC_CTRL implemented ? */ ++ if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + return; + +- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ /* Intel controls SSB in MSR_SPEC_CTRL */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) +@@ -169,10 +171,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) + { + u64 host = x86_spec_ctrl_base; + +- if (!boot_cpu_has(X86_FEATURE_IBRS)) ++ /* Is MSR_SPEC_CTRL implemented ? 
*/ ++ if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + return; + +- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ /* Intel controls SSB in MSR_SPEC_CTRL */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) +@@ -630,7 +634,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + + void x86_spec_ctrl_setup_ap(void) + { +- if (boot_cpu_has(X86_FEATURE_IBRS)) ++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 67bfa3c..04362282 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -729,19 +729,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } + + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + +- if (cpu_has(c, X86_FEATURE_AMD_IBRS)) ++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { + set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + +- if (cpu_has(c, X86_FEATURE_AMD_STIBP)) ++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { + set_cpu_cap(c, X86_FEATURE_STIBP); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } + } + + void get_cpu_cap(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 047adaa..7f495e8 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -153,6 +153,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); ++ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SSBD); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch new file mode 100644 index 00000000..84d35057 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch @@ -0,0 +1,163 @@ +From f8a3968ae9a100977e28f434f303fd74a0a8591b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 10 May 2018 20:21:36 +0200 +Subject: [PATCH 65/93] x86/cpufeatures: Disentangle SSBD enumeration + +commit 52817587e706686fcdb27f14c1b000c92f266c96 upstream + +The SSBD enumeration is similarly to the other bits magically shared +between Intel and AMD though the mechanisms are different. + +Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific +features or family dependent setup. + +Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is +controlled via MSR_SPEC_CTRL and fix up the usage sites. 
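+
+Schematically, the result is that each vendor mechanism funnels into
+one synthetic bit and generic code tests only that bit (a condensed
+sketch of the pattern in the hunks below, not additional code):
+
+	/* vendor code: each mechanism sets the common capability */
+	if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD))	/* Intel CPUID */
+		set_cpu_cap(c, X86_FEATURE_SSBD);
+	/* AMD reaches X86_FEATURE_SSBD via its LS_CFG MSR path. */
+
+	/* generic code: vendor-agnostic from here on */
+	if (boot_cpu_has(X86_FEATURE_SSBD))
+		ssb_select_mitigation();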
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 5 +++-- + arch/x86/kernel/cpu/amd.c | 7 +------ + arch/x86/kernel/cpu/bugs.c | 10 +++++----- + arch/x86/kernel/cpu/common.c | 3 +++ + arch/x86/kernel/cpu/intel.c | 1 + + arch/x86/kernel/process.c | 2 +- + 6 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index ca0f33f..d071767 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -198,6 +198,7 @@ + #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ ++#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ + + #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + +@@ -207,7 +208,7 @@ + #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ + #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ + #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +-#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ ++#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ + #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +@@ -314,7 +315,7 @@ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +-#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ ++#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ + + /* + * BUG word(s) +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index acb2fcc..179d572 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -558,8 +558,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { ++ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); + setup_force_cpu_cap(X86_FEATURE_SSBD); +- setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } +@@ -848,11 +848,6 @@ static void init_amd(struct cpuinfo_x86 *c) + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. 
*/ + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +- +- if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { +- set_cpu_cap(c, X86_FEATURE_SSBD); +- set_cpu_cap(c, X86_FEATURE_AMD_SSBD); +- } + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 59649310..15a6c58 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -158,8 +158,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + return; + +- /* Intel controls SSB in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) ++ /* SSBD controlled in MSR_SPEC_CTRL */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) +@@ -175,8 +175,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + return; + +- /* Intel controls SSB in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) ++ /* SSBD controlled in MSR_SPEC_CTRL */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) +@@ -188,7 +188,7 @@ static void x86_amd_ssb_disable(void) + { + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + +- if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) ++ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); + } + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 04362282..945e841 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -735,6 +735,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 7f495e8..93781e3 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -156,6 +156,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SSBD); ++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); + } + + /* +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index c344230..b3cd08e 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -207,7 +207,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn + { + u64 msr; + +- if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch new file mode 100644 index 00000000..b9d9a567 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch @@ -0,0 +1,55 @@ +From b6aa89b4ab638e59beab4c2d264c02dfc887187f Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Thu, 7 Sep 2017 19:08:21 +0200 +Subject: [PATCH 66/93] 
x86/cpu/AMD: Fix erratum 1076 (CPB bit) + +commit f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 upstream + +CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do +support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Sherry Hurwitz <sherry.hurwitz@amd.com> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/amd.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 179d572..21367b5 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -749,6 +749,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + } + } + ++static void init_amd_zn(struct cpuinfo_x86 *c) ++{ ++ /* ++ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects ++ * all up to and including B1. ++ */ ++ if (c->x86_model <= 1 && c->x86_stepping <= 1) ++ set_cpu_cap(c, X86_FEATURE_CPB); ++} ++ + static void init_amd(struct cpuinfo_x86 *c) + { + u32 dummy; +@@ -779,6 +789,7 @@ static void init_amd(struct cpuinfo_x86 *c) + case 0x10: init_amd_gh(c); break; + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; ++ case 0x17: init_amd_zn(c); break; + } + + /* Enable workaround for FXSAVE leak */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch new file mode 100644 index 00000000..4dc85820 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0067-x86-cpufeatures-Add-FEATURE_ZEN.patch @@ -0,0 +1,48 @@ +From c9b69035094a1cadce0c634ad76ded5a4a033ff6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 10 May 2018 16:26:00 +0200 +Subject: [PATCH 67/93] x86/cpufeatures: Add FEATURE_ZEN + +commit d1035d971829dcf80e8686ccde26f94b0a069472 upstream + +Add a ZEN feature bit so family-dependent static_cpu_has() optimizations +can be built for ZEN. 
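+
+For illustration, such a family-dependent fast path (hypothetical
+caller; the real users follow in later patches of this series) compiles
+down to a boot-time patched static branch:
+
+        if (static_cpu_has(X86_FEATURE_ZEN)) {
+                /* Zen (family 0x17) specific handling */
+                zen_specific_setup();
+        }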
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 2 ++ + arch/x86/kernel/cpu/amd.c | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index d071767..ec87b8c 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -212,6 +212,8 @@ + #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ ++ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 21367b5..4c2be99 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -751,6 +751,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + + static void init_amd_zn(struct cpuinfo_x86 *c) + { ++ set_cpu_cap(c, X86_FEATURE_ZEN); + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects + * all up to and including B1. +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch new file mode 100644 index 00000000..cb74bad4 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0068-x86-speculation-Handle-HT-correctly-on-AMD.patch @@ -0,0 +1,240 @@ +From cbf0028f2c499e981af020c1cdb6bff7d0b4e192 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 9 May 2018 21:53:09 +0200 +Subject: [PATCH 68/93] x86/speculation: Handle HT correctly on AMD + +commit 1f50ddb4f4189243c05926b842dc1a0332195f31 upstream + +The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when +hyperthreading is enabled the SSBD bit toggle needs to take both cores into +account. Otherwise the following situation can happen: + +CPU0 CPU1 + +disable SSB + disable SSB + enable SSB <- Enables it for the Core, i.e. for CPU0 as well + +So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled +again. + +On Intel the SSBD control is per core as well, but the synchronization +logic is implemented behind the per thread SPEC_CTRL MSR. It works like +this: + + CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + +i.e. if one of the threads enables a mitigation then this affects both and +the mitigation is only disabled in the core when both threads disabled it. + +Add the necessary synchronization logic for AMD family 17H. Unfortunately +that requires a spinlock to serialize the access to the MSR, but the locks +are only shared between siblings. 
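+
+Roughly, with a per core reference count the update logic becomes
+(locking elided; base/ssbd_mask stand for x86_amd_ls_cfg_base and
+x86_amd_ls_cfg_ssbd_mask):
+
+        /* enable: first sibling requesting SSBD writes the MSR */
+        if (st->shared_state->disable_state++ == 0)
+                wrmsrl(MSR_AMD64_LS_CFG, base | ssbd_mask);
+
+        /* disable: last sibling dropping SSBD clears it for the core */
+        if (--st->shared_state->disable_state == 0)
+                wrmsrl(MSR_AMD64_LS_CFG, base);
+
+which emulates the CORE_SPEC_CTRL OR-logic described above.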
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/spec-ctrl.h | 6 ++ + arch/x86/kernel/process.c | 125 +++++++++++++++++++++++++++++++++++++-- + arch/x86/kernel/smpboot.c | 5 ++ + 3 files changed, 130 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index dc21209..0cb49c4 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; + } + ++#ifdef CONFIG_SMP ++extern void speculative_store_bypass_ht_init(void); ++#else ++static inline void speculative_store_bypass_ht_init(void) { } ++#endif ++ + extern void speculative_store_bypass_update(void); + + #endif +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index b3cd08e..1e9d155 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -203,22 +203,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss, + } + } + +-static __always_inline void __speculative_store_bypass_update(unsigned long tifn) ++#ifdef CONFIG_SMP ++ ++struct ssb_state { ++ struct ssb_state *shared_state; ++ raw_spinlock_t lock; ++ unsigned int disable_state; ++ unsigned long local_state; ++}; ++ ++#define LSTATE_SSB 0 ++ ++static DEFINE_PER_CPU(struct ssb_state, ssb_state); ++ ++void speculative_store_bypass_ht_init(void) + { +- u64 msr; ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ st->local_state = 0; ++ ++ /* ++ * Shared state setup happens once on the first bringup ++ * of the CPU. It's not destroyed on CPU hotunplug. ++ */ ++ if (st->shared_state) ++ return; ++ ++ raw_spin_lock_init(&st->lock); + +- if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { +- msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); ++ /* ++ * Go over HT siblings and check whether one of them has set up the ++ * shared state pointer already. ++ */ ++ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { ++ if (cpu == this_cpu) ++ continue; ++ ++ if (!per_cpu(ssb_state, cpu).shared_state) ++ continue; ++ ++ /* Link it to the state of the sibling: */ ++ st->shared_state = per_cpu(ssb_state, cpu).shared_state; ++ return; ++ } ++ ++ /* ++ * First HT sibling to come up on the core. Link shared state of ++ * the first HT sibling to itself. The siblings on the same core ++ * which come up later will see the shared state pointer and link ++ * themself to the state of this CPU. ++ */ ++ st->shared_state = st; ++} ++ ++/* ++ * Logic is: First HT sibling enables SSBD for both siblings in the core ++ * and last sibling to disable it, disables it for the whole core. This how ++ * MSR_SPEC_CTRL works in "hardware": ++ * ++ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL ++ */ ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ u64 msr = x86_amd_ls_cfg_base; ++ ++ if (!static_cpu_has(X86_FEATURE_ZEN)) { ++ msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); ++ return; ++ } ++ ++ if (tifn & _TIF_SSBD) { ++ /* ++ * Since this can race with prctl(), block reentry on the ++ * same CPU. 
++ */ ++ if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ msr |= x86_amd_ls_cfg_ssbd_mask; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ /* First sibling enables SSBD: */ ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ st->shared_state->disable_state++; ++ raw_spin_unlock(&st->shared_state->lock); + } else { +- msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ st->shared_state->disable_state--; ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ raw_spin_unlock(&st->shared_state->lock); + } + } ++#else ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); ++ ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++} ++#endif ++ ++static __always_inline void intel_set_ssb_state(unsigned long tifn) ++{ ++ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); ++ ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++} ++ ++static __always_inline void __speculative_store_bypass_update(unsigned long tifn) ++{ ++ if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) ++ amd_set_core_ssb_state(tifn); ++ else ++ intel_set_ssb_state(tifn); ++} + + void speculative_store_bypass_update(void) + { ++ preempt_disable(); + __speculative_store_bypass_update(current_thread_info()->flags); ++ preempt_enable(); + } + + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 36171bc..c898a69 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -75,6 +75,7 @@ + #include <asm/i8259.h> + #include <asm/realmode.h> + #include <asm/misc.h> ++#include <asm/spec-ctrl.h> + + /* Number of siblings per CPU package */ + int smp_num_siblings = 1; +@@ -237,6 +238,8 @@ static void notrace start_secondary(void *unused) + */ + check_tsc_sync_target(); + ++ speculative_store_bypass_ht_init(); ++ + /* + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with +@@ -1333,6 +1336,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) + set_mtrr_aps_delayed_init(); + + smp_quirk_init_udelay(); ++ ++ speculative_store_bypass_ht_init(); + } + + void arch_enable_nonboot_cpus_begin(void) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch new file mode 100644 index 00000000..e298d3bc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch @@ -0,0 +1,163 @@ +From 77aaa77d68bbabee027737671cdc1318e8dfe763 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 9 May 2018 23:01:01 +0200 +Subject: [PATCH 69/93] x86/bugs, KVM: Extend speculation control for + VIRT_SPEC_CTRL + +commit ccbcd2674472a978b48c91c1fbfb66c0ff959f24 upstream + +AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store +Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care +about the bit position of the SSBD bit and thus facilitate migration. +Also, the sibling coordination on Family 17H CPUs can only be done on +the host. 
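+
+In other words (sketch condensed from the later patches in this
+series), the guest's MSR write is intercepted and routed through the
+host's own SSBD machinery:
+
+        /* guest wrmsr(VIRT_SPEC_CTRL) trapped by the host ... */
+        svm->virt_spec_ctrl = data & SPEC_CTRL_SSBD;
+        /* ... and applied via the host's LS_CFG sibling coordination */
+        speculative_store_bypass_update(ssbd_spec_ctrl_to_tif(data));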
+ +Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an +extra argument for the VIRT_SPEC_CTRL MSR. + +Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU +data structure which is going to be used in later patches for the actual +implementation. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/spec-ctrl.h | 9 ++++++--- + arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- + arch/x86/kvm/svm.c | 11 +++++++++-- + arch/x86/kvm/vmx.c | 5 +++-- + 4 files changed, 36 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 0cb49c4..6e28740 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -10,10 +10,13 @@ + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. +- * Takes the guest view of SPEC_CTRL MSR as a parameter. ++ * Takes the guest view of SPEC_CTRL MSR as a parameter and also ++ * the guest's version of VIRT_SPEC_CTRL, if emulated. + */ +-extern void x86_spec_ctrl_set_guest(u64); +-extern void x86_spec_ctrl_restore_host(u64); ++extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, ++ u64 guest_virt_spec_ctrl); ++extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, ++ u64 guest_virt_spec_ctrl); + + /* AMD specific Speculative Store Bypass MSR data */ + extern u64 x86_amd_ls_cfg_base; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 15a6c58..d00e246 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -150,7 +150,15 @@ u64 x86_spec_ctrl_get_default(void) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + +-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) ++/** ++ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest ++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL ++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL ++ * (may get translated to MSR_AMD64_LS_CFG bits) ++ * ++ * Avoids writing to the MSR if the content/bits are the same ++ */ ++void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) + { + u64 host = x86_spec_ctrl_base; + +@@ -167,7 +175,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); + +-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) ++/** ++ * x86_spec_ctrl_restore_host - Restore host speculation control registers ++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL ++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL ++ * (may get translated to MSR_AMD64_LS_CFG bits) ++ * ++ * Avoids writing to the MSR if the content/bits are the same ++ */ ++void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) + { + u64 host = x86_spec_ctrl_base; + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 9991462..481b106 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -185,6 +185,12 @@ struct vcpu_svm { + } host; + + u64 spec_ctrl; ++ /* ++ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be ++ * translated into the appropriate L2_CFG bits on the host to ++ * perform 
speculative control. ++ */ ++ u64 virt_spec_ctrl; + + u32 *msrpm; + +@@ -1558,6 +1564,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) + u32 eax = 1; + + svm->spec_ctrl = 0; ++ svm->virt_spec_ctrl = 0; + + if (!init_event) { + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | +@@ -4905,7 +4912,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ +- x86_spec_ctrl_set_guest(svm->spec_ctrl); ++ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + + asm volatile ( + "push %%" _ASM_BP "; \n\t" +@@ -5029,7 +5036,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + +- x86_spec_ctrl_restore_host(svm->spec_ctrl); ++ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + + reload_tss(vcpu); + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index c7df5c4..55af4b6 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -8898,9 +8898,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ +- x86_spec_ctrl_set_guest(vmx->spec_ctrl); ++ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + vmx->__launched = vmx->loaded_vmcs->launched; ++ + asm( + /* Store host registers */ + "push %%" _ASM_DX "; push %%" _ASM_BP ";" +@@ -9036,7 +9037,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + +- x86_spec_ctrl_restore_host(vmx->spec_ctrl); ++ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch new file mode 100644 index 00000000..f7f668b1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0070-x86-speculation-Add-virtualized-speculative-store-by.patch @@ -0,0 +1,104 @@ +From fa6ec76841319858ad2046107420a63feda4a0bb Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Thu, 17 May 2018 17:09:18 +0200 +Subject: [PATCH 70/93] x86/speculation: Add virtualized speculative store + bypass disable support + +commit 11fb0683493b2da112cd64c9dada221b52463bf7 upstream + +Some AMD processors only support a non-architectural means of enabling +speculative store bypass disable (SSBD). To allow a simplified view of +this to a guest, an architectural definition has been created through a new +CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f. With this, a +hypervisor can virtualize the existence of this definition and provide an +architectural method for using SSBD to a guest. + +Add the new CPUID feature, the new MSR and update the existing SSBD +support to use this MSR when present. 
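+
+With that, the SSBD toggle on such hardware is a single architectural
+MSR write, since SSBD occupies the same bit position in SPEC_CTRL and
+VIRT_SPEC_CTRL:
+
+        wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));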
+ +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 2 ++ + arch/x86/kernel/cpu/bugs.c | 4 +++- + arch/x86/kernel/process.c | 13 ++++++++++++- + 4 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index ec87b8c..c278f27 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -274,6 +274,7 @@ + #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ ++#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 0145a0b..ad5d0d8 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -323,6 +323,8 @@ + #define MSR_AMD64_IBSOPDATA4 0xc001103d + #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ + ++#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f ++ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index d00e246..97987b5 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -204,7 +204,9 @@ static void x86_amd_ssb_disable(void) + { + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + +- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) ++ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) ++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); ++ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); + } + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 1e9d155..6d9e1ee 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -312,6 +312,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn) + } + #endif + ++static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) ++{ ++ /* ++ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, ++ * so ssbd_tif_to_spec_ctrl() just works. 
++ */ ++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); ++} ++ + static __always_inline void intel_set_ssb_state(unsigned long tifn) + { + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); +@@ -321,7 +330,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn) + + static __always_inline void __speculative_store_bypass_update(unsigned long tifn) + { +- if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) ++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) ++ amd_set_ssb_virt_state(tifn); ++ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch new file mode 100644 index 00000000..daf64371 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0071-x86-speculation-Rework-speculative_store_bypass_upda.patch @@ -0,0 +1,75 @@ +From 10bd199ba2af68b40deb854851b3db51bd97531a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 10 May 2018 20:31:44 +0200 +Subject: [PATCH 71/93] x86/speculation: Rework + speculative_store_bypass_update() + +commit 0270be3e34efb05a88bc4c422572ece038ef3608 upstream + +The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse +speculative_store_bypass_update() to avoid code duplication. Add an +argument for supplying a thread info (TIF) value and create a wrapper +speculative_store_bypass_update_current() which is used at the existing +call site. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/spec-ctrl.h | 7 ++++++- + arch/x86/kernel/cpu/bugs.c | 2 +- + arch/x86/kernel/process.c | 4 ++-- + 3 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 6e28740..82b6c5a 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void); + static inline void speculative_store_bypass_ht_init(void) { } + #endif + +-extern void speculative_store_bypass_update(void); ++extern void speculative_store_bypass_update(unsigned long tif); ++ ++static inline void speculative_store_bypass_update_current(void) ++{ ++ speculative_store_bypass_update(current_thread_info()->flags); ++} + + #endif +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 97987b5..eddbdc8 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -597,7 +597,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) + * mitigation until it is next scheduled. 
+ */ + if (task == current && update) +- speculative_store_bypass_update(); ++ speculative_store_bypass_update_current(); + + return 0; + } +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 6d9e1ee..00a9047 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -338,10 +338,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn + intel_set_ssb_state(tifn); + } + +-void speculative_store_bypass_update(void) ++void speculative_store_bypass_update(unsigned long tif) + { + preempt_disable(); +- __speculative_store_bypass_update(current_thread_info()->flags); ++ __speculative_store_bypass_update(tif); + preempt_enable(); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch new file mode 100644 index 00000000..e3e0a67d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch @@ -0,0 +1,145 @@ +From f30cba1d35ebb9a07ebd54253086280080b366a6 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Sat, 12 May 2018 00:14:51 +0200 +Subject: [PATCH 72/93] x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host} + +commit cc69b34989210f067b2c51d5539b5f96ebcc3a01 upstream + +Function bodies are very similar and are going to grow more almost +identical code. Add a bool arg to determine whether SPEC_CTRL is being set +for the guest or restored to the host. + +No functional changes. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++++++--- + arch/x86/kernel/cpu/bugs.c | 60 ++++++++++------------------------------ + 2 files changed, 44 insertions(+), 49 deletions(-) + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 82b6c5a..9cecbe5 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -13,10 +13,35 @@ + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. 
+ */ +-extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, +- u64 guest_virt_spec_ctrl); +-extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, +- u64 guest_virt_spec_ctrl); ++extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); ++ ++/** ++ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest ++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL ++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL ++ * (may get translated to MSR_AMD64_LS_CFG bits) ++ * ++ * Avoids writing to the MSR if the content/bits are the same ++ */ ++static inline ++void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) ++{ ++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); ++} ++ ++/** ++ * x86_spec_ctrl_restore_host - Restore host speculation control registers ++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL ++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL ++ * (may get translated to MSR_AMD64_LS_CFG bits) ++ * ++ * Avoids writing to the MSR if the content/bits are the same ++ */ ++static inline ++void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) ++{ ++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); ++} + + /* AMD specific Speculative Store Bypass MSR data */ + extern u64 x86_amd_ls_cfg_base; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index eddbdc8..9203150 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -150,55 +150,25 @@ u64 x86_spec_ctrl_get_default(void) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + +-/** +- * x86_spec_ctrl_set_guest - Set speculation control registers for the guest +- * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL +- * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL +- * (may get translated to MSR_AMD64_LS_CFG bits) +- * +- * Avoids writing to the MSR if the content/bits are the same +- */ +-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) ++void ++x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 host = x86_spec_ctrl_base; ++ struct thread_info *ti = current_thread_info(); ++ u64 msr, host = x86_spec_ctrl_base; + + /* Is MSR_SPEC_CTRL implemented ? */ +- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- return; +- +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) +- host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); +- +- if (host != guest_spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); +-} +-EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); +- +-/** +- * x86_spec_ctrl_restore_host - Restore host speculation control registers +- * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL +- * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL +- * (may get translated to MSR_AMD64_LS_CFG bits) +- * +- * Avoids writing to the MSR if the content/bits are the same +- */ +-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +-{ +- u64 host = x86_spec_ctrl_base; +- +- /* Is MSR_SPEC_CTRL implemented ? 
*/ +- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- return; +- +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) +- host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); +- +- if (host != guest_spec_ctrl) +- wrmsrl(MSR_IA32_SPEC_CTRL, host); ++ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { ++ /* SSBD controlled in MSR_SPEC_CTRL */ ++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) ++ host |= ssbd_tif_to_spec_ctrl(ti->flags); ++ ++ if (host != guest_spec_ctrl) { ++ msr = setguest ? guest_spec_ctrl : host; ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ } ++ } + } +-EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); ++EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); + + static void x86_amd_ssb_disable(void) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch new file mode 100644 index 00000000..49224dbb --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch @@ -0,0 +1,120 @@ +From 22a75daea25a170892d8c6cbf0b740ef35219cc8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 12 May 2018 20:49:16 +0200 +Subject: [PATCH 73/93] x86/bugs: Expose x86_spec_ctrl_base directly + +commit fa8ac4988249c38476f6ad678a4848a736373403 upstream + +x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR. +x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to +prevent modification to that variable. Though the variable is read only +after init and globaly visible already. + +Remove the function and export the variable instead. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 16 +++++----------- + arch/x86/include/asm/spec-ctrl.h | 3 --- + arch/x86/kernel/cpu/bugs.c | 11 +---------- + 3 files changed, 6 insertions(+), 24 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index bc258e6..8d9deec 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -217,16 +217,7 @@ enum spectre_v2_mitigation { + SPECTRE_V2_IBRS, + }; + +-/* +- * The Intel specification for the SPEC_CTRL MSR requires that we +- * preserve any already set reserved bits at boot time (e.g. for +- * future additions that this kernel is not currently aware of). +- * We then set any additional mitigation bits that we want +- * ourselves and always use this as the base for SPEC_CTRL. +- * We also use this when handling guest entry/exit as below. +- */ + extern void x86_spec_ctrl_set(u64); +-extern u64 x86_spec_ctrl_get_default(void); + + /* The Speculative Store Bypass disable variants */ + enum ssb_mitigation { +@@ -278,6 +269,9 @@ static inline void indirect_branch_prediction_barrier(void) + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + } + ++/* The Intel SPEC CTRL MSR base value cache */ ++extern u64 x86_spec_ctrl_base; ++ + /* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. 
+@@ -286,7 +280,7 @@ static inline void indirect_branch_prediction_barrier(void) + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ +- u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ ++ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ + \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ +@@ -295,7 +289,7 @@ do { \ + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- u64 val = x86_spec_ctrl_get_default(); \ ++ u64 val = x86_spec_ctrl_base; \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + X86_FEATURE_USE_IBRS_FW); \ +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 9cecbe5..763d497 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) + extern u64 x86_amd_ls_cfg_base; + extern u64 x86_amd_ls_cfg_ssbd_mask; + +-/* The Intel SPEC CTRL MSR base value cache */ +-extern u64 x86_spec_ctrl_base; +- + static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) + { + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 9203150..47b7f4f 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -35,6 +35,7 @@ static void __init ssb_select_mitigation(void); + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ + u64 __ro_after_init x86_spec_ctrl_base; ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + + /* + * The vendor and possibly platform specific bits which can be modified in +@@ -140,16 +141,6 @@ void x86_spec_ctrl_set(u64 val) + } + EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); + +-u64 x86_spec_ctrl_get_default(void) +-{ +- u64 msrval = x86_spec_ctrl_base; +- +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) +- msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); +- return msrval; +-} +-EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); +- + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch new file mode 100644 index 00000000..40bf45d2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0074-x86-bugs-Remove-x86_spec_ctrl_set.patch @@ -0,0 +1,76 @@ +From ac97f3ffd444941e88a86ea4cd8033b686ab9170 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 12 May 2018 20:53:14 +0200 +Subject: [PATCH 74/93] x86/bugs: Remove x86_spec_ctrl_set() + +commit 4b59bdb569453a60b752b274ca61f009e37f4dae upstream + +x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there +provide no real value as both call sites can just write x86_spec_ctrl_base +to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra +masking or checking. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/nospec-branch.h | 2 -- + arch/x86/kernel/cpu/bugs.c | 13 ++----------- + 2 files changed, 2 insertions(+), 13 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 8d9deec..8b38df9 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -217,8 +217,6 @@ enum spectre_v2_mitigation { + SPECTRE_V2_IBRS, + }; + +-extern void x86_spec_ctrl_set(u64); +- + /* The Speculative Store Bypass disable variants */ + enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 47b7f4f..82a99d0 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -132,15 +132,6 @@ static const char *spectre_v2_strings[] = { + static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +-void x86_spec_ctrl_set(u64 val) +-{ +- if (val & x86_spec_ctrl_mask) +- WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); +- else +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); +-} +-EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); +- + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +@@ -502,7 +493,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; +- x86_spec_ctrl_set(SPEC_CTRL_SSBD); ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + break; + case X86_VENDOR_AMD: + x86_amd_ssb_disable(); +@@ -614,7 +605,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); ++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch new file mode 100644 index 00000000..27bd0430 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch @@ -0,0 +1,95 @@ +From 96c9747df6b51ecfe781ba6c09ded9f406d20093 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 12 May 2018 20:10:00 +0200 +Subject: [PATCH 75/93] x86/bugs: Rework spec_ctrl base and mask logic + +commit be6fcb5478e95bb1c91f489121238deb3abca46a upstream + +x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value +which are not to be modified. However the implementation is not really used +and the bitmask was inverted to make a check easier, which was removed in +"x86/bugs: Remove x86_spec_ctrl_set()" + +Aside of that it is missing the STIBP bit if it is supported by the +platform, so if the mask would be used in x86_virt_spec_ctrl() then it +would prevent a guest from setting STIBP. + +Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to +sanitize the value which is supplied by the guest. 
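+
+The sanitizing step, as implemented below, keeps the host-owned bits
+and admits only the modifiable ones from the guest:
+
+        guestval = hostval & ~x86_spec_ctrl_mask;
+        guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;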
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++------- + 1 file changed, 19 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 82a99d0..2ae3586 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +-static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; ++static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; + + /* + * AMD specific MSR info for Speculative Store Bypass control. +@@ -67,6 +67,10 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + ++ /* Allow STIBP in MSR_SPEC_CTRL if supported */ ++ if (boot_cpu_has(X86_FEATURE_STIBP)) ++ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; ++ + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + +@@ -135,18 +139,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { ++ u64 msrval, guestval, hostval = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); +- u64 msr, host = x86_spec_ctrl_base; + + /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { ++ /* ++ * Restrict guest_spec_ctrl to supported values. Clear the ++ * modifiable bits in the host base value and or the ++ * modifiable bits from the guest value. ++ */ ++ guestval = hostval & ~x86_spec_ctrl_mask; ++ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; ++ + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) +- host |= ssbd_tif_to_spec_ctrl(ti->flags); ++ hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + +- if (host != guest_spec_ctrl) { +- msr = setguest ? guest_spec_ctrl : host; +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ if (hostval != guestval) { ++ msrval = setguest ? 
guestval : hostval; ++ wrmsrl(MSR_IA32_SPEC_CTRL, msrval); + } + } + } +@@ -492,7 +504,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; ++ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + break; + case X86_VENDOR_AMD: +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch new file mode 100644 index 00000000..d7ddca7e --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch @@ -0,0 +1,84 @@ +From d63bb88a1ae9c702ddf7477b0e96be1fc20f8d28 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 10 May 2018 20:42:48 +0200 +Subject: [PATCH 76/93] x86/speculation, KVM: Implement support for + VIRT_SPEC_CTRL/LS_CFG + +commit 47c61b3955cf712cadfc25635bf9bc174af030ea upstream + +Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to +x86_virt_spec_ctrl(). If either X86_FEATURE_LS_CFG_SSBD or +X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl +argument to check whether the state must be modified on the host. The +update reuses speculative_store_bypass_update() so the ZEN-specific sibling +coordination can be reused. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/spec-ctrl.h | 6 ++++++ + arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++++ + 2 files changed, 36 insertions(+) + +diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h +index 763d497..ae7c2c5 100644 +--- a/arch/x86/include/asm/spec-ctrl.h ++++ b/arch/x86/include/asm/spec-ctrl.h +@@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); + } + ++static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) ++{ ++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); ++ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); ++} ++ + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) + { + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 2ae3586..86af9b1 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -161,6 +161,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); + } + } ++ ++ /* ++ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update ++ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. ++ */ ++ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && ++ !static_cpu_has(X86_FEATURE_VIRT_SSBD)) ++ return; ++ ++ /* ++ * If the host has SSBD mitigation enabled, force it in the host's ++ * virtual MSR value. If its not permanently enabled, evaluate ++ * current's TIF_SSBD thread flag. 
++ */ ++ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) ++ hostval = SPEC_CTRL_SSBD; ++ else ++ hostval = ssbd_tif_to_spec_ctrl(ti->flags); ++ ++ /* Sanitize the guest value */ ++ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; ++ ++ if (hostval != guestval) { ++ unsigned long tif; ++ ++ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ++ ssbd_spec_ctrl_to_tif(hostval); ++ ++ speculative_store_bypass_update(tif); ++ } + } + EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch new file mode 100644 index 00000000..de5ae0c2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch @@ -0,0 +1,241 @@ +From 708128a64a6b750b63a5f1ca1e943c48023145b9 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Thu, 10 May 2018 22:06:39 +0200 +Subject: [PATCH 77/93] KVM: SVM: Implement VIRT_SPEC_CTRL support for SSBD + +commit bc226f07dcd3c9ef0b7f6236fe356ea4a9cb4769 upstream + +Expose the new virtualized architectural mechanism, VIRT_SSBD, for using +speculative store bypass disable (SSBD) under SVM. This will allow guests +to use SSBD on hardware that uses non-architectural mechanisms for enabling +SSBD. + +[ tglx: Folded the migration fixup from Paolo Bonzini ] + +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kvm_host.h | 2 +- + arch/x86/kernel/cpu/common.c | 3 ++- + arch/x86/kvm/cpuid.c | 11 +++++++++-- + arch/x86/kvm/cpuid.h | 9 +++++++++ + arch/x86/kvm/svm.c | 21 +++++++++++++++++++-- + arch/x86/kvm/vmx.c | 18 +++++++++++++++--- + arch/x86/kvm/x86.c | 13 ++++--------- + 7 files changed, 59 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 6f6ee68..fd3a854 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -864,7 +864,7 @@ struct kvm_x86_ops { + int (*hardware_setup)(void); /* __init */ + void (*hardware_unsetup)(void); /* __exit */ + bool (*cpu_has_accelerated_tpr)(void); +- bool (*cpu_has_high_real_mode_segbase)(void); ++ bool (*has_emulated_msr)(int index); + void (*cpuid_update)(struct kvm_vcpu *vcpu); + + int (*vm_init)(struct kvm *kvm); +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 945e841..40fc748 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -735,7 +735,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + +- if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || ++ cpu_has(c, X86_FEATURE_VIRT_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 4ccdfbe..4d3269b 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = +- F(AMD_IBPB) | F(AMD_IBRS); ++ F(AMD_IBPB) | 
F(AMD_IBRS) | F(VIRT_SSBD); + + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = +@@ -618,13 +618,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); + entry->edx = 0; +- /* IBRS and IBPB aren't necessarily present in hardware cpuid */ ++ /* ++ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in ++ * hardware cpuid ++ */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); ++ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) ++ entry->ebx |= F(VIRT_SSBD); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); ++ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) ++ entry->ebx |= F(VIRT_SSBD); + break; + } + case 0x80000019: +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 410070c..d22695c 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -182,6 +182,15 @@ static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); + } + ++static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); ++ return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD)); ++} ++ ++ + + /* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 481b106..c60d8fc 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -3552,6 +3552,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + + msr_info->data = svm->spec_ctrl; + break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_virt_ssbd(vcpu)) ++ return 1; ++ ++ msr_info->data = svm->virt_spec_ctrl; ++ break; + case MSR_IA32_UCODE_REV: + msr_info->data = 0x01000065; + break; +@@ -3679,6 +3686,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has_virt_ssbd(vcpu)) ++ return 1; ++ ++ if (data & ~SPEC_CTRL_SSBD) ++ return 1; ++ ++ svm->virt_spec_ctrl = data; ++ break; + case MSR_STAR: + svm->vmcb->save.star = data; + break; +@@ -5138,7 +5155,7 @@ static bool svm_cpu_has_accelerated_tpr(void) + return false; + } + +-static bool svm_has_high_real_mode_segbase(void) ++static bool svm_has_emulated_msr(int index) + { + return true; + } +@@ -5455,7 +5472,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { + .hardware_enable = svm_hardware_enable, + .hardware_disable = svm_hardware_disable, + .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, +- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, ++ .has_emulated_msr = svm_has_emulated_msr, + + .vcpu_create = svm_create_vcpu, + .vcpu_free = svm_free_vcpu, +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 55af4b6..7b4739c 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -8673,9 +8673,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) + } + } + +-static bool vmx_has_high_real_mode_segbase(void) ++static bool vmx_has_emulated_msr(int index) + { +- return enable_unrestricted_guest || emulate_invalid_guest_state; ++ switch (index) { ++ case MSR_IA32_SMBASE: ++ /* ++ * We cannot do SMM unless we 
can run the guest in big ++ * real mode. ++ */ ++ return enable_unrestricted_guest || emulate_invalid_guest_state; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ /* This is AMD only. */ ++ return false; ++ default: ++ return true; ++ } + } + + static bool vmx_mpx_supported(void) +@@ -11304,7 +11316,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { + .hardware_enable = hardware_enable, + .hardware_disable = hardware_disable, + .cpu_has_accelerated_tpr = report_flexpriority, +- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, ++ .has_emulated_msr = vmx_has_emulated_msr, + + .vcpu_create = vmx_create_vcpu, + .vcpu_free = vmx_free_vcpu, +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index b27b93d..c531231 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1002,6 +1002,7 @@ static u32 emulated_msrs[] = { + MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, + MSR_IA32_SMBASE, ++ MSR_AMD64_VIRT_SPEC_CTRL, + }; + + static unsigned num_emulated_msrs; +@@ -2650,7 +2651,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + * fringe case that is not enabled except via specific settings + * of the module parameters. + */ +- r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); ++ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); + break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; +@@ -4201,14 +4202,8 @@ static void kvm_init_msr_list(void) + num_msrs_to_save = j; + + for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { +- switch (emulated_msrs[i]) { +- case MSR_IA32_SMBASE: +- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) +- continue; +- break; +- default: +- break; +- } ++ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) ++ continue; + + if (j < i) + emulated_msrs[j] = emulated_msrs[i]; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch new file mode 100644 index 00000000..f2131e66 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch @@ -0,0 +1,48 @@ +From b5380d0ef78780a08140c0b4e8d050752e91104a Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 16 May 2018 23:18:09 -0400 +Subject: [PATCH 78/93] x86/bugs: Rename SSBD_NO to SSB_NO + +commit 240da953fcc6a9008c92fae5b1f727ee5ed167ab upstream + +The "336996 Speculative Execution Side Channel Mitigations" document from +May defines this as SSB_NO, hence let's sync up. + +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/msr-index.h | 2 +- + arch/x86/kernel/cpu/common.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index ad5d0d8..ca41d8f 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -63,7 +63,7 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +-#define ARCH_CAP_SSBD_NO (1 << 4) /* ++#define ARCH_CAP_SSB_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required.
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 40fc748..b0fd028 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -926,7 +926,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && +- !(ia32_cap & ARCH_CAP_SSBD_NO)) ++ !(ia32_cap & ARCH_CAP_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_speculation)) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch new file mode 100644 index 00000000..b3f12503 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch @@ -0,0 +1,106 @@ +From f4e4c29205e3747d4cc2d033e1c46ad9725e9886 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Date: Wed, 9 May 2018 19:42:20 +0900 +Subject: [PATCH 79/93] x86/kexec: Avoid double free_page() upon + do_kexec_load() failure + +commit a466ef76b815b86748d9870ef2a430af7b39c710 upstream. + +From ff82bedd3e12f0d3353282054ae48c3bd8c72012 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Date: Wed, 9 May 2018 12:12:39 +0900 +Subject: x86/kexec: Avoid double free_page() upon do_kexec_load() failure + +syzbot is reporting crashes after memory allocation failure inside +do_kexec_load() [1]. This is because free_transition_pgtable() is called +by both init_transition_pgtable() and machine_kexec_cleanup() when memory +allocation fails inside init_transition_pgtable(). + +Regarding 32-bit code, machine_kexec_free_page_tables() is called by both +machine_kexec_alloc_page_tables() and machine_kexec_cleanup() when memory +allocation fails inside machine_kexec_alloc_page_tables(). + +Fix this by leaving the error handling to machine_kexec_cleanup() +(and optionally setting NULL after free_page()). + +[1] https://syzkaller.appspot.com/bug?id=91e52396168cf2bdd572fe1e1bc0bc645c1c6b40 + +Fixes: f5deb79679af6eb4 ("x86: kexec: Use one page table in x86_64 machine_kexec") +Fixes: 92be3d6bdf2cb349 ("kexec/i386: allocate page table pages dynamically") +Reported-by: syzbot <syzbot+d96f60296ef613fe1d69@syzkaller.appspotmail.com> +Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Baoquan He <bhe@redhat.com> +Cc: thomas.lendacky@amd.com +Cc: prudo@linux.vnet.ibm.com +Cc: Huang Ying <ying.huang@intel.com> +Cc: syzkaller-bugs@googlegroups.com +Cc: takahiro.akashi@linaro.org +Cc: H.
Peter Anvin <hpa@zytor.com> +Cc: akpm@linux-foundation.org +Cc: dyoung@redhat.com +Cc: kirill.shutemov@linux.intel.com +Link: https://lkml.kernel.org/r/201805091942.DGG12448.tMFVFSJFQOOLHO@I-love.SAKURA.ne.jp +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/machine_kexec_32.c | 6 +++++- + arch/x86/kernel/machine_kexec_64.c | 4 +++- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c +index 469b23d..fd7e993 100644 +--- a/arch/x86/kernel/machine_kexec_32.c ++++ b/arch/x86/kernel/machine_kexec_32.c +@@ -71,12 +71,17 @@ static void load_segments(void) + static void machine_kexec_free_page_tables(struct kimage *image) + { + free_page((unsigned long)image->arch.pgd); ++ image->arch.pgd = NULL; + #ifdef CONFIG_X86_PAE + free_page((unsigned long)image->arch.pmd0); ++ image->arch.pmd0 = NULL; + free_page((unsigned long)image->arch.pmd1); ++ image->arch.pmd1 = NULL; + #endif + free_page((unsigned long)image->arch.pte0); ++ image->arch.pte0 = NULL; + free_page((unsigned long)image->arch.pte1); ++ image->arch.pte1 = NULL; + } + + static int machine_kexec_alloc_page_tables(struct kimage *image) +@@ -93,7 +98,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) + !image->arch.pmd0 || !image->arch.pmd1 || + #endif + !image->arch.pte0 || !image->arch.pte1) { +- machine_kexec_free_page_tables(image); + return -ENOMEM; + } + return 0; +diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c +index 8c1f218..26242cd 100644 +--- a/arch/x86/kernel/machine_kexec_64.c ++++ b/arch/x86/kernel/machine_kexec_64.c +@@ -37,8 +37,11 @@ static struct kexec_file_ops *kexec_file_loaders[] = { + static void free_transition_pgtable(struct kimage *image) + { + free_page((unsigned long)image->arch.pud); ++ image->arch.pud = NULL; + free_page((unsigned long)image->arch.pmd); ++ image->arch.pmd = NULL; + free_page((unsigned long)image->arch.pte); ++ image->arch.pte = NULL; + } + + static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) +@@ -79,7 +82,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); + return 0; + err: +- free_transition_pgtable(image); + return result; + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch new file mode 100644 index 00000000..ce234269 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch @@ -0,0 +1,44 @@ +From 546e325d7b773ae3c0df848b95f06206ebc7cd87 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 21 May 2018 17:54:49 -0400 +Subject: [PATCH 80/93] KVM/VMX: Expose SSBD properly to guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 0aa48468d00959c8a37cd3ac727284f4f7359151 upstream. + +The X86_FEATURE_SSBD is a synthetic CPU feature - that is, +its bit location has no relevance to the real CPUID 0x7.EDX[31] +bit position. For that we need the new CPU feature name.
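+
+A minimal sketch of why the bit position matters, assuming the usual
+word*32+bit cpufeature encoding (the word numbers below are only
+illustrative, not the exact values from cpufeatures.h):
+
+    /* Features are encoded as (word * 32 + bit); KVM's F() macro keeps
+     * only the low 5 bits, i.e. the position inside the 32-bit
+     * register reported to the guest. */
+    #define KVM_CPUID_BIT(feature)      (1u << ((feature) & 31))
+
+    /* Defined to mirror CPUID.(EAX=7,ECX=0):EDX, so bit 31 lines up: */
+    #define X86_FEATURE_SPEC_CTRL_SSBD  (18 * 32 + 31)
+
+    /* Synthetic: lives in a Linux-defined word, so its low 5 bits say
+     * nothing about any hardware register layout: */
+    #define X86_FEATURE_SSBD            (7 * 32 + 17)
+
+    /* KVM_CPUID_BIT(X86_FEATURE_SPEC_CTRL_SSBD) == 1u << 31  (EDX[31])
+     * KVM_CPUID_BIT(X86_FEATURE_SSBD)           == 1u << 17  (wrong)  */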
+ +Fixes: 52817587e706 ("x86/cpufeatures: Disentangle SSBD enumeration") +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: kvm@vger.kernel.org +Cc: "Radim Krčmář" <rkrcmar@redhat.com> +Cc: stable@vger.kernel.org +Cc: "H. Peter Anvin" <hpa@zytor.com> +Cc: Paolo Bonzini <pbonzini@redhat.com> +Link: https://lkml.kernel.org/r/20180521215449.26423-2-konrad.wilk@oracle.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 4d3269b..8510b7b 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = +- F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); ++ F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); + + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch new file mode 100644 index 00000000..f44b77a1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch @@ -0,0 +1,63 @@ +From a41340930388022d17c5acfa7c00edc80fa486f6 Mon Sep 17 00:00:00 2001 +From: Wei Huang <wei@redhat.com> +Date: Tue, 1 May 2018 09:49:54 -0500 +Subject: [PATCH 81/93] KVM: x86: Update cpuid properly when CR4.OSXSAVE or + CR4.PKE is changed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit c4d2188206bafa177ea58e9a25b952baa0bf7712 upstream. + +The CPUID bits of OSXSAVE (function=0x1) and OSPKE (func=0x7, leaf=0x0) +allow user apps to detect if the OS has set CR4.OSXSAVE or CR4.PKE. KVM is +supposed to update these CPUID bits when CR4 is updated. Current KVM +code doesn't handle some special cases when updates come from the emulator. +Here is one example: + + Step 1: guest boots + Step 2: guest OS enables XSAVE ==> CR4.OSXSAVE=1 and CPUID.OSXSAVE=1 + Step 3: guest hot reboot ==> QEMU resets CR4 to 0, but CPUID.OSXSAVE==1 + Step 4: guest OS checks CPUID.OSXSAVE, detects 1, then executes xgetbv + +Step 4 above will cause an #UD and guest crash because guest OS hasn't +turned on OSXSAVE yet. This patch solves the problem by comparing the +old_cr4 with cr4. If the related bits have been changed, +kvm_update_cpuid() needs to be called.
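+
+A standalone sketch of the changed-bits test the fix introduces (the
+function name is illustrative; only the XOR idiom matters):
+
+    #include <stdio.h>
+
+    #define X86_CR4_OSXSAVE (1ul << 18)
+    #define X86_CR4_PKE     (1ul << 22)
+
+    /* Nonzero when a CR4 write toggles a bit that is mirrored in guest
+     * CPUID (OSXSAVE -> CPUID.1:ECX[27], OSPKE -> CPUID.7,0:ECX[4]). */
+    static unsigned long cpuid_update_needed(unsigned long old_cr4,
+                                             unsigned long new_cr4)
+    {
+        return (old_cr4 ^ new_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE);
+    }
+
+    int main(void)
+    {
+        /* Hot-reboot case above: OSXSAVE goes 1 -> 0, so CPUID must be
+         * refreshed even though the new CR4 value is 0... */
+        printf("%d\n", cpuid_update_needed(X86_CR4_OSXSAVE, 0) != 0); /* 1 */
+        /* ...which the old test, looking only at the new value, missed: */
+        printf("%d\n", (0ul & (X86_CR4_OSXSAVE | X86_CR4_PKE)) != 0); /* 0 */
+        return 0;
+    }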
+ +Signed-off-by: Wei Huang <wei@redhat.com> +Reviewed-by: Bandan Das <bsd@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/x86.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index c531231..27e6cf0 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7201,6 +7201,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + { + struct msr_data apic_base_msr; + int mmu_reset_needed = 0; ++ int cpuid_update_needed = 0; + int pending_vec, max_bits, idx; + struct desc_ptr dt; + +@@ -7232,8 +7233,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + vcpu->arch.cr0 = sregs->cr0; + + mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; ++ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & ++ (X86_CR4_OSXSAVE | X86_CR4_PKE)); + kvm_x86_ops->set_cr4(vcpu, sregs->cr4); +- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) ++ if (cpuid_update_needed) + kvm_update_cpuid(vcpu); + + idx = srcu_read_lock(&vcpu->kvm->srcu); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch new file mode 100644 index 00000000..313f2577 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch @@ -0,0 +1,54 @@ +From e34ebcda27df86037fd748254208aff7e442ff0b Mon Sep 17 00:00:00 2001 +From: Jim Mattson <jmattson@google.com> +Date: Wed, 9 May 2018 14:29:35 -0700 +Subject: [PATCH 82/93] kvm: x86: IA32_ARCH_CAPABILITIES is always supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 1eaafe91a0df4157521b6417b3dd8430bf5f52f0 upstream. + +If there is a possibility that a VM may migrate to a Skylake host, +then the hypervisor should report IA32_ARCH_CAPABILITIES.RSBA[bit 2] +as being set (future work, of course). This implies that +CPUID.(EAX=7,ECX=0):EDX.ARCH_CAPABILITIES[bit 29] should be +set. Therefore, kvm should report this CPUID bit as being supported +whether or not the host supports it. Userspace is still free to clear +the bit if it chooses. + +For more information on RSBA, see Intel's white paper, "Retpoline: A +Branch Target Injection Mitigation" (Document Number 337131-001), +currently available at https://bugzilla.kernel.org/show_bug.cgi?id=199511. + +Since the IA32_ARCH_CAPABILITIES MSR is emulated in kvm, there is no +dependency on hardware support for this feature. 
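+
+A guest-side sketch, assuming a rdmsr() helper supplied by the caller,
+of how the emulated MSR would be consumed once the CPUID bit is
+reported (the RSBA check is the "future work" mentioned above):
+
+    #include <stdint.h>
+
+    #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+    #define ARCH_CAP_RDCL_NO  (1 << 0) /* not susceptible to Meltdown */
+    #define ARCH_CAP_IBRS_ALL (1 << 1) /* enhanced IBRS */
+    #define ARCH_CAP_RSBA     (1 << 2) /* RSB may use alternate targets */
+
+    /* Only legal once CPUID.(EAX=7,ECX=0):EDX[29] advertises the MSR,
+     * which KVM can now report even on hosts lacking it, since reads
+     * are emulated in software. */
+    static int guest_should_fill_rsb(uint64_t (*rdmsr)(uint32_t msr))
+    {
+        return (rdmsr(MSR_IA32_ARCH_CAPABILITIES) & ARCH_CAP_RSBA) != 0;
+    }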
+ +Signed-off-by: Jim Mattson <jmattson@google.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Fixes: 28c1c9fabf48 ("KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES") +Cc: stable@vger.kernel.org +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/cpuid.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index 8510b7b..fbd6c62 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -468,6 +468,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); ++ /* ++ * We emulate ARCH_CAPABILITIES in software even ++ * if the host doesn't support it. ++ */ ++ entry->edx |= F(ARCH_CAPABILITIES); + } else { + entry->ebx = 0; + entry->ecx = 0; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch new file mode 100644 index 00000000..b4bec832 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch @@ -0,0 +1,57 @@ +From 91702980566c39210225154c2a8b1cef41942737 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Thu, 26 Oct 2017 15:45:47 +0200 +Subject: [PATCH 83/93] kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 51776043afa415435c7e4636204fbe4f7edc4501 ] + +This ioctl is obsolete (it was used by Xenner as far as I know) but +still let's not break it gratuitously... Its handler is copying +directly into struct kvm. Go through a bounce buffer instead, with +the added benefit that we can actually do something useful with the +flags argument---the previous code was exiting with -EINVAL but still +doing the copy. + +This technically is a userspace ABI breakage, but since no one should be +using the ioctl, it's a good occasion to see if someone actually +complains. 
+ +Cc: kernel-hardening@lists.openwall.com +Cc: Kees Cook <keescook@chromium.org> +Cc: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/x86.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 27e6cf0..d7974fc 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4106,13 +4106,14 @@ long kvm_arch_vm_ioctl(struct file *filp, + mutex_unlock(&kvm->lock); + break; + case KVM_XEN_HVM_CONFIG: { ++ struct kvm_xen_hvm_config xhc; + r = -EFAULT; +- if (copy_from_user(&kvm->arch.xen_hvm_config, argp, +- sizeof(struct kvm_xen_hvm_config))) ++ if (copy_from_user(&xhc, argp, sizeof(xhc))) + goto out; + r = -EINVAL; +- if (kvm->arch.xen_hvm_config.flags) ++ if (xhc.flags) + goto out; ++ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); + r = 0; + break; + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch new file mode 100644 index 00000000..a2280307 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch @@ -0,0 +1,90 @@ +From 075696ba348a4c1eb20a641157f84f8b81220510 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson <sean.j.christopherson@intel.com> +Date: Fri, 23 Mar 2018 09:34:00 -0700 +Subject: [PATCH 84/93] KVM: VMX: raise internal error for exception during + invalid protected mode state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit add5ff7a216ee545a214013f26d1ef2f44a9c9f8 ] + +Exit to userspace with KVM_INTERNAL_ERROR_EMULATION if we encounter +an exception in Protected Mode while emulating guest due to invalid +guest state. Unlike Big RM, KVM doesn't support emulating exceptions +in PM, i.e. PM exceptions are always injected via the VMCS. Because +we will never do VMRESUME due to emulation_required, the exception is +never realized and we'll keep emulating the faulting instruction over +and over until we receive a signal. + +Exit to userspace iff there is a pending exception, i.e. don't exit +simply on a requested event. The purpose of this check and exit is to +aid in debugging a guest that is in all likelihood already doomed. +Invalid guest state in PM is extremely limited in normal operation, +e.g. it generally only occurs for a few instructions early in BIOS, +and any exception at this time is all but guaranteed to be fatal. +Non-vectored interrupts, e.g. INIT, SIPI and SMI, can be cleanly +handled/emulated, while checking for vectored interrupts, e.g. INTR +and NMI, without hitting false positives would add a fair amount of +complexity for almost no benefit (getting hit by lightning seems +more likely than encountering this specific scenario). + +Add a WARN_ON_ONCE to vmx_queue_exception() if we try to inject an +exception via the VMCS and emulation_required is true. 
+ +Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/vmx.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 7b4739c..9307c0d 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2555,6 +2555,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, + return; + } + ++ WARN_ON_ONCE(vmx->emulation_required); ++ + if (kvm_exception_is_soft(nr)) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmx->vcpu.arch.event_exit_inst_len); +@@ -6405,12 +6407,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) + goto out; + } + +- if (err != EMULATE_DONE) { +- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; +- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; +- vcpu->run->internal.ndata = 0; +- return 0; +- } ++ if (err != EMULATE_DONE) ++ goto emulation_error; ++ ++ if (vmx->emulation_required && !vmx->rmode.vm86_active && ++ vcpu->arch.exception.pending) ++ goto emulation_error; + + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; +@@ -6426,6 +6428,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) + + out: + return ret; ++ ++emulation_error: ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ return 0; + } + + static int __grow_ple_window(int val) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch new file mode 100644 index 00000000..db300b21 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch @@ -0,0 +1,56 @@ +From 2ece92e70fbd29fd14c1add63648b7154521b473 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov <vkuznets@redhat.com> +Date: Fri, 9 Feb 2018 14:01:33 +0100 +Subject: [PATCH 85/93] KVM: lapic: stop advertising DIRECTED_EOI when + in-kernel IOAPIC is in use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit 0bcc3fb95b97ac2ca223a5a870287b37f56265ac ] + +Devices which use level-triggered interrupts under Windows 2016 with +Hyper-V role enabled don't work: Windows disables EOI broadcast in SPIV +unconditionally. Our in-kernel IOAPIC implementation emulates an old IOAPIC +version which has no EOI register so EOI never happens. + +The issue was discovered and discussed a while ago: +https://www.spinics.net/lists/kvm/msg148098.html + +While this is a guest OS bug (it should check that IOAPIC has the required +capabilities before disabling EOI broadcast) we can workaround it in KVM: +advertising DIRECTED_EOI with in-kernel IOAPIC makes little sense anyway. 
+ +Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> +Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kvm/lapic.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c +index 650ff4a..d99e13d 100644 +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -294,8 +294,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) + if (!lapic_in_kernel(vcpu)) + return; + ++ /* ++ * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) ++ * which doesn't have EOI register; Some buggy OSes (e.g. Windows with ++ * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC ++ * version first and level-triggered interrupts never get EOIed in ++ * IOAPIC. ++ */ + feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); +- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) ++ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && ++ !ioapic_in_kernel(vcpu->kvm)) + v |= APIC_LVR_DIRECTED_EOI; + kvm_lapic_set_reg(apic, APIC_LVR, v); + } +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch new file mode 100644 index 00000000..f659e885 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch @@ -0,0 +1,217 @@ +From 655125acee5c084743a8bae4ffe2b723856594ce Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Tue, 21 Feb 2017 15:35:32 -0600 +Subject: [PATCH 86/93] objtool: Improve detection of BUG() and other dead ends + +commit d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 upstream. + +The BUG() macro's use of __builtin_unreachable() via the unreachable() +macro tells gcc that the instruction is a dead end, and that it's safe +to assume the current code path will not execute past the previous +instruction. + +On x86, the BUG() macro is implemented with the 'ud2' instruction. When +objtool's branch analysis sees that instruction, it knows the current +code path has come to a dead end. + +Peter Zijlstra has been working on a patch to change the WARN macros to +use 'ud2'. That patch will break objtool's assumption that 'ud2' is +always a dead end. + +Generally it's best for objtool to avoid making those kinds of +assumptions anyway. The more ignorant it is of kernel code internals, +the better. + +So create a more generic way for objtool to detect dead ends by adding +an annotation to the unreachable() macro. The annotation stores a +pointer to the end of the unreachable code path in an '__unreachable' +section. Objtool can read that section to find the dead ends. 
+ +Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/vmlinux.lds.S | 1 + + include/linux/compiler-gcc.h | 13 ++++++++- + tools/objtool/arch.h | 5 ++-- + tools/objtool/arch/x86/decode.c | 3 --- + tools/objtool/builtin-check.c | 60 ++++++++++++++++++++++++++++++++++++++--- + 5 files changed, 71 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index c7194e9..4ef267f 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -353,6 +353,7 @@ SECTIONS + /DISCARD/ : { + *(.eh_frame) + *(__func_stack_frame_non_standard) ++ *(__unreachable) + } + } + +diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h +index 362a1e17..b69d102 100644 +--- a/include/linux/compiler-gcc.h ++++ b/include/linux/compiler-gcc.h +@@ -199,6 +199,17 @@ + #endif + #endif + ++#ifdef CONFIG_STACK_VALIDATION ++#define annotate_unreachable() ({ \ ++ asm("1:\t\n" \ ++ ".pushsection __unreachable, \"a\"\t\n" \ ++ ".long 1b\t\n" \ ++ ".popsection\t\n"); \ ++}) ++#else ++#define annotate_unreachable() ++#endif ++ + /* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer +@@ -208,7 +219,7 @@ + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +-#define unreachable() __builtin_unreachable() ++#define unreachable() annotate_unreachable(); __builtin_unreachable() + + /* Mark a function definition as prohibited from being cloned. 
*/ + #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) +diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h +index f7350fc..a59e061 100644 +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -31,9 +31,8 @@ + #define INSN_CALL_DYNAMIC 8 + #define INSN_RETURN 9 + #define INSN_CONTEXT_SWITCH 10 +-#define INSN_BUG 11 +-#define INSN_NOP 12 +-#define INSN_OTHER 13 ++#define INSN_NOP 11 ++#define INSN_OTHER 12 + #define INSN_LAST INSN_OTHER + + int arch_decode_instruction(struct elf *elf, struct section *sec, +diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c +index 5e0dea2..9fb487f 100644 +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, + op2 == 0x35) + /* sysenter, sysret */ + *type = INSN_CONTEXT_SWITCH; +- else if (op2 == 0x0b || op2 == 0xb9) +- /* ud2 */ +- *type = INSN_BUG; + else if (op2 == 0x0d || op2 == 0x1f) + /* nopl/nopw */ + *type = INSN_NOP; +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index 377bff0..ad9eda9 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -51,7 +51,7 @@ struct instruction { + unsigned int len, state; + unsigned char type; + unsigned long immediate; +- bool alt_group, visited, ignore_alts; ++ bool alt_group, visited, dead_end, ignore_alts; + struct symbol *call_dest; + struct instruction *jump_dest; + struct list_head alts; +@@ -330,6 +330,54 @@ static int decode_instructions(struct objtool_file *file) + } + + /* ++ * Find all uses of the unreachable() macro, which are code path dead ends. ++ */ ++static int add_dead_ends(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct rela *rela; ++ struct instruction *insn; ++ bool found; ++ ++ sec = find_section_by_name(file->elf, ".rela__unreachable"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in .rela__unreachable"); ++ return -1; ++ } ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (insn) ++ insn = list_prev_entry(insn, list); ++ else if (rela->addend == rela->sym->sec->len) { ++ found = false; ++ list_for_each_entry_reverse(insn, &file->insn_list, list) { ++ if (insn->sec == rela->sym->sec) { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) { ++ WARN("can't find unreachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ } else { ++ WARN("can't find unreachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ ++ insn->dead_end = true; ++ } ++ ++ return 0; ++} ++ ++/* + * Warnings shouldn't be reported for ignored functions. 
+ */ + static void add_ignores(struct objtool_file *file) +@@ -896,6 +944,10 @@ static int decode_sections(struct objtool_file *file) + if (ret) + return ret; + ++ ret = add_dead_ends(file); ++ if (ret) ++ return ret; ++ + add_ignores(file); + + ret = add_nospec_ignores(file); +@@ -1094,13 +1146,13 @@ static int validate_branch(struct objtool_file *file, + + return 0; + +- case INSN_BUG: +- return 0; +- + default: + break; + } + ++ if (insn->dead_end) ++ return 0; ++ + insn = next_insn_same_sec(file, insn); + if (!insn) { + WARN("%s: unexpected end of section", sec->name); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch new file mode 100644 index 00000000..076eb364 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0087-objtool-Move-checking-code-to-check.c.patch @@ -0,0 +1,2802 @@ +From 1c6b7026213ec74f811957627c80513e75f6fb96 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Wed, 28 Jun 2017 10:11:05 -0500 +Subject: [PATCH 87/93] objtool: Move checking code to check.c + +commit dcc914f44f065ef73685b37e59877a5bb3cb7358 upstream. + +In preparation for the new 'objtool undwarf generate' command, which +will rely on 'objtool check', move the checking code from +builtin-check.c to check.c where it can be used by other commands. + +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Reviewed-by: Jiri Slaby <jslaby@suse.cz> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: live-patching@vger.kernel.org +Link: http://lkml.kernel.org/r/294c5c695fd73c1a5000bbe5960a7c9bec4ee6b4.1498659915.git.jpoimboe@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +[backported by hand to 4.9, this was a pain... - gregkh] +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/objtool/Build | 1 + + tools/objtool/builtin-check.c | 1337 +---------------------------------------- + tools/objtool/check.c | 1327 ++++++++++++++++++++++++++++++++++++++++ + tools/objtool/check.h | 51 ++ + 4 files changed, 1392 insertions(+), 1324 deletions(-) + create mode 100644 tools/objtool/check.c + create mode 100644 tools/objtool/check.h + +diff --git a/tools/objtool/Build b/tools/objtool/Build +index d6cdece..6f2e198 100644 +--- a/tools/objtool/Build ++++ b/tools/objtool/Build +@@ -1,5 +1,6 @@ + objtool-y += arch/$(SRCARCH)/ + objtool-y += builtin-check.o ++objtool-y += check.o + objtool-y += elf.o + objtool-y += special.o + objtool-y += objtool.o +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index ad9eda9..365c34e 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> ++ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License +@@ -25,1343 +25,32 @@ + * For more information, see tools/objtool/Documentation/stack-validation.txt. 
+ */ + +-#include <string.h> +-#include <stdlib.h> + #include <subcmd/parse-options.h> +- + #include "builtin.h" +-#include "elf.h" +-#include "special.h" +-#include "arch.h" +-#include "warn.h" +- +-#include <linux/hashtable.h> +- +-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +- +-#define STATE_FP_SAVED 0x1 +-#define STATE_FP_SETUP 0x2 +-#define STATE_FENTRY 0x4 +- +-struct instruction { +- struct list_head list; +- struct hlist_node hash; +- struct section *sec; +- unsigned long offset; +- unsigned int len, state; +- unsigned char type; +- unsigned long immediate; +- bool alt_group, visited, dead_end, ignore_alts; +- struct symbol *call_dest; +- struct instruction *jump_dest; +- struct list_head alts; +- struct symbol *func; +-}; +- +-struct alternative { +- struct list_head list; +- struct instruction *insn; +-}; +- +-struct objtool_file { +- struct elf *elf; +- struct list_head insn_list; +- DECLARE_HASHTABLE(insn_hash, 16); +- struct section *rodata, *whitelist; +- bool ignore_unreachables, c_file; +-}; +- +-const char *objname; +-static bool nofp; +- +-static struct instruction *find_insn(struct objtool_file *file, +- struct section *sec, unsigned long offset) +-{ +- struct instruction *insn; +- +- hash_for_each_possible(file->insn_hash, insn, hash, offset) +- if (insn->sec == sec && insn->offset == offset) +- return insn; +- +- return NULL; +-} +- +-static struct instruction *next_insn_same_sec(struct objtool_file *file, +- struct instruction *insn) +-{ +- struct instruction *next = list_next_entry(insn, list); +- +- if (&next->list == &file->insn_list || next->sec != insn->sec) +- return NULL; +- +- return next; +-} +- +-static bool gcov_enabled(struct objtool_file *file) +-{ +- struct section *sec; +- struct symbol *sym; +- +- list_for_each_entry(sec, &file->elf->sections, list) +- list_for_each_entry(sym, &sec->symbol_list, list) +- if (!strncmp(sym->name, "__gcov_.", 8)) +- return true; +- +- return false; +-} +- +-#define for_each_insn(file, insn) \ +- list_for_each_entry(insn, &file->insn_list, list) +- +-#define func_for_each_insn(file, func, insn) \ +- for (insn = find_insn(file, func->sec, func->offset); \ +- insn && &insn->list != &file->insn_list && \ +- insn->sec == func->sec && \ +- insn->offset < func->offset + func->len; \ +- insn = list_next_entry(insn, list)) +- +-#define func_for_each_insn_continue_reverse(file, func, insn) \ +- for (insn = list_prev_entry(insn, list); \ +- &insn->list != &file->insn_list && \ +- insn->sec == func->sec && insn->offset >= func->offset; \ +- insn = list_prev_entry(insn, list)) +- +-#define sec_for_each_insn_from(file, insn) \ +- for (; insn; insn = next_insn_same_sec(file, insn)) +- +- +-/* +- * Check if the function has been manually whitelisted with the +- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted +- * due to its use of a context switching instruction. 
+- */ +-static bool ignore_func(struct objtool_file *file, struct symbol *func) +-{ +- struct rela *rela; +- struct instruction *insn; +- +- /* check for STACK_FRAME_NON_STANDARD */ +- if (file->whitelist && file->whitelist->rela) +- list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { +- if (rela->sym->type == STT_SECTION && +- rela->sym->sec == func->sec && +- rela->addend == func->offset) +- return true; +- if (rela->sym->type == STT_FUNC && rela->sym == func) +- return true; +- } +- +- /* check if it has a context switching instruction */ +- func_for_each_insn(file, func, insn) +- if (insn->type == INSN_CONTEXT_SWITCH) +- return true; +- +- return false; +-} +- +-/* +- * This checks to see if the given function is a "noreturn" function. +- * +- * For global functions which are outside the scope of this object file, we +- * have to keep a manual list of them. +- * +- * For local functions, we have to detect them manually by simply looking for +- * the lack of a return instruction. +- * +- * Returns: +- * -1: error +- * 0: no dead end +- * 1: dead end +- */ +-static int __dead_end_function(struct objtool_file *file, struct symbol *func, +- int recursion) +-{ +- int i; +- struct instruction *insn; +- bool empty = true; +- +- /* +- * Unfortunately these have to be hard coded because the noreturn +- * attribute isn't provided in ELF data. +- */ +- static const char * const global_noreturns[] = { +- "__stack_chk_fail", +- "panic", +- "do_exit", +- "do_task_dead", +- "__module_put_and_exit", +- "complete_and_exit", +- "kvm_spurious_fault", +- "__reiserfs_panic", +- "lbug_with_loc" +- }; +- +- if (func->bind == STB_WEAK) +- return 0; +- +- if (func->bind == STB_GLOBAL) +- for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) +- if (!strcmp(func->name, global_noreturns[i])) +- return 1; +- +- if (!func->sec) +- return 0; +- +- func_for_each_insn(file, func, insn) { +- empty = false; +- +- if (insn->type == INSN_RETURN) +- return 0; +- } +- +- if (empty) +- return 0; +- +- /* +- * A function can have a sibling call instead of a return. In that +- * case, the function's dead-end status depends on whether the target +- * of the sibling call returns. +- */ +- func_for_each_insn(file, func, insn) { +- if (insn->sec != func->sec || +- insn->offset >= func->offset + func->len) +- break; +- +- if (insn->type == INSN_JUMP_UNCONDITIONAL) { +- struct instruction *dest = insn->jump_dest; +- struct symbol *dest_func; +- +- if (!dest) +- /* sibling call to another file */ +- return 0; +- +- if (dest->sec != func->sec || +- dest->offset < func->offset || +- dest->offset >= func->offset + func->len) { +- /* local sibling call */ +- dest_func = find_symbol_by_offset(dest->sec, +- dest->offset); +- if (!dest_func) +- continue; +- +- if (recursion == 5) { +- WARN_FUNC("infinite recursion (objtool bug!)", +- dest->sec, dest->offset); +- return -1; +- } +- +- return __dead_end_function(file, dest_func, +- recursion + 1); +- } +- } +- +- if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) +- /* sibling call */ +- return 0; +- } +- +- return 1; +-} +- +-static int dead_end_function(struct objtool_file *file, struct symbol *func) +-{ +- return __dead_end_function(file, func, 0); +-} +- +-/* +- * Call the arch-specific instruction decoder for all the instructions and add +- * them to the global instruction list. 
+- */ +-static int decode_instructions(struct objtool_file *file) +-{ +- struct section *sec; +- struct symbol *func; +- unsigned long offset; +- struct instruction *insn; +- int ret; +- +- list_for_each_entry(sec, &file->elf->sections, list) { +- +- if (!(sec->sh.sh_flags & SHF_EXECINSTR)) +- continue; +- +- for (offset = 0; offset < sec->len; offset += insn->len) { +- insn = malloc(sizeof(*insn)); +- memset(insn, 0, sizeof(*insn)); +- +- INIT_LIST_HEAD(&insn->alts); +- insn->sec = sec; +- insn->offset = offset; +- +- ret = arch_decode_instruction(file->elf, sec, offset, +- sec->len - offset, +- &insn->len, &insn->type, +- &insn->immediate); +- if (ret) +- return ret; +- +- if (!insn->type || insn->type > INSN_LAST) { +- WARN_FUNC("invalid instruction type %d", +- insn->sec, insn->offset, insn->type); +- return -1; +- } +- +- hash_add(file->insn_hash, &insn->hash, insn->offset); +- list_add_tail(&insn->list, &file->insn_list); +- } +- +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- if (!find_insn(file, sec, func->offset)) { +- WARN("%s(): can't find starting instruction", +- func->name); +- return -1; +- } +- +- func_for_each_insn(file, func, insn) +- if (!insn->func) +- insn->func = func; +- } +- } +- +- return 0; +-} +- +-/* +- * Find all uses of the unreachable() macro, which are code path dead ends. +- */ +-static int add_dead_ends(struct objtool_file *file) +-{ +- struct section *sec; +- struct rela *rela; +- struct instruction *insn; +- bool found; +- +- sec = find_section_by_name(file->elf, ".rela__unreachable"); +- if (!sec) +- return 0; +- +- list_for_each_entry(rela, &sec->rela_list, list) { +- if (rela->sym->type != STT_SECTION) { +- WARN("unexpected relocation symbol type in .rela__unreachable"); +- return -1; +- } +- insn = find_insn(file, rela->sym->sec, rela->addend); +- if (insn) +- insn = list_prev_entry(insn, list); +- else if (rela->addend == rela->sym->sec->len) { +- found = false; +- list_for_each_entry_reverse(insn, &file->insn_list, list) { +- if (insn->sec == rela->sym->sec) { +- found = true; +- break; +- } +- } +- +- if (!found) { +- WARN("can't find unreachable insn at %s+0x%x", +- rela->sym->sec->name, rela->addend); +- return -1; +- } +- } else { +- WARN("can't find unreachable insn at %s+0x%x", +- rela->sym->sec->name, rela->addend); +- return -1; +- } +- +- insn->dead_end = true; +- } +- +- return 0; +-} +- +-/* +- * Warnings shouldn't be reported for ignored functions. +- */ +-static void add_ignores(struct objtool_file *file) +-{ +- struct instruction *insn; +- struct section *sec; +- struct symbol *func; +- +- list_for_each_entry(sec, &file->elf->sections, list) { +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- if (!ignore_func(file, func)) +- continue; +- +- func_for_each_insn(file, func, insn) +- insn->visited = true; +- } +- } +-} +- +-/* +- * FIXME: For now, just ignore any alternatives which add retpolines. This is +- * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. +- * But it at least allows objtool to understand the control flow *around* the +- * retpoline. 
+- */ +-static int add_nospec_ignores(struct objtool_file *file) +-{ +- struct section *sec; +- struct rela *rela; +- struct instruction *insn; +- +- sec = find_section_by_name(file->elf, ".rela.discard.nospec"); +- if (!sec) +- return 0; +- +- list_for_each_entry(rela, &sec->rela_list, list) { +- if (rela->sym->type != STT_SECTION) { +- WARN("unexpected relocation symbol type in %s", sec->name); +- return -1; +- } +- +- insn = find_insn(file, rela->sym->sec, rela->addend); +- if (!insn) { +- WARN("bad .discard.nospec entry"); +- return -1; +- } +- +- insn->ignore_alts = true; +- } +- +- return 0; +-} +- +-/* +- * Find the destination instructions for all jumps. +- */ +-static int add_jump_destinations(struct objtool_file *file) +-{ +- struct instruction *insn; +- struct rela *rela; +- struct section *dest_sec; +- unsigned long dest_off; +- +- for_each_insn(file, insn) { +- if (insn->type != INSN_JUMP_CONDITIONAL && +- insn->type != INSN_JUMP_UNCONDITIONAL) +- continue; +- +- /* skip ignores */ +- if (insn->visited) +- continue; +- +- rela = find_rela_by_dest_range(insn->sec, insn->offset, +- insn->len); +- if (!rela) { +- dest_sec = insn->sec; +- dest_off = insn->offset + insn->len + insn->immediate; +- } else if (rela->sym->type == STT_SECTION) { +- dest_sec = rela->sym->sec; +- dest_off = rela->addend + 4; +- } else if (rela->sym->sec->idx) { +- dest_sec = rela->sym->sec; +- dest_off = rela->sym->sym.st_value + rela->addend + 4; +- } else if (strstr(rela->sym->name, "_indirect_thunk_")) { +- /* +- * Retpoline jumps are really dynamic jumps in +- * disguise, so convert them accordingly. +- */ +- insn->type = INSN_JUMP_DYNAMIC; +- continue; +- } else { +- /* sibling call */ +- insn->jump_dest = 0; +- continue; +- } +- +- insn->jump_dest = find_insn(file, dest_sec, dest_off); +- if (!insn->jump_dest) { +- +- /* +- * This is a special case where an alt instruction +- * jumps past the end of the section. These are +- * handled later in handle_group_alt(). +- */ +- if (!strcmp(insn->sec->name, ".altinstr_replacement")) +- continue; +- +- WARN_FUNC("can't find jump dest instruction at %s+0x%lx", +- insn->sec, insn->offset, dest_sec->name, +- dest_off); +- return -1; +- } +- } +- +- return 0; +-} +- +-/* +- * Find the destination instructions for all calls. +- */ +-static int add_call_destinations(struct objtool_file *file) +-{ +- struct instruction *insn; +- unsigned long dest_off; +- struct rela *rela; +- +- for_each_insn(file, insn) { +- if (insn->type != INSN_CALL) +- continue; +- +- rela = find_rela_by_dest_range(insn->sec, insn->offset, +- insn->len); +- if (!rela) { +- dest_off = insn->offset + insn->len + insn->immediate; +- insn->call_dest = find_symbol_by_offset(insn->sec, +- dest_off); +- /* +- * FIXME: Thanks to retpolines, it's now considered +- * normal for a function to call within itself. So +- * disable this warning for now. 
+- */ +-#if 0 +- if (!insn->call_dest) { +- WARN_FUNC("can't find call dest symbol at offset 0x%lx", +- insn->sec, insn->offset, dest_off); +- return -1; +- } +-#endif +- } else if (rela->sym->type == STT_SECTION) { +- insn->call_dest = find_symbol_by_offset(rela->sym->sec, +- rela->addend+4); +- if (!insn->call_dest || +- insn->call_dest->type != STT_FUNC) { +- WARN_FUNC("can't find call dest symbol at %s+0x%x", +- insn->sec, insn->offset, +- rela->sym->sec->name, +- rela->addend + 4); +- return -1; +- } +- } else +- insn->call_dest = rela->sym; +- } +- +- return 0; +-} +- +-/* +- * The .alternatives section requires some extra special care, over and above +- * what other special sections require: +- * +- * 1. Because alternatives are patched in-place, we need to insert a fake jump +- * instruction at the end so that validate_branch() skips all the original +- * replaced instructions when validating the new instruction path. +- * +- * 2. An added wrinkle is that the new instruction length might be zero. In +- * that case the old instructions are replaced with noops. We simulate that +- * by creating a fake jump as the only new instruction. +- * +- * 3. In some cases, the alternative section includes an instruction which +- * conditionally jumps to the _end_ of the entry. We have to modify these +- * jumps' destinations to point back to .text rather than the end of the +- * entry in .altinstr_replacement. +- * +- * 4. It has been requested that we don't validate the !POPCNT feature path +- * which is a "very very small percentage of machines". +- */ +-static int handle_group_alt(struct objtool_file *file, +- struct special_alt *special_alt, +- struct instruction *orig_insn, +- struct instruction **new_insn) +-{ +- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; +- unsigned long dest_off; +- +- last_orig_insn = NULL; +- insn = orig_insn; +- sec_for_each_insn_from(file, insn) { +- if (insn->offset >= special_alt->orig_off + special_alt->orig_len) +- break; +- +- if (special_alt->skip_orig) +- insn->type = INSN_NOP; +- +- insn->alt_group = true; +- last_orig_insn = insn; +- } +- +- if (!next_insn_same_sec(file, last_orig_insn)) { +- WARN("%s: don't know how to handle alternatives at end of section", +- special_alt->orig_sec->name); +- return -1; +- } +- +- fake_jump = malloc(sizeof(*fake_jump)); +- if (!fake_jump) { +- WARN("malloc failed"); +- return -1; +- } +- memset(fake_jump, 0, sizeof(*fake_jump)); +- INIT_LIST_HEAD(&fake_jump->alts); +- fake_jump->sec = special_alt->new_sec; +- fake_jump->offset = -1; +- fake_jump->type = INSN_JUMP_UNCONDITIONAL; +- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); +- +- if (!special_alt->new_len) { +- *new_insn = fake_jump; +- return 0; +- } +- +- last_new_insn = NULL; +- insn = *new_insn; +- sec_for_each_insn_from(file, insn) { +- if (insn->offset >= special_alt->new_off + special_alt->new_len) +- break; +- +- last_new_insn = insn; +- +- if (insn->type != INSN_JUMP_CONDITIONAL && +- insn->type != INSN_JUMP_UNCONDITIONAL) +- continue; +- +- if (!insn->immediate) +- continue; +- +- dest_off = insn->offset + insn->len + insn->immediate; +- if (dest_off == special_alt->new_off + special_alt->new_len) +- insn->jump_dest = fake_jump; +- +- if (!insn->jump_dest) { +- WARN_FUNC("can't find alternative jump destination", +- insn->sec, insn->offset); +- return -1; +- } +- } +- +- if (!last_new_insn) { +- WARN_FUNC("can't find last new alternative instruction", +- special_alt->new_sec, special_alt->new_off); +- return -1; +- } 
+- +- list_add(&fake_jump->list, &last_new_insn->list); +- +- return 0; +-} +- +-/* +- * A jump table entry can either convert a nop to a jump or a jump to a nop. +- * If the original instruction is a jump, make the alt entry an effective nop +- * by just skipping the original instruction. +- */ +-static int handle_jump_alt(struct objtool_file *file, +- struct special_alt *special_alt, +- struct instruction *orig_insn, +- struct instruction **new_insn) +-{ +- if (orig_insn->type == INSN_NOP) +- return 0; +- +- if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { +- WARN_FUNC("unsupported instruction at jump label", +- orig_insn->sec, orig_insn->offset); +- return -1; +- } +- +- *new_insn = list_next_entry(orig_insn, list); +- return 0; +-} +- +-/* +- * Read all the special sections which have alternate instructions which can be +- * patched in or redirected to at runtime. Each instruction having alternate +- * instruction(s) has them added to its insn->alts list, which will be +- * traversed in validate_branch(). +- */ +-static int add_special_section_alts(struct objtool_file *file) +-{ +- struct list_head special_alts; +- struct instruction *orig_insn, *new_insn; +- struct special_alt *special_alt, *tmp; +- struct alternative *alt; +- int ret; +- +- ret = special_get_alts(file->elf, &special_alts); +- if (ret) +- return ret; +- +- list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { +- +- orig_insn = find_insn(file, special_alt->orig_sec, +- special_alt->orig_off); +- if (!orig_insn) { +- WARN_FUNC("special: can't find orig instruction", +- special_alt->orig_sec, special_alt->orig_off); +- ret = -1; +- goto out; +- } +- +- /* Ignore retpoline alternatives. */ +- if (orig_insn->ignore_alts) +- continue; +- +- new_insn = NULL; +- if (!special_alt->group || special_alt->new_len) { +- new_insn = find_insn(file, special_alt->new_sec, +- special_alt->new_off); +- if (!new_insn) { +- WARN_FUNC("special: can't find new instruction", +- special_alt->new_sec, +- special_alt->new_off); +- ret = -1; +- goto out; +- } +- } +- +- if (special_alt->group) { +- ret = handle_group_alt(file, special_alt, orig_insn, +- &new_insn); +- if (ret) +- goto out; +- } else if (special_alt->jump_or_nop) { +- ret = handle_jump_alt(file, special_alt, orig_insn, +- &new_insn); +- if (ret) +- goto out; +- } +- +- alt = malloc(sizeof(*alt)); +- if (!alt) { +- WARN("malloc failed"); +- ret = -1; +- goto out; +- } +- +- alt->insn = new_insn; +- list_add_tail(&alt->list, &orig_insn->alts); +- +- list_del(&special_alt->list); +- free(special_alt); +- } +- +-out: +- return ret; +-} +- +-static int add_switch_table(struct objtool_file *file, struct symbol *func, +- struct instruction *insn, struct rela *table, +- struct rela *next_table) +-{ +- struct rela *rela = table; +- struct instruction *alt_insn; +- struct alternative *alt; +- +- list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { +- if (rela == next_table) +- break; +- +- if (rela->sym->sec != insn->sec || +- rela->addend <= func->offset || +- rela->addend >= func->offset + func->len) +- break; +- +- alt_insn = find_insn(file, insn->sec, rela->addend); +- if (!alt_insn) { +- WARN("%s: can't find instruction at %s+0x%x", +- file->rodata->rela->name, insn->sec->name, +- rela->addend); +- return -1; +- } +- +- alt = malloc(sizeof(*alt)); +- if (!alt) { +- WARN("malloc failed"); +- return -1; +- } +- +- alt->insn = alt_insn; +- list_add_tail(&alt->list, &insn->alts); +- } +- +- return 0; +-} +- +-/* +- * find_switch_table() - Given a dynamic 
jump, find the switch jump table in +- * .rodata associated with it. +- * +- * There are 3 basic patterns: +- * +- * 1. jmpq *[rodata addr](,%reg,8) +- * +- * This is the most common case by far. It jumps to an address in a simple +- * jump table which is stored in .rodata. +- * +- * 2. jmpq *[rodata addr](%rip) +- * +- * This is caused by a rare GCC quirk, currently only seen in three driver +- * functions in the kernel, only with certain obscure non-distro configs. +- * +- * As part of an optimization, GCC makes a copy of an existing switch jump +- * table, modifies it, and then hard-codes the jump (albeit with an indirect +- * jump) to use a single entry in the table. The rest of the jump table and +- * some of its jump targets remain as dead code. +- * +- * In such a case we can just crudely ignore all unreachable instruction +- * warnings for the entire object file. Ideally we would just ignore them +- * for the function, but that would require redesigning the code quite a +- * bit. And honestly that's just not worth doing: unreachable instruction +- * warnings are of questionable value anyway, and this is such a rare issue. +- * +- * 3. mov [rodata addr],%reg1 +- * ... some instructions ... +- * jmpq *(%reg1,%reg2,8) +- * +- * This is a fairly uncommon pattern which is new for GCC 6. As of this +- * writing, there are 11 occurrences of it in the allmodconfig kernel. +- * +- * TODO: Once we have DWARF CFI and smarter instruction decoding logic, +- * ensure the same register is used in the mov and jump instructions. +- */ +-static struct rela *find_switch_table(struct objtool_file *file, +- struct symbol *func, +- struct instruction *insn) +-{ +- struct rela *text_rela, *rodata_rela; +- struct instruction *orig_insn = insn; +- +- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); +- if (text_rela && text_rela->sym == file->rodata->sym) { +- /* case 1 */ +- rodata_rela = find_rela_by_dest(file->rodata, +- text_rela->addend); +- if (rodata_rela) +- return rodata_rela; +- +- /* case 2 */ +- rodata_rela = find_rela_by_dest(file->rodata, +- text_rela->addend + 4); +- if (!rodata_rela) +- return NULL; +- file->ignore_unreachables = true; +- return rodata_rela; +- } +- +- /* case 3 */ +- func_for_each_insn_continue_reverse(file, func, insn) { +- if (insn->type == INSN_JUMP_DYNAMIC) +- break; +- +- /* allow small jumps within the range */ +- if (insn->type == INSN_JUMP_UNCONDITIONAL && +- insn->jump_dest && +- (insn->jump_dest->offset <= insn->offset || +- insn->jump_dest->offset > orig_insn->offset)) +- break; +- +- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, +- insn->len); +- if (text_rela && text_rela->sym == file->rodata->sym) +- return find_rela_by_dest(file->rodata, +- text_rela->addend); +- } +- +- return NULL; +-} +- +-static int add_func_switch_tables(struct objtool_file *file, +- struct symbol *func) +-{ +- struct instruction *insn, *prev_jump = NULL; +- struct rela *rela, *prev_rela = NULL; +- int ret; +- +- func_for_each_insn(file, func, insn) { +- if (insn->type != INSN_JUMP_DYNAMIC) +- continue; +- +- rela = find_switch_table(file, func, insn); +- if (!rela) +- continue; +- +- /* +- * We found a switch table, but we don't know yet how big it +- * is. Don't add it until we reach the end of the function or +- * the beginning of another switch table in the same function. 
+- */ +- if (prev_jump) { +- ret = add_switch_table(file, func, prev_jump, prev_rela, +- rela); +- if (ret) +- return ret; +- } +- +- prev_jump = insn; +- prev_rela = rela; +- } +- +- if (prev_jump) { +- ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); +- if (ret) +- return ret; +- } +- +- return 0; +-} +- +-/* +- * For some switch statements, gcc generates a jump table in the .rodata +- * section which contains a list of addresses within the function to jump to. +- * This finds these jump tables and adds them to the insn->alts lists. +- */ +-static int add_switch_table_alts(struct objtool_file *file) +-{ +- struct section *sec; +- struct symbol *func; +- int ret; +- +- if (!file->rodata || !file->rodata->rela) +- return 0; +- +- list_for_each_entry(sec, &file->elf->sections, list) { +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- ret = add_func_switch_tables(file, func); +- if (ret) +- return ret; +- } +- } +- +- return 0; +-} +- +-static int decode_sections(struct objtool_file *file) +-{ +- int ret; ++#include "check.h" + +- ret = decode_instructions(file); +- if (ret) +- return ret; ++bool nofp; + +- ret = add_dead_ends(file); +- if (ret) +- return ret; +- +- add_ignores(file); +- +- ret = add_nospec_ignores(file); +- if (ret) +- return ret; +- +- ret = add_jump_destinations(file); +- if (ret) +- return ret; +- +- ret = add_call_destinations(file); +- if (ret) +- return ret; +- +- ret = add_special_section_alts(file); +- if (ret) +- return ret; +- +- ret = add_switch_table_alts(file); +- if (ret) +- return ret; +- +- return 0; +-} +- +-static bool is_fentry_call(struct instruction *insn) +-{ +- if (insn->type == INSN_CALL && +- insn->call_dest->type == STT_NOTYPE && +- !strcmp(insn->call_dest->name, "__fentry__")) +- return true; +- +- return false; +-} +- +-static bool has_modified_stack_frame(struct instruction *insn) +-{ +- return (insn->state & STATE_FP_SAVED) || +- (insn->state & STATE_FP_SETUP); +-} +- +-static bool has_valid_stack_frame(struct instruction *insn) +-{ +- return (insn->state & STATE_FP_SAVED) && +- (insn->state & STATE_FP_SETUP); +-} +- +-static unsigned int frame_state(unsigned long state) +-{ +- return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); +-} +- +-/* +- * Follow the branch starting at the given instruction, and recursively follow +- * any other branches (jumps). Meanwhile, track the frame pointer state at +- * each instruction and validate all the rules described in +- * tools/objtool/Documentation/stack-validation.txt. 
+- */ +-static int validate_branch(struct objtool_file *file, +- struct instruction *first, unsigned char first_state) +-{ +- struct alternative *alt; +- struct instruction *insn; +- struct section *sec; +- struct symbol *func = NULL; +- unsigned char state; +- int ret; +- +- insn = first; +- sec = insn->sec; +- state = first_state; +- +- if (insn->alt_group && list_empty(&insn->alts)) { +- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", +- sec, insn->offset); +- return 1; +- } +- +- while (1) { +- if (file->c_file && insn->func) { +- if (func && func != insn->func) { +- WARN("%s() falls through to next function %s()", +- func->name, insn->func->name); +- return 1; +- } +- +- func = insn->func; +- } +- +- if (insn->visited) { +- if (frame_state(insn->state) != frame_state(state)) { +- WARN_FUNC("frame pointer state mismatch", +- sec, insn->offset); +- return 1; +- } +- +- return 0; +- } +- +- insn->visited = true; +- insn->state = state; +- +- list_for_each_entry(alt, &insn->alts, list) { +- ret = validate_branch(file, alt->insn, state); +- if (ret) +- return 1; +- } +- +- switch (insn->type) { +- +- case INSN_FP_SAVE: +- if (!nofp) { +- if (state & STATE_FP_SAVED) { +- WARN_FUNC("duplicate frame pointer save", +- sec, insn->offset); +- return 1; +- } +- state |= STATE_FP_SAVED; +- } +- break; +- +- case INSN_FP_SETUP: +- if (!nofp) { +- if (state & STATE_FP_SETUP) { +- WARN_FUNC("duplicate frame pointer setup", +- sec, insn->offset); +- return 1; +- } +- state |= STATE_FP_SETUP; +- } +- break; +- +- case INSN_FP_RESTORE: +- if (!nofp) { +- if (has_valid_stack_frame(insn)) +- state &= ~STATE_FP_SETUP; +- +- state &= ~STATE_FP_SAVED; +- } +- break; +- +- case INSN_RETURN: +- if (!nofp && has_modified_stack_frame(insn)) { +- WARN_FUNC("return without frame pointer restore", +- sec, insn->offset); +- return 1; +- } +- return 0; +- +- case INSN_CALL: +- if (is_fentry_call(insn)) { +- state |= STATE_FENTRY; +- break; +- } +- +- ret = dead_end_function(file, insn->call_dest); +- if (ret == 1) +- return 0; +- if (ret == -1) +- return 1; +- +- /* fallthrough */ +- case INSN_CALL_DYNAMIC: +- if (!nofp && !has_valid_stack_frame(insn)) { +- WARN_FUNC("call without frame pointer save/setup", +- sec, insn->offset); +- return 1; +- } +- break; +- +- case INSN_JUMP_CONDITIONAL: +- case INSN_JUMP_UNCONDITIONAL: +- if (insn->jump_dest) { +- ret = validate_branch(file, insn->jump_dest, +- state); +- if (ret) +- return 1; +- } else if (has_modified_stack_frame(insn)) { +- WARN_FUNC("sibling call from callable instruction with changed frame pointer", +- sec, insn->offset); +- return 1; +- } /* else it's a sibling call */ +- +- if (insn->type == INSN_JUMP_UNCONDITIONAL) +- return 0; +- +- break; +- +- case INSN_JUMP_DYNAMIC: +- if (list_empty(&insn->alts) && +- has_modified_stack_frame(insn)) { +- WARN_FUNC("sibling call from callable instruction with changed frame pointer", +- sec, insn->offset); +- return 1; +- } +- +- return 0; +- +- default: +- break; +- } +- +- if (insn->dead_end) +- return 0; +- +- insn = next_insn_same_sec(file, insn); +- if (!insn) { +- WARN("%s: unexpected end of section", sec->name); +- return 1; +- } +- } +- +- return 0; +-} +- +-static bool is_kasan_insn(struct instruction *insn) +-{ +- return (insn->type == INSN_CALL && +- !strcmp(insn->call_dest->name, "__asan_handle_no_return")); +-} +- +-static bool is_ubsan_insn(struct instruction *insn) +-{ +- return (insn->type == INSN_CALL && +- !strcmp(insn->call_dest->name, +- 
"__ubsan_handle_builtin_unreachable")); +-} +- +-static bool ignore_unreachable_insn(struct symbol *func, +- struct instruction *insn) +-{ +- int i; +- +- if (insn->type == INSN_NOP) +- return true; +- +- /* +- * Check if this (or a subsequent) instruction is related to +- * CONFIG_UBSAN or CONFIG_KASAN. +- * +- * End the search at 5 instructions to avoid going into the weeds. +- */ +- for (i = 0; i < 5; i++) { +- +- if (is_kasan_insn(insn) || is_ubsan_insn(insn)) +- return true; +- +- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { +- insn = insn->jump_dest; +- continue; +- } +- +- if (insn->offset + insn->len >= func->offset + func->len) +- break; +- insn = list_next_entry(insn, list); +- } +- +- return false; +-} +- +-static int validate_functions(struct objtool_file *file) +-{ +- struct section *sec; +- struct symbol *func; +- struct instruction *insn; +- int ret, warnings = 0; +- +- list_for_each_entry(sec, &file->elf->sections, list) { +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- insn = find_insn(file, sec, func->offset); +- if (!insn) +- continue; +- +- ret = validate_branch(file, insn, 0); +- warnings += ret; +- } +- } +- +- list_for_each_entry(sec, &file->elf->sections, list) { +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- func_for_each_insn(file, func, insn) { +- if (insn->visited) +- continue; +- +- insn->visited = true; +- +- if (file->ignore_unreachables || warnings || +- ignore_unreachable_insn(func, insn)) +- continue; +- +- /* +- * gcov produces a lot of unreachable +- * instructions. If we get an unreachable +- * warning and the file has gcov enabled, just +- * ignore it, and all other such warnings for +- * the file. +- */ +- if (!file->ignore_unreachables && +- gcov_enabled(file)) { +- file->ignore_unreachables = true; +- continue; +- } +- +- WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); +- warnings++; +- } +- } +- } +- +- return warnings; +-} +- +-static int validate_uncallable_instructions(struct objtool_file *file) +-{ +- struct instruction *insn; +- int warnings = 0; +- +- for_each_insn(file, insn) { +- if (!insn->visited && insn->type == INSN_RETURN) { +- +- /* +- * Don't warn about call instructions in unvisited +- * retpoline alternatives. 
+- */ +- if (!strcmp(insn->sec->name, ".altinstr_replacement")) +- continue; +- +- WARN_FUNC("return instruction outside of a callable function", +- insn->sec, insn->offset); +- warnings++; +- } +- } +- +- return warnings; +-} +- +-static void cleanup(struct objtool_file *file) +-{ +- struct instruction *insn, *tmpinsn; +- struct alternative *alt, *tmpalt; +- +- list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { +- list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { +- list_del(&alt->list); +- free(alt); +- } +- list_del(&insn->list); +- hash_del(&insn->hash); +- free(insn); +- } +- elf_close(file->elf); +-} +- +-const char * const check_usage[] = { ++static const char * const check_usage[] = { + "objtool check [<options>] file.o", + NULL, + }; + ++const struct option check_options[] = { ++ OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), ++ OPT_END(), ++}; ++ + int cmd_check(int argc, const char **argv) + { +- struct objtool_file file; +- int ret, warnings = 0; ++ const char *objname; + +- const struct option options[] = { +- OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), +- OPT_END(), +- }; +- +- argc = parse_options(argc, argv, options, check_usage, 0); ++ argc = parse_options(argc, argv, check_options, check_usage, 0); + + if (argc != 1) +- usage_with_options(check_usage, options); ++ usage_with_options(check_usage, check_options); + + objname = argv[0]; + +- file.elf = elf_open(objname); +- if (!file.elf) { +- fprintf(stderr, "error reading elf file %s\n", objname); +- return 1; +- } +- +- INIT_LIST_HEAD(&file.insn_list); +- hash_init(file.insn_hash); +- file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); +- file.rodata = find_section_by_name(file.elf, ".rodata"); +- file.ignore_unreachables = false; +- file.c_file = find_section_by_name(file.elf, ".comment"); +- +- ret = decode_sections(&file); +- if (ret < 0) +- goto out; +- warnings += ret; +- +- ret = validate_functions(&file); +- if (ret < 0) +- goto out; +- warnings += ret; +- +- ret = validate_uncallable_instructions(&file); +- if (ret < 0) +- goto out; +- warnings += ret; +- +-out: +- cleanup(&file); +- +- /* ignore warnings for now until we get all the code cleaned up */ +- if (ret || warnings) +- return 0; +- return 0; ++ return check(objname, nofp); + } +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +new file mode 100644 +index 0000000..b7a0af5 +--- /dev/null ++++ b/tools/objtool/check.c +@@ -0,0 +1,1327 @@ ++/* ++ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#include <string.h> ++#include <stdlib.h> ++ ++#include "check.h" ++#include "elf.h" ++#include "special.h" ++#include "arch.h" ++#include "warn.h" ++ ++#include <linux/hashtable.h> ++ ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++ ++#define STATE_FP_SAVED 0x1 ++#define STATE_FP_SETUP 0x2 ++#define STATE_FENTRY 0x4 ++ ++struct alternative { ++ struct list_head list; ++ struct instruction *insn; ++}; ++ ++const char *objname; ++static bool nofp; ++ ++static struct instruction *find_insn(struct objtool_file *file, ++ struct section *sec, unsigned long offset) ++{ ++ struct instruction *insn; ++ ++ hash_for_each_possible(file->insn_hash, insn, hash, offset) ++ if (insn->sec == sec && insn->offset == offset) ++ return insn; ++ ++ return NULL; ++} ++ ++static struct instruction *next_insn_same_sec(struct objtool_file *file, ++ struct instruction *insn) ++{ ++ struct instruction *next = list_next_entry(insn, list); ++ ++ if (&next->list == &file->insn_list || next->sec != insn->sec) ++ return NULL; ++ ++ return next; ++} ++ ++static bool gcov_enabled(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct symbol *sym; ++ ++ list_for_each_entry(sec, &file->elf->sections, list) ++ list_for_each_entry(sym, &sec->symbol_list, list) ++ if (!strncmp(sym->name, "__gcov_.", 8)) ++ return true; ++ ++ return false; ++} ++ ++#define for_each_insn(file, insn) \ ++ list_for_each_entry(insn, &file->insn_list, list) ++ ++#define func_for_each_insn(file, func, insn) \ ++ for (insn = find_insn(file, func->sec, func->offset); \ ++ insn && &insn->list != &file->insn_list && \ ++ insn->sec == func->sec && \ ++ insn->offset < func->offset + func->len; \ ++ insn = list_next_entry(insn, list)) ++ ++#define func_for_each_insn_continue_reverse(file, func, insn) \ ++ for (insn = list_prev_entry(insn, list); \ ++ &insn->list != &file->insn_list && \ ++ insn->sec == func->sec && insn->offset >= func->offset; \ ++ insn = list_prev_entry(insn, list)) ++ ++#define sec_for_each_insn_from(file, insn) \ ++ for (; insn; insn = next_insn_same_sec(file, insn)) ++ ++ ++/* ++ * Check if the function has been manually whitelisted with the ++ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted ++ * due to its use of a context switching instruction. ++ */ ++static bool ignore_func(struct objtool_file *file, struct symbol *func) ++{ ++ struct rela *rela; ++ struct instruction *insn; ++ ++ /* check for STACK_FRAME_NON_STANDARD */ ++ if (file->whitelist && file->whitelist->rela) ++ list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { ++ if (rela->sym->type == STT_SECTION && ++ rela->sym->sec == func->sec && ++ rela->addend == func->offset) ++ return true; ++ if (rela->sym->type == STT_FUNC && rela->sym == func) ++ return true; ++ } ++ ++ /* check if it has a context switching instruction */ ++ func_for_each_insn(file, func, insn) ++ if (insn->type == INSN_CONTEXT_SWITCH) ++ return true; ++ ++ return false; ++} ++ ++/* ++ * This checks to see if the given function is a "noreturn" function. ++ * ++ * For global functions which are outside the scope of this object file, we ++ * have to keep a manual list of them. ++ * ++ * For local functions, we have to detect them manually by simply looking for ++ * the lack of a return instruction. 
++ * ++ * Returns: ++ * -1: error ++ * 0: no dead end ++ * 1: dead end ++ */ ++static int __dead_end_function(struct objtool_file *file, struct symbol *func, ++ int recursion) ++{ ++ int i; ++ struct instruction *insn; ++ bool empty = true; ++ ++ /* ++ * Unfortunately these have to be hard coded because the noreturn ++ * attribute isn't provided in ELF data. ++ */ ++ static const char * const global_noreturns[] = { ++ "__stack_chk_fail", ++ "panic", ++ "do_exit", ++ "do_task_dead", ++ "__module_put_and_exit", ++ "complete_and_exit", ++ "kvm_spurious_fault", ++ "__reiserfs_panic", ++ "lbug_with_loc" ++ }; ++ ++ if (func->bind == STB_WEAK) ++ return 0; ++ ++ if (func->bind == STB_GLOBAL) ++ for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) ++ if (!strcmp(func->name, global_noreturns[i])) ++ return 1; ++ ++ if (!func->sec) ++ return 0; ++ ++ func_for_each_insn(file, func, insn) { ++ empty = false; ++ ++ if (insn->type == INSN_RETURN) ++ return 0; ++ } ++ ++ if (empty) ++ return 0; ++ ++ /* ++ * A function can have a sibling call instead of a return. In that ++ * case, the function's dead-end status depends on whether the target ++ * of the sibling call returns. ++ */ ++ func_for_each_insn(file, func, insn) { ++ if (insn->sec != func->sec || ++ insn->offset >= func->offset + func->len) ++ break; ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) { ++ struct instruction *dest = insn->jump_dest; ++ struct symbol *dest_func; ++ ++ if (!dest) ++ /* sibling call to another file */ ++ return 0; ++ ++ if (dest->sec != func->sec || ++ dest->offset < func->offset || ++ dest->offset >= func->offset + func->len) { ++ /* local sibling call */ ++ dest_func = find_symbol_by_offset(dest->sec, ++ dest->offset); ++ if (!dest_func) ++ continue; ++ ++ if (recursion == 5) { ++ WARN_FUNC("infinite recursion (objtool bug!)", ++ dest->sec, dest->offset); ++ return -1; ++ } ++ ++ return __dead_end_function(file, dest_func, ++ recursion + 1); ++ } ++ } ++ ++ if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) ++ /* sibling call */ ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int dead_end_function(struct objtool_file *file, struct symbol *func) ++{ ++ return __dead_end_function(file, func, 0); ++} ++ ++/* ++ * Call the arch-specific instruction decoder for all the instructions and add ++ * them to the global instruction list. 
++ */ ++static int decode_instructions(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct symbol *func; ++ unsigned long offset; ++ struct instruction *insn; ++ int ret; ++ ++ list_for_each_entry(sec, &file->elf->sections, list) { ++ ++ if (!(sec->sh.sh_flags & SHF_EXECINSTR)) ++ continue; ++ ++ for (offset = 0; offset < sec->len; offset += insn->len) { ++ insn = malloc(sizeof(*insn)); ++ memset(insn, 0, sizeof(*insn)); ++ ++ INIT_LIST_HEAD(&insn->alts); ++ insn->sec = sec; ++ insn->offset = offset; ++ ++ ret = arch_decode_instruction(file->elf, sec, offset, ++ sec->len - offset, ++ &insn->len, &insn->type, ++ &insn->immediate); ++ if (ret) ++ return ret; ++ ++ if (!insn->type || insn->type > INSN_LAST) { ++ WARN_FUNC("invalid instruction type %d", ++ insn->sec, insn->offset, insn->type); ++ return -1; ++ } ++ ++ hash_add(file->insn_hash, &insn->hash, insn->offset); ++ list_add_tail(&insn->list, &file->insn_list); ++ } ++ ++ list_for_each_entry(func, &sec->symbol_list, list) { ++ if (func->type != STT_FUNC) ++ continue; ++ ++ if (!find_insn(file, sec, func->offset)) { ++ WARN("%s(): can't find starting instruction", ++ func->name); ++ return -1; ++ } ++ ++ func_for_each_insn(file, func, insn) ++ if (!insn->func) ++ insn->func = func; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Find all uses of the unreachable() macro, which are code path dead ends. ++ */ ++static int add_dead_ends(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct rela *rela; ++ struct instruction *insn; ++ bool found; ++ ++ sec = find_section_by_name(file->elf, ".rela__unreachable"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in .rela__unreachable"); ++ return -1; ++ } ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (insn) ++ insn = list_prev_entry(insn, list); ++ else if (rela->addend == rela->sym->sec->len) { ++ found = false; ++ list_for_each_entry_reverse(insn, &file->insn_list, list) { ++ if (insn->sec == rela->sym->sec) { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) { ++ WARN("can't find unreachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ } else { ++ WARN("can't find unreachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ ++ insn->dead_end = true; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Warnings shouldn't be reported for ignored functions. ++ */ ++static void add_ignores(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ struct symbol *func; ++ ++ list_for_each_entry(sec, &file->elf->sections, list) { ++ list_for_each_entry(func, &sec->symbol_list, list) { ++ if (func->type != STT_FUNC) ++ continue; ++ ++ if (!ignore_func(file, func)) ++ continue; ++ ++ func_for_each_insn(file, func, insn) ++ insn->visited = true; ++ } ++ } ++} ++ ++/* ++ * FIXME: For now, just ignore any alternatives which add retpolines. This is ++ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. ++ * But it at least allows objtool to understand the control flow *around* the ++ * retpoline. 
++ */ ++static int add_nospec_ignores(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct rela *rela; ++ struct instruction *insn; ++ ++ sec = find_section_by_name(file->elf, ".rela.discard.nospec"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in %s", sec->name); ++ return -1; ++ } ++ ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (!insn) { ++ WARN("bad .discard.nospec entry"); ++ return -1; ++ } ++ ++ insn->ignore_alts = true; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Find the destination instructions for all jumps. ++ */ ++static int add_jump_destinations(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct rela *rela; ++ struct section *dest_sec; ++ unsigned long dest_off; ++ ++ for_each_insn(file, insn) { ++ if (insn->type != INSN_JUMP_CONDITIONAL && ++ insn->type != INSN_JUMP_UNCONDITIONAL) ++ continue; ++ ++ /* skip ignores */ ++ if (insn->visited) ++ continue; ++ ++ rela = find_rela_by_dest_range(insn->sec, insn->offset, ++ insn->len); ++ if (!rela) { ++ dest_sec = insn->sec; ++ dest_off = insn->offset + insn->len + insn->immediate; ++ } else if (rela->sym->type == STT_SECTION) { ++ dest_sec = rela->sym->sec; ++ dest_off = rela->addend + 4; ++ } else if (rela->sym->sec->idx) { ++ dest_sec = rela->sym->sec; ++ dest_off = rela->sym->sym.st_value + rela->addend + 4; ++ } else if (strstr(rela->sym->name, "_indirect_thunk_")) { ++ /* ++ * Retpoline jumps are really dynamic jumps in ++ * disguise, so convert them accordingly. ++ */ ++ insn->type = INSN_JUMP_DYNAMIC; ++ continue; ++ } else { ++ /* sibling call */ ++ insn->jump_dest = 0; ++ continue; ++ } ++ ++ insn->jump_dest = find_insn(file, dest_sec, dest_off); ++ if (!insn->jump_dest) { ++ ++ /* ++ * This is a special case where an alt instruction ++ * jumps past the end of the section. These are ++ * handled later in handle_group_alt(). ++ */ ++ if (!strcmp(insn->sec->name, ".altinstr_replacement")) ++ continue; ++ ++ WARN_FUNC("can't find jump dest instruction at %s+0x%lx", ++ insn->sec, insn->offset, dest_sec->name, ++ dest_off); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Find the destination instructions for all calls. ++ */ ++static int add_call_destinations(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ unsigned long dest_off; ++ struct rela *rela; ++ ++ for_each_insn(file, insn) { ++ if (insn->type != INSN_CALL) ++ continue; ++ ++ rela = find_rela_by_dest_range(insn->sec, insn->offset, ++ insn->len); ++ if (!rela) { ++ dest_off = insn->offset + insn->len + insn->immediate; ++ insn->call_dest = find_symbol_by_offset(insn->sec, ++ dest_off); ++ /* ++ * FIXME: Thanks to retpolines, it's now considered ++ * normal for a function to call within itself. So ++ * disable this warning for now. 
++ */ ++#if 0 ++ if (!insn->call_dest) { ++ WARN_FUNC("can't find call dest symbol at offset 0x%lx", ++ insn->sec, insn->offset, dest_off); ++ return -1; ++ } ++#endif ++ } else if (rela->sym->type == STT_SECTION) { ++ insn->call_dest = find_symbol_by_offset(rela->sym->sec, ++ rela->addend+4); ++ if (!insn->call_dest || ++ insn->call_dest->type != STT_FUNC) { ++ WARN_FUNC("can't find call dest symbol at %s+0x%x", ++ insn->sec, insn->offset, ++ rela->sym->sec->name, ++ rela->addend + 4); ++ return -1; ++ } ++ } else ++ insn->call_dest = rela->sym; ++ } ++ ++ return 0; ++} ++ ++/* ++ * The .alternatives section requires some extra special care, over and above ++ * what other special sections require: ++ * ++ * 1. Because alternatives are patched in-place, we need to insert a fake jump ++ * instruction at the end so that validate_branch() skips all the original ++ * replaced instructions when validating the new instruction path. ++ * ++ * 2. An added wrinkle is that the new instruction length might be zero. In ++ * that case the old instructions are replaced with noops. We simulate that ++ * by creating a fake jump as the only new instruction. ++ * ++ * 3. In some cases, the alternative section includes an instruction which ++ * conditionally jumps to the _end_ of the entry. We have to modify these ++ * jumps' destinations to point back to .text rather than the end of the ++ * entry in .altinstr_replacement. ++ * ++ * 4. It has been requested that we don't validate the !POPCNT feature path ++ * which is a "very very small percentage of machines". ++ */ ++static int handle_group_alt(struct objtool_file *file, ++ struct special_alt *special_alt, ++ struct instruction *orig_insn, ++ struct instruction **new_insn) ++{ ++ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; ++ unsigned long dest_off; ++ ++ last_orig_insn = NULL; ++ insn = orig_insn; ++ sec_for_each_insn_from(file, insn) { ++ if (insn->offset >= special_alt->orig_off + special_alt->orig_len) ++ break; ++ ++ if (special_alt->skip_orig) ++ insn->type = INSN_NOP; ++ ++ insn->alt_group = true; ++ last_orig_insn = insn; ++ } ++ ++ if (!next_insn_same_sec(file, last_orig_insn)) { ++ WARN("%s: don't know how to handle alternatives at end of section", ++ special_alt->orig_sec->name); ++ return -1; ++ } ++ ++ fake_jump = malloc(sizeof(*fake_jump)); ++ if (!fake_jump) { ++ WARN("malloc failed"); ++ return -1; ++ } ++ memset(fake_jump, 0, sizeof(*fake_jump)); ++ INIT_LIST_HEAD(&fake_jump->alts); ++ fake_jump->sec = special_alt->new_sec; ++ fake_jump->offset = -1; ++ fake_jump->type = INSN_JUMP_UNCONDITIONAL; ++ fake_jump->jump_dest = list_next_entry(last_orig_insn, list); ++ ++ if (!special_alt->new_len) { ++ *new_insn = fake_jump; ++ return 0; ++ } ++ ++ last_new_insn = NULL; ++ insn = *new_insn; ++ sec_for_each_insn_from(file, insn) { ++ if (insn->offset >= special_alt->new_off + special_alt->new_len) ++ break; ++ ++ last_new_insn = insn; ++ ++ if (insn->type != INSN_JUMP_CONDITIONAL && ++ insn->type != INSN_JUMP_UNCONDITIONAL) ++ continue; ++ ++ if (!insn->immediate) ++ continue; ++ ++ dest_off = insn->offset + insn->len + insn->immediate; ++ if (dest_off == special_alt->new_off + special_alt->new_len) ++ insn->jump_dest = fake_jump; ++ ++ if (!insn->jump_dest) { ++ WARN_FUNC("can't find alternative jump destination", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ } ++ ++ if (!last_new_insn) { ++ WARN_FUNC("can't find last new alternative instruction", ++ special_alt->new_sec, special_alt->new_off); ++ return -1; ++ } 
++ ++ list_add(&fake_jump->list, &last_new_insn->list); ++ ++ return 0; ++} ++ ++/* ++ * A jump table entry can either convert a nop to a jump or a jump to a nop. ++ * If the original instruction is a jump, make the alt entry an effective nop ++ * by just skipping the original instruction. ++ */ ++static int handle_jump_alt(struct objtool_file *file, ++ struct special_alt *special_alt, ++ struct instruction *orig_insn, ++ struct instruction **new_insn) ++{ ++ if (orig_insn->type == INSN_NOP) ++ return 0; ++ ++ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { ++ WARN_FUNC("unsupported instruction at jump label", ++ orig_insn->sec, orig_insn->offset); ++ return -1; ++ } ++ ++ *new_insn = list_next_entry(orig_insn, list); ++ return 0; ++} ++ ++/* ++ * Read all the special sections which have alternate instructions which can be ++ * patched in or redirected to at runtime. Each instruction having alternate ++ * instruction(s) has them added to its insn->alts list, which will be ++ * traversed in validate_branch(). ++ */ ++static int add_special_section_alts(struct objtool_file *file) ++{ ++ struct list_head special_alts; ++ struct instruction *orig_insn, *new_insn; ++ struct special_alt *special_alt, *tmp; ++ struct alternative *alt; ++ int ret; ++ ++ ret = special_get_alts(file->elf, &special_alts); ++ if (ret) ++ return ret; ++ ++ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { ++ orig_insn = find_insn(file, special_alt->orig_sec, ++ special_alt->orig_off); ++ if (!orig_insn) { ++ WARN_FUNC("special: can't find orig instruction", ++ special_alt->orig_sec, special_alt->orig_off); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Ignore retpoline alternatives. */ ++ if (orig_insn->ignore_alts) ++ continue; ++ ++ new_insn = NULL; ++ if (!special_alt->group || special_alt->new_len) { ++ new_insn = find_insn(file, special_alt->new_sec, ++ special_alt->new_off); ++ if (!new_insn) { ++ WARN_FUNC("special: can't find new instruction", ++ special_alt->new_sec, ++ special_alt->new_off); ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ if (special_alt->group) { ++ ret = handle_group_alt(file, special_alt, orig_insn, ++ &new_insn); ++ if (ret) ++ goto out; ++ } else if (special_alt->jump_or_nop) { ++ ret = handle_jump_alt(file, special_alt, orig_insn, ++ &new_insn); ++ if (ret) ++ goto out; ++ } ++ ++ alt = malloc(sizeof(*alt)); ++ if (!alt) { ++ WARN("malloc failed"); ++ ret = -1; ++ goto out; ++ } ++ ++ alt->insn = new_insn; ++ list_add_tail(&alt->list, &orig_insn->alts); ++ ++ list_del(&special_alt->list); ++ free(special_alt); ++ } ++ ++out: ++ return ret; ++} ++ ++static int add_switch_table(struct objtool_file *file, struct symbol *func, ++ struct instruction *insn, struct rela *table, ++ struct rela *next_table) ++{ ++ struct rela *rela = table; ++ struct instruction *alt_insn; ++ struct alternative *alt; ++ ++ list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { ++ if (rela == next_table) ++ break; ++ ++ if (rela->sym->sec != insn->sec || ++ rela->addend <= func->offset || ++ rela->addend >= func->offset + func->len) ++ break; ++ ++ alt_insn = find_insn(file, insn->sec, rela->addend); ++ if (!alt_insn) { ++ WARN("%s: can't find instruction at %s+0x%x", ++ file->rodata->rela->name, insn->sec->name, ++ rela->addend); ++ return -1; ++ } ++ ++ alt = malloc(sizeof(*alt)); ++ if (!alt) { ++ WARN("malloc failed"); ++ return -1; ++ } ++ ++ alt->insn = alt_insn; ++ list_add_tail(&alt->list, &insn->alts); ++ } ++ ++ return 0; ++} ++ ++/* ++ * find_switch_table() - Given a dynamic jump, 
find the switch jump table in ++ * .rodata associated with it. ++ * ++ * There are 3 basic patterns: ++ * ++ * 1. jmpq *[rodata addr](,%reg,8) ++ * ++ * This is the most common case by far. It jumps to an address in a simple ++ * jump table which is stored in .rodata. ++ * ++ * 2. jmpq *[rodata addr](%rip) ++ * ++ * This is caused by a rare GCC quirk, currently only seen in three driver ++ * functions in the kernel, only with certain obscure non-distro configs. ++ * ++ * As part of an optimization, GCC makes a copy of an existing switch jump ++ * table, modifies it, and then hard-codes the jump (albeit with an indirect ++ * jump) to use a single entry in the table. The rest of the jump table and ++ * some of its jump targets remain as dead code. ++ * ++ * In such a case we can just crudely ignore all unreachable instruction ++ * warnings for the entire object file. Ideally we would just ignore them ++ * for the function, but that would require redesigning the code quite a ++ * bit. And honestly that's just not worth doing: unreachable instruction ++ * warnings are of questionable value anyway, and this is such a rare issue. ++ * ++ * 3. mov [rodata addr],%reg1 ++ * ... some instructions ... ++ * jmpq *(%reg1,%reg2,8) ++ * ++ * This is a fairly uncommon pattern which is new for GCC 6. As of this ++ * writing, there are 11 occurrences of it in the allmodconfig kernel. ++ * ++ * TODO: Once we have DWARF CFI and smarter instruction decoding logic, ++ * ensure the same register is used in the mov and jump instructions. ++ */ ++static struct rela *find_switch_table(struct objtool_file *file, ++ struct symbol *func, ++ struct instruction *insn) ++{ ++ struct rela *text_rela, *rodata_rela; ++ struct instruction *orig_insn = insn; ++ ++ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); ++ if (text_rela && text_rela->sym == file->rodata->sym) { ++ /* case 1 */ ++ rodata_rela = find_rela_by_dest(file->rodata, ++ text_rela->addend); ++ if (rodata_rela) ++ return rodata_rela; ++ ++ /* case 2 */ ++ rodata_rela = find_rela_by_dest(file->rodata, ++ text_rela->addend + 4); ++ if (!rodata_rela) ++ return NULL; ++ file->ignore_unreachables = true; ++ return rodata_rela; ++ } ++ ++ /* case 3 */ ++ func_for_each_insn_continue_reverse(file, func, insn) { ++ if (insn->type == INSN_JUMP_DYNAMIC) ++ break; ++ ++ /* allow small jumps within the range */ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL && ++ insn->jump_dest && ++ (insn->jump_dest->offset <= insn->offset || ++ insn->jump_dest->offset > orig_insn->offset)) ++ break; ++ ++ /* look for a relocation which references .rodata */ ++ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, ++ insn->len); ++ if (!text_rela || text_rela->sym != file->rodata->sym) ++ continue; ++ ++ /* ++ * Make sure the .rodata address isn't associated with a ++ * symbol. gcc jump tables are anonymous data. ++ */ ++ if (find_symbol_containing(file->rodata, text_rela->addend)) ++ continue; ++ ++ return find_rela_by_dest(file->rodata, text_rela->addend); ++ } ++ ++ return NULL; ++} ++ ++static int add_func_switch_tables(struct objtool_file *file, ++ struct symbol *func) ++{ ++ struct instruction *insn, *prev_jump = NULL; ++ struct rela *rela, *prev_rela = NULL; ++ int ret; ++ ++ func_for_each_insn(file, func, insn) { ++ if (insn->type != INSN_JUMP_DYNAMIC) ++ continue; ++ ++ rela = find_switch_table(file, func, insn); ++ if (!rela) ++ continue; ++ ++ /* ++ * We found a switch table, but we don't know yet how big it ++ * is. 
Don't add it until we reach the end of the function or ++ * the beginning of another switch table in the same function. ++ */ ++ if (prev_jump) { ++ ret = add_switch_table(file, func, prev_jump, prev_rela, ++ rela); ++ if (ret) ++ return ret; ++ } ++ ++ prev_jump = insn; ++ prev_rela = rela; ++ } ++ ++ if (prev_jump) { ++ ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/* ++ * For some switch statements, gcc generates a jump table in the .rodata ++ * section which contains a list of addresses within the function to jump to. ++ * This finds these jump tables and adds them to the insn->alts lists. ++ */ ++static int add_switch_table_alts(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct symbol *func; ++ int ret; ++ ++ if (!file->rodata || !file->rodata->rela) ++ return 0; ++ ++ list_for_each_entry(sec, &file->elf->sections, list) { ++ list_for_each_entry(func, &sec->symbol_list, list) { ++ if (func->type != STT_FUNC) ++ continue; ++ ++ ret = add_func_switch_tables(file, func); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int decode_sections(struct objtool_file *file) ++{ ++ int ret; ++ ++ ret = decode_instructions(file); ++ if (ret) ++ return ret; ++ ++ ret = add_dead_ends(file); ++ if (ret) ++ return ret; ++ ++ add_ignores(file); ++ ++ ret = add_nospec_ignores(file); ++ if (ret) ++ return ret; ++ ++ ret = add_jump_destinations(file); ++ if (ret) ++ return ret; ++ ++ ret = add_call_destinations(file); ++ if (ret) ++ return ret; ++ ++ ret = add_special_section_alts(file); ++ if (ret) ++ return ret; ++ ++ ret = add_switch_table_alts(file); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++static bool is_fentry_call(struct instruction *insn) ++{ ++ if (insn->type == INSN_CALL && ++ insn->call_dest->type == STT_NOTYPE && ++ !strcmp(insn->call_dest->name, "__fentry__")) ++ return true; ++ ++ return false; ++} ++ ++static bool has_modified_stack_frame(struct instruction *insn) ++{ ++ return (insn->state & STATE_FP_SAVED) || ++ (insn->state & STATE_FP_SETUP); ++} ++ ++static bool has_valid_stack_frame(struct instruction *insn) ++{ ++ return (insn->state & STATE_FP_SAVED) && ++ (insn->state & STATE_FP_SETUP); ++} ++ ++static unsigned int frame_state(unsigned long state) ++{ ++ return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); ++} ++ ++/* ++ * Follow the branch starting at the given instruction, and recursively follow ++ * any other branches (jumps). Meanwhile, track the frame pointer state at ++ * each instruction and validate all the rules described in ++ * tools/objtool/Documentation/stack-validation.txt. 
++ */ ++static int validate_branch(struct objtool_file *file, ++ struct instruction *first, unsigned char first_state) ++{ ++ struct alternative *alt; ++ struct instruction *insn; ++ struct section *sec; ++ struct symbol *func = NULL; ++ unsigned char state; ++ int ret; ++ ++ insn = first; ++ sec = insn->sec; ++ state = first_state; ++ ++ if (insn->alt_group && list_empty(&insn->alts)) { ++ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ while (1) { ++ if (file->c_file && insn->func) { ++ if (func && func != insn->func) { ++ WARN("%s() falls through to next function %s()", ++ func->name, insn->func->name); ++ return 1; ++ } ++ ++ func = insn->func; ++ } ++ ++ if (insn->visited) { ++ if (frame_state(insn->state) != frame_state(state)) { ++ WARN_FUNC("frame pointer state mismatch", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ return 0; ++ } ++ ++ insn->visited = true; ++ insn->state = state; ++ ++ list_for_each_entry(alt, &insn->alts, list) { ++ ret = validate_branch(file, alt->insn, state); ++ if (ret) ++ return 1; ++ } ++ ++ switch (insn->type) { ++ ++ case INSN_FP_SAVE: ++ if (!nofp) { ++ if (state & STATE_FP_SAVED) { ++ WARN_FUNC("duplicate frame pointer save", ++ sec, insn->offset); ++ return 1; ++ } ++ state |= STATE_FP_SAVED; ++ } ++ break; ++ ++ case INSN_FP_SETUP: ++ if (!nofp) { ++ if (state & STATE_FP_SETUP) { ++ WARN_FUNC("duplicate frame pointer setup", ++ sec, insn->offset); ++ return 1; ++ } ++ state |= STATE_FP_SETUP; ++ } ++ break; ++ ++ case INSN_FP_RESTORE: ++ if (!nofp) { ++ if (has_valid_stack_frame(insn)) ++ state &= ~STATE_FP_SETUP; ++ ++ state &= ~STATE_FP_SAVED; ++ } ++ break; ++ ++ case INSN_RETURN: ++ if (!nofp && has_modified_stack_frame(insn)) { ++ WARN_FUNC("return without frame pointer restore", ++ sec, insn->offset); ++ return 1; ++ } ++ return 0; ++ ++ case INSN_CALL: ++ if (is_fentry_call(insn)) { ++ state |= STATE_FENTRY; ++ break; ++ } ++ ++ ret = dead_end_function(file, insn->call_dest); ++ if (ret == 1) ++ return 0; ++ if (ret == -1) ++ return 1; ++ ++ /* fallthrough */ ++ case INSN_CALL_DYNAMIC: ++ if (!nofp && !has_valid_stack_frame(insn)) { ++ WARN_FUNC("call without frame pointer save/setup", ++ sec, insn->offset); ++ return 1; ++ } ++ break; ++ ++ case INSN_JUMP_CONDITIONAL: ++ case INSN_JUMP_UNCONDITIONAL: ++ if (insn->jump_dest) { ++ ret = validate_branch(file, insn->jump_dest, ++ state); ++ if (ret) ++ return 1; ++ } else if (has_modified_stack_frame(insn)) { ++ WARN_FUNC("sibling call from callable instruction with changed frame pointer", ++ sec, insn->offset); ++ return 1; ++ } /* else it's a sibling call */ ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) ++ return 0; ++ ++ break; ++ ++ case INSN_JUMP_DYNAMIC: ++ if (list_empty(&insn->alts) && ++ has_modified_stack_frame(insn)) { ++ WARN_FUNC("sibling call from callable instruction with changed frame pointer", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ return 0; ++ ++ default: ++ break; ++ } ++ ++ if (insn->dead_end) ++ return 0; ++ ++ insn = next_insn_same_sec(file, insn); ++ if (!insn) { ++ WARN("%s: unexpected end of section", sec->name); ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static bool is_kasan_insn(struct instruction *insn) ++{ ++ return (insn->type == INSN_CALL && ++ !strcmp(insn->call_dest->name, "__asan_handle_no_return")); ++} ++ ++static bool is_ubsan_insn(struct instruction *insn) ++{ ++ return (insn->type == INSN_CALL && ++ !strcmp(insn->call_dest->name, ++ 
"__ubsan_handle_builtin_unreachable")); ++} ++ ++static bool ignore_unreachable_insn(struct symbol *func, ++ struct instruction *insn) ++{ ++ int i; ++ ++ if (insn->type == INSN_NOP) ++ return true; ++ ++ /* ++ * Check if this (or a subsequent) instruction is related to ++ * CONFIG_UBSAN or CONFIG_KASAN. ++ * ++ * End the search at 5 instructions to avoid going into the weeds. ++ */ ++ for (i = 0; i < 5; i++) { ++ ++ if (is_kasan_insn(insn) || is_ubsan_insn(insn)) ++ return true; ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { ++ insn = insn->jump_dest; ++ continue; ++ } ++ ++ if (insn->offset + insn->len >= func->offset + func->len) ++ break; ++ insn = list_next_entry(insn, list); ++ } ++ ++ return false; ++} ++ ++static int validate_functions(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct symbol *func; ++ struct instruction *insn; ++ int ret, warnings = 0; ++ ++ list_for_each_entry(sec, &file->elf->sections, list) { ++ list_for_each_entry(func, &sec->symbol_list, list) { ++ if (func->type != STT_FUNC) ++ continue; ++ ++ insn = find_insn(file, sec, func->offset); ++ if (!insn) ++ continue; ++ ++ ret = validate_branch(file, insn, 0); ++ warnings += ret; ++ } ++ } ++ ++ list_for_each_entry(sec, &file->elf->sections, list) { ++ list_for_each_entry(func, &sec->symbol_list, list) { ++ if (func->type != STT_FUNC) ++ continue; ++ ++ func_for_each_insn(file, func, insn) { ++ if (insn->visited) ++ continue; ++ ++ insn->visited = true; ++ ++ if (file->ignore_unreachables || warnings || ++ ignore_unreachable_insn(func, insn)) ++ continue; ++ ++ /* ++ * gcov produces a lot of unreachable ++ * instructions. If we get an unreachable ++ * warning and the file has gcov enabled, just ++ * ignore it, and all other such warnings for ++ * the file. ++ */ ++ if (!file->ignore_unreachables && ++ gcov_enabled(file)) { ++ file->ignore_unreachables = true; ++ continue; ++ } ++ ++ WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); ++ warnings++; ++ } ++ } ++ } ++ ++ return warnings; ++} ++ ++static int validate_uncallable_instructions(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (!insn->visited && insn->type == INSN_RETURN) { ++ ++ /* ++ * Don't warn about call instructions in unvisited ++ * retpoline alternatives. 
++ */ ++ if (!strcmp(insn->sec->name, ".altinstr_replacement")) ++ continue; ++ ++ WARN_FUNC("return instruction outside of a callable function", ++ insn->sec, insn->offset); ++ warnings++; ++ } ++ } ++ ++ return warnings; ++} ++ ++static void cleanup(struct objtool_file *file) ++{ ++ struct instruction *insn, *tmpinsn; ++ struct alternative *alt, *tmpalt; ++ ++ list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { ++ list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { ++ list_del(&alt->list); ++ free(alt); ++ } ++ list_del(&insn->list); ++ hash_del(&insn->hash); ++ free(insn); ++ } ++ elf_close(file->elf); ++} ++ ++int check(const char *_objname, bool _nofp) ++{ ++ struct objtool_file file; ++ int ret, warnings = 0; ++ ++ objname = _objname; ++ nofp = _nofp; ++ ++ file.elf = elf_open(objname); ++ if (!file.elf) { ++ fprintf(stderr, "error reading elf file %s\n", objname); ++ return 1; ++ } ++ ++ INIT_LIST_HEAD(&file.insn_list); ++ hash_init(file.insn_hash); ++ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); ++ file.rodata = find_section_by_name(file.elf, ".rodata"); ++ file.ignore_unreachables = false; ++ file.c_file = find_section_by_name(file.elf, ".comment"); ++ ++ ret = decode_sections(&file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ ++ ret = validate_functions(&file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ ++ ret = validate_uncallable_instructions(&file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ ++out: ++ cleanup(&file); ++ ++ /* ignore warnings for now until we get all the code cleaned up */ ++ if (ret || warnings) ++ return 0; ++ return 0; ++} +diff --git a/tools/objtool/check.h b/tools/objtool/check.h +new file mode 100644 +index 0000000..aca248a +--- /dev/null ++++ b/tools/objtool/check.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#ifndef _CHECK_H ++#define _CHECK_H ++ ++#include <stdbool.h> ++#include "elf.h" ++#include "arch.h" ++#include <linux/hashtable.h> ++ ++struct instruction { ++ struct list_head list; ++ struct hlist_node hash; ++ struct section *sec; ++ unsigned long offset; ++ unsigned int len, state; ++ unsigned char type; ++ unsigned long immediate; ++ bool alt_group, visited, dead_end, ignore_alts; ++ struct symbol *call_dest; ++ struct instruction *jump_dest; ++ struct list_head alts; ++ struct symbol *func; ++}; ++ ++struct objtool_file { ++ struct elf *elf; ++ struct list_head insn_list; ++ DECLARE_HASHTABLE(insn_hash, 16); ++ struct section *rodata, *whitelist; ++ bool ignore_unreachables, c_file; ++}; ++ ++int check(const char *objname, bool nofp); ++ ++#endif /* _CHECK_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch new file mode 100644 index 00000000..318297bf --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch @@ -0,0 +1,9906 @@ +From 0706298ca42f992d0c1afb93c8d6710d15f88ccb Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Date: Sun, 3 Jun 2018 12:35:15 +0200 +Subject: [PATCH 88/93] objtool: sync up with the 4.14.47 version of objtool + +There are pros and cons of dealing with tools in the kernel directory. +The pros are the fact that development happens fast, and new features +can be added to the kernel and the tools at the same time. The cons +are when dealing with backported kernel patches, it can be necessary to +backport parts of the tool changes as well. + +For 4.9.y so far, we have backported individual patches. That quickly +breaks down when there are minor differences between how backports were +handled, so grabbing 40+ patch long series can be difficult, not +impossible, but really frustrating to attempt. + +To help mitigate this mess, here's a single big patch to sync up the +objtool logic to the 4.14.47 version of the tool. From this point +forward (after some other minor header file patches are applied), the +tool should be in sync and much easier to maintain over time. + +This has survived my limited testing, and as the codebase is identical +to 4.14.47, I'm pretty comfortable dropping this big change in here in +4.9.y. Hopefully all goes well... 
+ +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/orc_types.h | 107 ++ + arch/x86/include/asm/unwind_hints.h | 103 ++ + tools/objtool/Build | 3 + + tools/objtool/Documentation/stack-validation.txt | 195 ++- + tools/objtool/Makefile | 35 +- + tools/objtool/arch.h | 65 +- + tools/objtool/arch/x86/Build | 10 +- + tools/objtool/arch/x86/decode.c | 408 +++++- + tools/objtool/arch/x86/include/asm/inat.h | 244 ++++ + tools/objtool/arch/x86/include/asm/inat_types.h | 29 + + tools/objtool/arch/x86/include/asm/insn.h | 211 ++++ + tools/objtool/arch/x86/include/asm/orc_types.h | 107 ++ + tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk | 392 ------ + tools/objtool/arch/x86/insn/inat.c | 97 -- + tools/objtool/arch/x86/insn/inat.h | 234 ---- + tools/objtool/arch/x86/insn/inat_types.h | 29 - + tools/objtool/arch/x86/insn/insn.c | 606 --------- + tools/objtool/arch/x86/insn/insn.h | 211 ---- + tools/objtool/arch/x86/insn/x86-opcode-map.txt | 1063 ---------------- + tools/objtool/arch/x86/lib/inat.c | 97 ++ + tools/objtool/arch/x86/lib/insn.c | 606 +++++++++ + tools/objtool/arch/x86/lib/x86-opcode-map.txt | 1072 ++++++++++++++++ + tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk | 393 ++++++ + tools/objtool/builtin-check.c | 9 +- + tools/objtool/builtin-orc.c | 68 + + tools/objtool/builtin.h | 6 + + tools/objtool/cfi.h | 55 + + tools/objtool/check.c | 1329 ++++++++++++++++---- + tools/objtool/check.h | 39 +- + tools/objtool/elf.c | 284 ++++- + tools/objtool/elf.h | 21 +- + tools/objtool/objtool.c | 12 +- + tools/objtool/orc.h | 30 + + tools/objtool/orc_dump.c | 213 ++++ + tools/objtool/orc_gen.c | 221 ++++ + tools/objtool/special.c | 6 +- + tools/objtool/sync-check.sh | 29 + + tools/objtool/warn.h | 10 + + 38 files changed, 5511 insertions(+), 3138 deletions(-) + create mode 100644 arch/x86/include/asm/orc_types.h + create mode 100644 arch/x86/include/asm/unwind_hints.h + create mode 100644 tools/objtool/arch/x86/include/asm/inat.h + create mode 100644 tools/objtool/arch/x86/include/asm/inat_types.h + create mode 100644 tools/objtool/arch/x86/include/asm/insn.h + create mode 100644 tools/objtool/arch/x86/include/asm/orc_types.h + delete mode 100644 tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk + delete mode 100644 tools/objtool/arch/x86/insn/inat.c + delete mode 100644 tools/objtool/arch/x86/insn/inat.h + delete mode 100644 tools/objtool/arch/x86/insn/inat_types.h + delete mode 100644 tools/objtool/arch/x86/insn/insn.c + delete mode 100644 tools/objtool/arch/x86/insn/insn.h + delete mode 100644 tools/objtool/arch/x86/insn/x86-opcode-map.txt + create mode 100644 tools/objtool/arch/x86/lib/inat.c + create mode 100644 tools/objtool/arch/x86/lib/insn.c + create mode 100644 tools/objtool/arch/x86/lib/x86-opcode-map.txt + create mode 100644 tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk + create mode 100644 tools/objtool/builtin-orc.c + create mode 100644 tools/objtool/cfi.h + create mode 100644 tools/objtool/orc.h + create mode 100644 tools/objtool/orc_dump.c + create mode 100644 tools/objtool/orc_gen.c + create mode 100755 tools/objtool/sync-check.sh + +diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h +new file mode 100644 +index 0000000..7dc777a +--- /dev/null ++++ b/arch/x86/include/asm/orc_types.h +@@ -0,0 +1,107 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the 
terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++#ifndef _ORC_TYPES_H ++#define _ORC_TYPES_H ++ ++#include <linux/types.h> ++#include <linux/compiler.h> ++ ++/* ++ * The ORC_REG_* registers are base registers which are used to find other ++ * registers on the stack. ++ * ++ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the ++ * address of the previous frame: the caller's SP before it called the current ++ * function. ++ * ++ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in ++ * the current frame. ++ * ++ * The most commonly used base registers are SP and BP -- which the previous SP ++ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is ++ * usually based on. ++ * ++ * The rest of the base registers are needed for special cases like entry code ++ * and GCC realigned stacks. ++ */ ++#define ORC_REG_UNDEFINED 0 ++#define ORC_REG_PREV_SP 1 ++#define ORC_REG_DX 2 ++#define ORC_REG_DI 3 ++#define ORC_REG_BP 4 ++#define ORC_REG_SP 5 ++#define ORC_REG_R10 6 ++#define ORC_REG_R13 7 ++#define ORC_REG_BP_INDIRECT 8 ++#define ORC_REG_SP_INDIRECT 9 ++#define ORC_REG_MAX 15 ++ ++/* ++ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the ++ * caller's SP right before it made the call). Used for all callable ++ * functions, i.e. all C code and all callable asm functions. ++ * ++ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points ++ * to a fully populated pt_regs from a syscall, interrupt, or exception. ++ * ++ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset ++ * points to the iret return frame. ++ * ++ * The UNWIND_HINT macros are used only for the unwind_hint struct. They ++ * aren't used in struct orc_entry due to size and complexity constraints. ++ * Objtool converts them to real types when it converts the hints to orc ++ * entries. ++ */ ++#define ORC_TYPE_CALL 0 ++#define ORC_TYPE_REGS 1 ++#define ORC_TYPE_REGS_IRET 2 ++#define UNWIND_HINT_TYPE_SAVE 3 ++#define UNWIND_HINT_TYPE_RESTORE 4 ++ ++#ifndef __ASSEMBLY__ ++/* ++ * This struct is more or less a vastly simplified version of the DWARF Call ++ * Frame Information standard. It contains only the necessary parts of DWARF ++ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the ++ * unwinder how to find the previous SP and BP (and sometimes entry regs) on ++ * the stack for a given code address. Each instance of the struct corresponds ++ * to one or more code locations. ++ */ ++struct orc_entry { ++ s16 sp_offset; ++ s16 bp_offset; ++ unsigned sp_reg:4; ++ unsigned bp_reg:4; ++ unsigned type:2; ++}; ++ ++/* ++ * This struct is used by asm and inline asm code to manually annotate the ++ * location of registers on the stack for the ORC unwinder. ++ * ++ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. 
++ */ ++struct unwind_hint { ++ u32 ip; ++ s16 sp_offset; ++ u8 sp_reg; ++ u8 type; ++}; ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ORC_TYPES_H */ +diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h +new file mode 100644 +index 0000000..5e02b11 +--- /dev/null ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -0,0 +1,103 @@ ++#ifndef _ASM_X86_UNWIND_HINTS_H ++#define _ASM_X86_UNWIND_HINTS_H ++ ++#include "orc_types.h" ++ ++#ifdef __ASSEMBLY__ ++ ++/* ++ * In asm, there are two kinds of code: normal C-type callable functions and ++ * the rest. The normal callable functions can be called by other code, and ++ * don't do anything unusual with the stack. Such normal callable functions ++ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this ++ * category. In this case, no special debugging annotations are needed because ++ * objtool can automatically generate the ORC data for the ORC unwinder to read ++ * at runtime. ++ * ++ * Anything which doesn't fall into the above category, such as syscall and ++ * interrupt handlers, tends to not be called directly by other functions, and ++ * often does unusual non-C-function-type things with the stack pointer. Such ++ * code needs to be annotated such that objtool can understand it. The ++ * following CFI hint macros are for this type of code. ++ * ++ * These macros provide hints to objtool about the state of the stack at each ++ * instruction. Objtool starts from the hints and follows the code flow, ++ * making automatic CFI adjustments when it sees pushes and pops, filling out ++ * the debuginfo as necessary. It will also warn if it sees any ++ * inconsistencies. ++ */ ++.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL ++#ifdef CONFIG_STACK_VALIDATION ++.Lunwind_hint_ip_\@: ++ .pushsection .discard.unwind_hints ++ /* struct unwind_hint */ ++ .long .Lunwind_hint_ip_\@ - . 
++ .short \sp_offset ++ .byte \sp_reg ++ .byte \type ++ .popsection ++#endif ++.endm ++ ++.macro UNWIND_HINT_EMPTY ++ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED ++.endm ++ ++.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 ++ .if \base == %rsp && \indirect ++ .set sp_reg, ORC_REG_SP_INDIRECT ++ .elseif \base == %rsp ++ .set sp_reg, ORC_REG_SP ++ .elseif \base == %rbp ++ .set sp_reg, ORC_REG_BP ++ .elseif \base == %rdi ++ .set sp_reg, ORC_REG_DI ++ .elseif \base == %rdx ++ .set sp_reg, ORC_REG_DX ++ .elseif \base == %r10 ++ .set sp_reg, ORC_REG_R10 ++ .else ++ .error "UNWIND_HINT_REGS: bad base register" ++ .endif ++ ++ .set sp_offset, \offset ++ ++ .if \iret ++ .set type, ORC_TYPE_REGS_IRET ++ .elseif \extra == 0 ++ .set type, ORC_TYPE_REGS_IRET ++ .set sp_offset, \offset + (16*8) ++ .else ++ .set type, ORC_TYPE_REGS ++ .endif ++ ++ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type ++.endm ++ ++.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 ++ UNWIND_HINT_REGS base=\base offset=\offset iret=1 ++.endm ++ ++.macro UNWIND_HINT_FUNC sp_offset=8 ++ UNWIND_HINT sp_offset=\sp_offset ++.endm ++ ++#else /* !__ASSEMBLY__ */ ++ ++#define UNWIND_HINT(sp_reg, sp_offset, type) \ ++ "987: \n\t" \ ++ ".pushsection .discard.unwind_hints\n\t" \ ++ /* struct unwind_hint */ \ ++ ".long 987b - .\n\t" \ ++ ".short " __stringify(sp_offset) "\n\t" \ ++ ".byte " __stringify(sp_reg) "\n\t" \ ++ ".byte " __stringify(type) "\n\t" \ ++ ".popsection\n\t" ++ ++#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) ++ ++#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_UNWIND_HINTS_H */ +diff --git a/tools/objtool/Build b/tools/objtool/Build +index 6f2e198..749becd 100644 +--- a/tools/objtool/Build ++++ b/tools/objtool/Build +@@ -1,6 +1,9 @@ + objtool-y += arch/$(SRCARCH)/ + objtool-y += builtin-check.o ++objtool-y += builtin-orc.o + objtool-y += check.o ++objtool-y += orc_gen.o ++objtool-y += orc_dump.o + objtool-y += elf.o + objtool-y += special.o + objtool-y += objtool.o +diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt +index 55a60d3..3995735 100644 +--- a/tools/objtool/Documentation/stack-validation.txt ++++ b/tools/objtool/Documentation/stack-validation.txt +@@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata. + It enforces a set of rules on asm code and C inline assembly code so + that stack traces can be reliable. + +-Currently it only checks frame pointer usage, but there are plans to add +-CFI validation for C files and CFI generation for asm files. +- + For each function, it recursively follows all possible code paths and + validates the correct frame pointer state at each instruction. + +@@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of + instructions). Similarly, it knows how to follow switch statements, for + which gcc sometimes uses jump tables. + ++(Objtool also has an 'orc generate' subcommand which generates debuginfo ++for the ORC unwinder. See Documentation/x86/orc-unwinder.txt in the ++kernel tree for more details.) ++ + + Why do we need stack metadata validation? + ----------------------------------------- +@@ -93,62 +94,24 @@ a) More reliable stack traces for frame pointer enabled kernels + or at the very end of the function after the stack frame has been + destroyed. This is an inherent limitation of frame pointers. 
+ +-b) 100% reliable stack traces for DWARF enabled kernels +- +- (NOTE: This is not yet implemented) +- +- As an alternative to frame pointers, DWARF Call Frame Information +- (CFI) metadata can be used to walk the stack. Unlike frame pointers, +- CFI metadata is out of band. So it doesn't affect runtime +- performance and it can be reliable even when interrupts or exceptions +- are involved. +- +- For C code, gcc automatically generates DWARF CFI metadata. But for +- asm code, generating CFI is a tedious manual approach which requires +- manually placed .cfi assembler macros to be scattered throughout the +- code. It's clumsy and very easy to get wrong, and it makes the real +- code harder to read. +- +- Stacktool will improve this situation in several ways. For code +- which already has CFI annotations, it will validate them. For code +- which doesn't have CFI annotations, it will generate them. So an +- architecture can opt to strip out all the manual .cfi annotations +- from their asm code and have objtool generate them instead. ++b) ORC (Oops Rewind Capability) unwind table generation + +- We might also add a runtime stack validation debug option where we +- periodically walk the stack from schedule() and/or an NMI to ensure +- that the stack metadata is sane and that we reach the bottom of the +- stack. ++ An alternative to frame pointers and DWARF, ORC unwind data can be ++ used to walk the stack. Unlike frame pointers, ORC data is out of ++ band. So it doesn't affect runtime performance and it can be ++ reliable even when interrupts or exceptions are involved. + +- So the benefit of objtool here will be that external tooling should +- always show perfect stack traces. And the same will be true for +- kernel warning/oops traces if the architecture has a runtime DWARF +- unwinder. ++ For more details, see Documentation/x86/orc-unwinder.txt. + + c) Higher live patching compatibility rate + +- (NOTE: This is not yet implemented) +- +- Currently with CONFIG_LIVEPATCH there's a basic live patching +- framework which is safe for roughly 85-90% of "security" fixes. But +- patches can't have complex features like function dependency or +- prototype changes, or data structure changes. +- +- There's a strong need to support patches which have the more complex +- features so that the patch compatibility rate for security fixes can +- eventually approach something resembling 100%. To achieve that, a +- "consistency model" is needed, which allows tasks to be safely +- transitioned from an unpatched state to a patched state. +- +- One of the key requirements of the currently proposed livepatch +- consistency model [*] is that it needs to walk the stack of each +- sleeping task to determine if it can be transitioned to the patched +- state. If objtool can ensure that stack traces are reliable, this +- consistency model can be used and the live patching compatibility +- rate can be improved significantly. +- +- [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com ++ Livepatch has an optional "consistency model", which is needed for ++ more complex patches. In order for the consistency model to work, ++ stack traces need to be reliable (or an unreliable condition needs to ++ be detectable). Objtool makes that possible. + ++ For more details, see the livepatch documentation in the Linux kernel ++ source tree at Documentation/livepatch/livepatch.txt. + + Rules + ----- +@@ -201,80 +164,84 @@ To achieve the validation, objtool enforces the following rules: + return normally. 
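(Aside -- a hedged illustration, not code from these patches: the ORC
approach described under (b) works because each orc_entry (defined in
orc_types.h later in this series) says how to compute the previous
frame's SP directly from a base register plus an offset. One unwind
step, with an invented helper name and only the two common base
registers handled, might look like:

    /* sp/bp: the current frame's stack and base pointer values */
    unsigned long orc_prev_sp(const struct orc_entry *orc,
                              unsigned long sp, unsigned long bp)
    {
            switch (orc->sp_reg) {
            case ORC_REG_SP:
                    return sp + orc->sp_offset;
            case ORC_REG_BP:
                    return bp + orc->sp_offset;
            default:
                    return 0;   /* other ORC_REG_* bases omitted here */
            }
    }

No frame-pointer chain is dereferenced, which is why the ORC data can
stay out of band, at no runtime cost to the code being unwound.)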
+ + +-Errors in .S files +------------------- ++Objtool warnings ++---------------- ++ ++For asm files, if you're getting an error which doesn't make sense, ++first make sure that the affected code follows the above rules. + +-If you're getting an error in a compiled .S file which you don't +-understand, first make sure that the affected code follows the above +-rules. ++For C files, the common culprits are inline asm statements and calls to ++"noreturn" functions. See below for more details. ++ ++Another possible cause for errors in C code is if the Makefile removes ++-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. + + Here are some examples of common warnings reported by objtool, what + they mean, and suggestions for how to fix them. + + +-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup ++1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup + + The func() function made a function call without first saving and/or +- updating the frame pointer. +- +- If func() is indeed a callable function, add proper frame pointer +- logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove +- its ELF function annotation by changing ENDPROC to END. +- +- If you're getting this error in a .c file, see the "Errors in .c +- files" section. ++ updating the frame pointer, and CONFIG_FRAME_POINTER is enabled. + ++ If the error is for an asm file, and func() is indeed a callable ++ function, add proper frame pointer logic using the FRAME_BEGIN and ++ FRAME_END macros. Otherwise, if it's not a callable function, remove ++ its ELF function annotation by changing ENDPROC to END, and instead ++ use the manual unwind hint macros in asm/unwind_hints.h. + +-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function ++ If it's a GCC-compiled .c file, the error may be because the function ++ uses an inline asm() statement which has a "call" instruction. An ++ asm() statement with a call instruction must declare the use of the ++ stack pointer in its output operand. On x86_64, this means adding ++ the ASM_CALL_CONSTRAINT as an output constraint: + +- A return instruction was detected, but objtool couldn't find a way +- for a callable function to reach the instruction. ++ asm volatile("call func" : ASM_CALL_CONSTRAINT); + +- If the return instruction is inside (or reachable from) a callable +- function, the function needs to be annotated with the ENTRY/ENDPROC +- macros. ++ Otherwise the stack frame may not get created before the call. + +- If you _really_ need a return instruction outside of a function, and +- are 100% sure that it won't affect stack traces, you can tell +- objtool to ignore it. See the "Adding exceptions" section below. + ++2. file.o: warning: objtool: .text+0x53: unreachable instruction + +-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction ++ Objtool couldn't find a code path to reach the instruction. + +- The instruction lives inside of a callable function, but there's no +- possible control flow path from the beginning of the function to the +- instruction. ++ If the error is for an asm file, and the instruction is inside (or ++ reachable from) a callable function, the function should be annotated ++ with the ENTRY/ENDPROC macros (ENDPROC is the important one). 
++   Otherwise, the code should probably be annotated with the unwind hint
++   macros in asm/unwind_hints.h so objtool and the unwinder can know the
++   stack state associated with the code.
+
+-   If the instruction is actually needed, and it's actually in a
+-   callable function, ensure that its function is properly annotated
+-   with ENTRY/ENDPROC.
++   If you're 100% sure the code won't affect stack traces, or if you're
++   just a bad person, you can tell objtool to ignore it. See the
++   "Adding exceptions" section below.
+
+   If it's not actually in a callable function (e.g. kernel entry code),
+   change ENDPROC to END.
+
+
+-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+-   or
+-   asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
++4. file.o: warning: objtool: func(): can't find starting instruction
++   or
++   file.o: warning: objtool: func()+0x11dd: can't decode instruction
+
+-   Did you put data in a text section? If so, that can confuse
++   Does the file have data in a text section? If so, that can confuse
+   objtool's instruction decoder. Move the data to a more appropriate
+   section like .data or .rodata.
+
+
+-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
+-
+-   This is a kernel entry/exit instruction like sysenter or sysret.
+-   Such instructions aren't allowed in a callable function, and are most
+-   likely part of the kernel entry code.
++5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
+
+-   If the instruction isn't actually in a callable function, change
+-   ENDPROC to END.
++   This is a kernel entry/exit instruction like sysenter or iret. Such
++   instructions aren't allowed in a callable function, and are most
++   likely part of the kernel entry code. They should usually not have
++   the callable function annotation (ENDPROC) and should always be
++   annotated with the unwind hint macros in asm/unwind_hints.h.
+
+
+-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
++6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
+
+-   This is a dynamic jump or a jump to an undefined symbol. Stacktool
++   This is a dynamic jump or a jump to an undefined symbol. Objtool
+   assumed it's a sibling call and detected that the frame pointer
+   wasn't first restored to its original state.
+
+@@ -282,24 +249,28 @@ they mean, and suggestions for how to fix them.
+   destination code to the local file.
+
+   If the instruction is not actually in a callable function (e.g.
+-   kernel entry code), change ENDPROC to END.
++   kernel entry code), change ENDPROC to END and annotate manually with
++   the unwind hint macros in asm/unwind_hints.h.
+
+
+-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
++7. file: warning: objtool: func()+0x5c: stack state mismatch
+
+   The instruction's frame pointer state is inconsistent, depending on
+   which execution path was taken to reach the instruction.
+
+-   Make sure the function pushes and sets up the frame pointer (for
+-   x86_64, this means rbp) at the beginning of the function and pops it
+-   at the end of the function. Also make sure that no other code in the
+-   function touches the frame pointer.
++   Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
++   pushes and sets up the frame pointer (for x86_64, this means rbp) at
++   the beginning of the function and pops it at the end of the function.
++ Also make sure that no other code in the function touches the frame ++ pointer. + ++ Another possibility is that the code has some asm or inline asm which ++ does some unusual things to the stack or the frame pointer. In such ++ cases it's probably appropriate to use the unwind hint macros in ++ asm/unwind_hints.h. + +-Errors in .c files +------------------- + +-1. c_file.o: warning: objtool: funcA() falls through to next function funcB() ++8. file.o: warning: objtool: funcA() falls through to next function funcB() + + This means that funcA() doesn't end with a return instruction or an + unconditional jump, and that objtool has determined that the function +@@ -318,22 +289,6 @@ Errors in .c files + might be corrupt due to a gcc bug. For more details, see: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 + +-2. If you're getting any other objtool error in a compiled .c file, it +- may be because the file uses an asm() statement which has a "call" +- instruction. An asm() statement with a call instruction must declare +- the use of the stack pointer in its output operand. For example, on +- x86_64: +- +- register void *__sp asm("rsp"); +- asm volatile("call func" : "+r" (__sp)); +- +- Otherwise the stack frame may not get created before the call. +- +-3. Another possible cause for errors in C code is if the Makefile removes +- -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. +- +-Also see the above section for .S file errors for more information what +-the individual error messages mean. + + If the error doesn't seem to make sense, it could be a bug in objtool. + Feel free to ask the objtool maintainer for help. +diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile +index 041b493..e6acc28 100644 +--- a/tools/objtool/Makefile ++++ b/tools/objtool/Makefile +@@ -1,3 +1,4 @@ ++# SPDX-License-Identifier: GPL-2.0 + include ../scripts/Makefile.include + include ../scripts/Makefile.arch + +@@ -6,17 +7,19 @@ ARCH := x86 + endif + + # always use the host compiler +-CC = gcc +-LD = ld +-AR = ar ++HOSTCC ?= gcc ++HOSTLD ?= ld ++CC = $(HOSTCC) ++LD = $(HOSTLD) ++AR = ar + + ifeq ($(srctree),) +-srctree := $(patsubst %/,%,$(dir $(shell pwd))) ++srctree := $(patsubst %/,%,$(dir $(CURDIR))) + srctree := $(patsubst %/,%,$(dir $(srctree))) + endif + + SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/ +-LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(PWD)/) ++LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(CURDIR)/) + LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a + + OBJTOOL := $(OUTPUT)objtool +@@ -24,8 +27,11 @@ OBJTOOL_IN := $(OBJTOOL)-in.o + + all: $(OBJTOOL) + +-INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi +-CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) ++INCLUDES := -I$(srctree)/tools/include \ ++ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ ++ -I$(srctree)/tools/objtool/arch/$(ARCH)/include ++WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed ++CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) + LDFLAGS += -lelf $(LIBSUBCMD) + + # Allow old libelf to be used: +@@ -39,19 +45,8 @@ include $(srctree)/tools/build/Makefile.include + $(OBJTOOL_IN): fixdep FORCE + @$(MAKE) $(build)=objtool + +-# Busybox's diff doesn't have -I, avoid warning in that case +-# + $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) +- @(diff -I 2>&1 | grep -q 'option requires an argument' && \ +- test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \ +- diff -I'^#include' 
arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ +- diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ +- diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ +- diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ +- diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ +- diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ +- diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ +- || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true ++ @$(CONFIG_SHELL) ./sync-check.sh + $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ + + +@@ -61,7 +56,7 @@ $(LIBSUBCMD): fixdep FORCE + clean: + $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) + $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete +- $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep ++ $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep + + FORCE: + +diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h +index a59e061..b0d7dc3 100644 +--- a/tools/objtool/arch.h ++++ b/tools/objtool/arch.h +@@ -19,25 +19,64 @@ + #define _ARCH_H + + #include <stdbool.h> ++#include <linux/list.h> + #include "elf.h" ++#include "cfi.h" + +-#define INSN_FP_SAVE 1 +-#define INSN_FP_SETUP 2 +-#define INSN_FP_RESTORE 3 +-#define INSN_JUMP_CONDITIONAL 4 +-#define INSN_JUMP_UNCONDITIONAL 5 +-#define INSN_JUMP_DYNAMIC 6 +-#define INSN_CALL 7 +-#define INSN_CALL_DYNAMIC 8 +-#define INSN_RETURN 9 +-#define INSN_CONTEXT_SWITCH 10 +-#define INSN_NOP 11 +-#define INSN_OTHER 12 ++#define INSN_JUMP_CONDITIONAL 1 ++#define INSN_JUMP_UNCONDITIONAL 2 ++#define INSN_JUMP_DYNAMIC 3 ++#define INSN_CALL 4 ++#define INSN_CALL_DYNAMIC 5 ++#define INSN_RETURN 6 ++#define INSN_CONTEXT_SWITCH 7 ++#define INSN_STACK 8 ++#define INSN_BUG 9 ++#define INSN_NOP 10 ++#define INSN_OTHER 11 + #define INSN_LAST INSN_OTHER + ++enum op_dest_type { ++ OP_DEST_REG, ++ OP_DEST_REG_INDIRECT, ++ OP_DEST_MEM, ++ OP_DEST_PUSH, ++ OP_DEST_LEAVE, ++}; ++ ++struct op_dest { ++ enum op_dest_type type; ++ unsigned char reg; ++ int offset; ++}; ++ ++enum op_src_type { ++ OP_SRC_REG, ++ OP_SRC_REG_INDIRECT, ++ OP_SRC_CONST, ++ OP_SRC_POP, ++ OP_SRC_ADD, ++ OP_SRC_AND, ++}; ++ ++struct op_src { ++ enum op_src_type type; ++ unsigned char reg; ++ int offset; ++}; ++ ++struct stack_op { ++ struct op_dest dest; ++ struct op_src src; ++}; ++ ++void arch_initial_func_cfi_state(struct cfi_state *state); ++ + int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, +- unsigned long *displacement); ++ unsigned long *immediate, struct stack_op *op); ++ ++bool arch_callee_saved_reg(unsigned char reg); + + #endif /* _ARCH_H */ +diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build +index debbdb0..b998412 100644 +--- a/tools/objtool/arch/x86/Build ++++ b/tools/objtool/arch/x86/Build +@@ -1,12 +1,12 @@ + objtool-y += decode.o + +-inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk +-inat_tables_maps = arch/x86/insn/x86-opcode-map.txt ++inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk ++inat_tables_maps = arch/x86/lib/x86-opcode-map.txt + +-$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) ++$(OUTPUT)arch/x86/lib/inat-tables.c: 
$(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +-$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c ++$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c + +-CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn ++CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib +diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c +index 9fb487f..006b6d7 100644 +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -19,14 +19,25 @@ + #include <stdlib.h> + + #define unlikely(cond) (cond) +-#include "insn/insn.h" +-#include "insn/inat.c" +-#include "insn/insn.c" ++#include <asm/insn.h> ++#include "lib/inat.c" ++#include "lib/insn.c" + + #include "../../elf.h" + #include "../../arch.h" + #include "../../warn.h" + ++static unsigned char op_to_cfi_reg[][2] = { ++ {CFI_AX, CFI_R8}, ++ {CFI_CX, CFI_R9}, ++ {CFI_DX, CFI_R10}, ++ {CFI_BX, CFI_R11}, ++ {CFI_SP, CFI_R12}, ++ {CFI_BP, CFI_R13}, ++ {CFI_SI, CFI_R14}, ++ {CFI_DI, CFI_R15}, ++}; ++ + static int is_x86_64(struct elf *elf) + { + switch (elf->ehdr.e_machine) { +@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf) + } + } + ++bool arch_callee_saved_reg(unsigned char reg) ++{ ++ switch (reg) { ++ case CFI_BP: ++ case CFI_BX: ++ case CFI_R12: ++ case CFI_R13: ++ case CFI_R14: ++ case CFI_R15: ++ return true; ++ ++ case CFI_AX: ++ case CFI_CX: ++ case CFI_DX: ++ case CFI_SI: ++ case CFI_DI: ++ case CFI_SP: ++ case CFI_R8: ++ case CFI_R9: ++ case CFI_R10: ++ case CFI_R11: ++ case CFI_RA: ++ default: ++ return false; ++ } ++} ++ + int arch_decode_instruction(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, unsigned char *type, +- unsigned long *immediate) ++ unsigned long *immediate, struct stack_op *op) + { + struct insn insn; +- int x86_64; +- unsigned char op1, op2, ext; ++ int x86_64, sign; ++ unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, ++ rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, ++ modrm_reg = 0, sib = 0; + + x86_64 = is_x86_64(elf); + if (x86_64 == -1) + return -1; + +- insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64); ++ insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64); + insn_get_length(&insn); +- insn_get_opcode(&insn); +- insn_get_modrm(&insn); +- insn_get_immediate(&insn); + + if (!insn_complete(&insn)) { + WARN_FUNC("can't decode instruction", sec, offset); +@@ -73,67 +110,317 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, + op1 = insn.opcode.bytes[0]; + op2 = insn.opcode.bytes[1]; + ++ if (insn.rex_prefix.nbytes) { ++ rex = insn.rex_prefix.bytes[0]; ++ rex_w = X86_REX_W(rex) >> 3; ++ rex_r = X86_REX_R(rex) >> 2; ++ rex_x = X86_REX_X(rex) >> 1; ++ rex_b = X86_REX_B(rex); ++ } ++ ++ if (insn.modrm.nbytes) { ++ modrm = insn.modrm.bytes[0]; ++ modrm_mod = X86_MODRM_MOD(modrm); ++ modrm_reg = X86_MODRM_REG(modrm); ++ modrm_rm = X86_MODRM_RM(modrm); ++ } ++ ++ if (insn.sib.nbytes) ++ sib = insn.sib.bytes[0]; ++ + switch (op1) { +- case 0x55: +- if (!insn.rex_prefix.nbytes) +- /* push rbp */ +- *type = INSN_FP_SAVE; ++ ++ case 0x1: ++ case 0x29: ++ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { ++ ++ /* add/sub reg, %rsp */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_ADD; ++ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ } ++ break; ++ ++ case 0x50 ... 
0x57: ++ ++ /* push reg */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG; ++ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; ++ op->dest.type = OP_DEST_PUSH; ++ ++ break; ++ ++ case 0x58 ... 0x5f: ++ ++ /* pop reg */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_POP; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; ++ + break; + +- case 0x5d: +- if (!insn.rex_prefix.nbytes) +- /* pop rbp */ +- *type = INSN_FP_RESTORE; ++ case 0x68: ++ case 0x6a: ++ /* push immediate */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_CONST; ++ op->dest.type = OP_DEST_PUSH; + break; + + case 0x70 ... 0x7f: + *type = INSN_JUMP_CONDITIONAL; + break; + ++ case 0x81: ++ case 0x83: ++ if (rex != 0x48) ++ break; ++ ++ if (modrm == 0xe4) { ++ /* and imm, %rsp */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_AND; ++ op->src.reg = CFI_SP; ++ op->src.offset = insn.immediate.value; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ break; ++ } ++ ++ if (modrm == 0xc4) ++ sign = 1; ++ else if (modrm == 0xec) ++ sign = -1; ++ else ++ break; ++ ++ /* add/sub imm, %rsp */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_ADD; ++ op->src.reg = CFI_SP; ++ op->src.offset = insn.immediate.value * sign; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ break; ++ + case 0x89: +- if (insn.rex_prefix.nbytes == 1 && +- insn.rex_prefix.bytes[0] == 0x48 && +- insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5) +- /* mov rsp, rbp */ +- *type = INSN_FP_SETUP; ++ if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { ++ ++ /* mov %rsp, reg */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG; ++ op->src.reg = CFI_SP; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; ++ break; ++ } ++ ++ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { ++ ++ /* mov reg, %rsp */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG; ++ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ break; ++ } ++ ++ /* fallthrough */ ++ case 0x88: ++ if (!rex_b && ++ (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { ++ ++ /* mov reg, disp(%rbp) */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG; ++ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ op->dest.type = OP_DEST_REG_INDIRECT; ++ op->dest.reg = CFI_BP; ++ op->dest.offset = insn.displacement.value; ++ ++ } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { ++ ++ /* mov reg, disp(%rsp) */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG; ++ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ op->dest.type = OP_DEST_REG_INDIRECT; ++ op->dest.reg = CFI_SP; ++ op->dest.offset = insn.displacement.value; ++ } ++ ++ break; ++ ++ case 0x8b: ++ if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { ++ ++ /* mov disp(%rbp), reg */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG_INDIRECT; ++ op->src.reg = CFI_BP; ++ op->src.offset = insn.displacement.value; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ ++ } else if (rex_w && !rex_b && sib == 0x24 && ++ modrm_mod != 3 && modrm_rm == 4) { ++ ++ /* mov disp(%rsp), reg */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_REG_INDIRECT; ++ op->src.reg = CFI_SP; ++ op->src.offset = insn.displacement.value; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ } ++ + break; + + case 0x8d: +- if (insn.rex_prefix.nbytes && +- insn.rex_prefix.bytes[0] == 0x48 && +- insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c && +- 
insn.sib.nbytes && insn.sib.bytes[0] == 0x24) +- /* lea %(rsp), %rbp */ +- *type = INSN_FP_SETUP; ++ if (sib == 0x24 && rex_w && !rex_b && !rex_x) { ++ ++ *type = INSN_STACK; ++ if (!insn.displacement.value) { ++ /* lea (%rsp), reg */ ++ op->src.type = OP_SRC_REG; ++ } else { ++ /* lea disp(%rsp), reg */ ++ op->src.type = OP_SRC_ADD; ++ op->src.offset = insn.displacement.value; ++ } ++ op->src.reg = CFI_SP; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; ++ ++ } else if (rex == 0x48 && modrm == 0x65) { ++ ++ /* lea disp(%rbp), %rsp */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_ADD; ++ op->src.reg = CFI_BP; ++ op->src.offset = insn.displacement.value; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ ++ } else if (rex == 0x49 && modrm == 0x62 && ++ insn.displacement.value == -8) { ++ ++ /* ++ * lea -0x8(%r10), %rsp ++ * ++ * Restoring rsp back to its original value after a ++ * stack realignment. ++ */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_ADD; ++ op->src.reg = CFI_R10; ++ op->src.offset = -8; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ ++ } else if (rex == 0x49 && modrm == 0x65 && ++ insn.displacement.value == -16) { ++ ++ /* ++ * lea -0x10(%r13), %rsp ++ * ++ * Restoring rsp back to its original value after a ++ * stack realignment. ++ */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_ADD; ++ op->src.reg = CFI_R13; ++ op->src.offset = -16; ++ op->dest.type = OP_DEST_REG; ++ op->dest.reg = CFI_SP; ++ } ++ ++ break; ++ ++ case 0x8f: ++ /* pop to mem */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_POP; ++ op->dest.type = OP_DEST_MEM; + break; + + case 0x90: + *type = INSN_NOP; + break; + ++ case 0x9c: ++ /* pushf */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_CONST; ++ op->dest.type = OP_DEST_PUSH; ++ break; ++ ++ case 0x9d: ++ /* popf */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_POP; ++ op->dest.type = OP_DEST_MEM; ++ break; ++ + case 0x0f: +- if (op2 >= 0x80 && op2 <= 0x8f) ++ ++ if (op2 >= 0x80 && op2 <= 0x8f) { ++ + *type = INSN_JUMP_CONDITIONAL; +- else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || +- op2 == 0x35) ++ ++ } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || ++ op2 == 0x35) { ++ + /* sysenter, sysret */ + *type = INSN_CONTEXT_SWITCH; +- else if (op2 == 0x0d || op2 == 0x1f) ++ ++ } else if (op2 == 0x0b || op2 == 0xb9) { ++ ++ /* ud2 */ ++ *type = INSN_BUG; ++ ++ } else if (op2 == 0x0d || op2 == 0x1f) { ++ + /* nopl/nopw */ + *type = INSN_NOP; +- else if (op2 == 0x01 && insn.modrm.nbytes && +- (insn.modrm.bytes[0] == 0xc2 || +- insn.modrm.bytes[0] == 0xd8)) +- /* vmlaunch, vmrun */ +- *type = INSN_CONTEXT_SWITCH; ++ ++ } else if (op2 == 0xa0 || op2 == 0xa8) { ++ ++ /* push fs/gs */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_CONST; ++ op->dest.type = OP_DEST_PUSH; ++ ++ } else if (op2 == 0xa1 || op2 == 0xa9) { ++ ++ /* pop fs/gs */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_POP; ++ op->dest.type = OP_DEST_MEM; ++ } + + break; + +- case 0xc9: /* leave */ +- *type = INSN_FP_RESTORE; ++ case 0xc9: ++ /* ++ * leave ++ * ++ * equivalent to: ++ * mov bp, sp ++ * pop bp ++ */ ++ *type = INSN_STACK; ++ op->dest.type = OP_DEST_LEAVE; ++ + break; + +- case 0xe3: /* jecxz/jrcxz */ ++ case 0xe3: ++ /* jecxz/jrcxz */ + *type = INSN_JUMP_CONDITIONAL; + break; + +@@ -158,14 +445,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, + break; + + case 0xff: +- ext = X86_MODRM_REG(insn.modrm.bytes[0]); +- if (ext == 2 || ext == 3) ++ if (modrm_reg == 2 || modrm_reg == 
3) ++ + *type = INSN_CALL_DYNAMIC; +- else if (ext == 4) ++ ++ else if (modrm_reg == 4) ++ + *type = INSN_JUMP_DYNAMIC; +- else if (ext == 5) /*jmpf */ ++ ++ else if (modrm_reg == 5) ++ ++ /* jmpf */ + *type = INSN_CONTEXT_SWITCH; + ++ else if (modrm_reg == 6) { ++ ++ /* push from mem */ ++ *type = INSN_STACK; ++ op->src.type = OP_SRC_CONST; ++ op->dest.type = OP_DEST_PUSH; ++ } ++ + break; + + default: +@@ -176,3 +476,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, + + return 0; + } ++ ++void arch_initial_func_cfi_state(struct cfi_state *state) ++{ ++ int i; ++ ++ for (i = 0; i < CFI_NUM_REGS; i++) { ++ state->regs[i].base = CFI_UNDEFINED; ++ state->regs[i].offset = 0; ++ } ++ ++ /* initial CFA (call frame address) */ ++ state->cfa.base = CFI_SP; ++ state->cfa.offset = 8; ++ ++ /* initial RA (return address) */ ++ state->regs[16].base = CFI_CFA; ++ state->regs[16].offset = -8; ++} +diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h +new file mode 100644 +index 0000000..1c78580 +--- /dev/null ++++ b/tools/objtool/arch/x86/include/asm/inat.h +@@ -0,0 +1,244 @@ ++#ifndef _ASM_X86_INAT_H ++#define _ASM_X86_INAT_H ++/* ++ * x86 instruction attributes ++ * ++ * Written by Masami Hiramatsu <mhiramat@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ */ ++#include <asm/inat_types.h> ++ ++/* ++ * Internal bits. Don't use bitmasks directly, because these bits are ++ * unstable. You should use checking functions. 
++ */ ++ ++#define INAT_OPCODE_TABLE_SIZE 256 ++#define INAT_GROUP_TABLE_SIZE 8 ++ ++/* Legacy last prefixes */ ++#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ ++#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ ++#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ ++/* Other Legacy prefixes */ ++#define INAT_PFX_LOCK 4 /* 0xF0 */ ++#define INAT_PFX_CS 5 /* 0x2E */ ++#define INAT_PFX_DS 6 /* 0x3E */ ++#define INAT_PFX_ES 7 /* 0x26 */ ++#define INAT_PFX_FS 8 /* 0x64 */ ++#define INAT_PFX_GS 9 /* 0x65 */ ++#define INAT_PFX_SS 10 /* 0x36 */ ++#define INAT_PFX_ADDRSZ 11 /* 0x67 */ ++/* x86-64 REX prefix */ ++#define INAT_PFX_REX 12 /* 0x4X */ ++/* AVX VEX prefixes */ ++#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ ++#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ ++#define INAT_PFX_EVEX 15 /* EVEX prefix */ ++ ++#define INAT_LSTPFX_MAX 3 ++#define INAT_LGCPFX_MAX 11 ++ ++/* Immediate size */ ++#define INAT_IMM_BYTE 1 ++#define INAT_IMM_WORD 2 ++#define INAT_IMM_DWORD 3 ++#define INAT_IMM_QWORD 4 ++#define INAT_IMM_PTR 5 ++#define INAT_IMM_VWORD32 6 ++#define INAT_IMM_VWORD 7 ++ ++/* Legacy prefix */ ++#define INAT_PFX_OFFS 0 ++#define INAT_PFX_BITS 4 ++#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) ++#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) ++/* Escape opcodes */ ++#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) ++#define INAT_ESC_BITS 2 ++#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) ++#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) ++/* Group opcodes (1-16) */ ++#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) ++#define INAT_GRP_BITS 5 ++#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) ++#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) ++/* Immediates */ ++#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) ++#define INAT_IMM_BITS 3 ++#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) ++/* Flags */ ++#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) ++#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) ++#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) ++#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) ++#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) ++#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) ++#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) ++#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) ++#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) ++/* Attribute making macros for attribute tables */ ++#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) ++#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) ++#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) ++#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) ++ ++/* Identifiers for segment registers */ ++#define INAT_SEG_REG_IGNORE 0 ++#define INAT_SEG_REG_DEFAULT 1 ++#define INAT_SEG_REG_CS 2 ++#define INAT_SEG_REG_SS 3 ++#define INAT_SEG_REG_DS 4 ++#define INAT_SEG_REG_ES 5 ++#define INAT_SEG_REG_FS 6 ++#define INAT_SEG_REG_GS 7 ++ ++/* Attribute search APIs */ ++extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); ++extern int inat_get_last_prefix_id(insn_byte_t last_pfx); ++extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, ++ int lpfx_id, ++ insn_attr_t esc_attr); ++extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, ++ int lpfx_id, ++ insn_attr_t esc_attr); ++extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, ++ insn_byte_t vex_m, ++ insn_byte_t vex_pp); ++ ++/* Attribute checking functions */ ++static inline int inat_is_legacy_prefix(insn_attr_t attr) ++{ ++ attr &= INAT_PFX_MASK; ++ return attr && attr <= 
INAT_LGCPFX_MAX; ++} ++ ++static inline int inat_is_address_size_prefix(insn_attr_t attr) ++{ ++ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; ++} ++ ++static inline int inat_is_operand_size_prefix(insn_attr_t attr) ++{ ++ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; ++} ++ ++static inline int inat_is_rex_prefix(insn_attr_t attr) ++{ ++ return (attr & INAT_PFX_MASK) == INAT_PFX_REX; ++} ++ ++static inline int inat_last_prefix_id(insn_attr_t attr) ++{ ++ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) ++ return 0; ++ else ++ return attr & INAT_PFX_MASK; ++} ++ ++static inline int inat_is_vex_prefix(insn_attr_t attr) ++{ ++ attr &= INAT_PFX_MASK; ++ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || ++ attr == INAT_PFX_EVEX; ++} ++ ++static inline int inat_is_evex_prefix(insn_attr_t attr) ++{ ++ return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; ++} ++ ++static inline int inat_is_vex3_prefix(insn_attr_t attr) ++{ ++ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; ++} ++ ++static inline int inat_is_escape(insn_attr_t attr) ++{ ++ return attr & INAT_ESC_MASK; ++} ++ ++static inline int inat_escape_id(insn_attr_t attr) ++{ ++ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; ++} ++ ++static inline int inat_is_group(insn_attr_t attr) ++{ ++ return attr & INAT_GRP_MASK; ++} ++ ++static inline int inat_group_id(insn_attr_t attr) ++{ ++ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; ++} ++ ++static inline int inat_group_common_attribute(insn_attr_t attr) ++{ ++ return attr & ~INAT_GRP_MASK; ++} ++ ++static inline int inat_has_immediate(insn_attr_t attr) ++{ ++ return attr & INAT_IMM_MASK; ++} ++ ++static inline int inat_immediate_size(insn_attr_t attr) ++{ ++ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; ++} ++ ++static inline int inat_has_modrm(insn_attr_t attr) ++{ ++ return attr & INAT_MODRM; ++} ++ ++static inline int inat_is_force64(insn_attr_t attr) ++{ ++ return attr & INAT_FORCE64; ++} ++ ++static inline int inat_has_second_immediate(insn_attr_t attr) ++{ ++ return attr & INAT_SCNDIMM; ++} ++ ++static inline int inat_has_moffset(insn_attr_t attr) ++{ ++ return attr & INAT_MOFFSET; ++} ++ ++static inline int inat_has_variant(insn_attr_t attr) ++{ ++ return attr & INAT_VARIANT; ++} ++ ++static inline int inat_accept_vex(insn_attr_t attr) ++{ ++ return attr & INAT_VEXOK; ++} ++ ++static inline int inat_must_vex(insn_attr_t attr) ++{ ++ return attr & (INAT_VEXONLY | INAT_EVEXONLY); ++} ++ ++static inline int inat_must_evex(insn_attr_t attr) ++{ ++ return attr & INAT_EVEXONLY; ++} ++#endif +diff --git a/tools/objtool/arch/x86/include/asm/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h +new file mode 100644 +index 0000000..cb3c20c +--- /dev/null ++++ b/tools/objtool/arch/x86/include/asm/inat_types.h +@@ -0,0 +1,29 @@ ++#ifndef _ASM_X86_INAT_TYPES_H ++#define _ASM_X86_INAT_TYPES_H ++/* ++ * x86 instruction attributes ++ * ++ * Written by Masami Hiramatsu <mhiramat@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ */ ++ ++/* Instruction attributes */ ++typedef unsigned int insn_attr_t; ++typedef unsigned char insn_byte_t; ++typedef signed int insn_value_t; ++ ++#endif +diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h +new file mode 100644 +index 0000000..b3e32b0 +--- /dev/null ++++ b/tools/objtool/arch/x86/include/asm/insn.h +@@ -0,0 +1,211 @@ ++#ifndef _ASM_X86_INSN_H ++#define _ASM_X86_INSN_H ++/* ++ * x86 instruction analysis ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Copyright (C) IBM Corporation, 2009 ++ */ ++ ++/* insn_attr_t is defined in inat.h */ ++#include <asm/inat.h> ++ ++struct insn_field { ++ union { ++ insn_value_t value; ++ insn_byte_t bytes[4]; ++ }; ++ /* !0 if we've run insn_get_xxx() for this field */ ++ unsigned char got; ++ unsigned char nbytes; ++}; ++ ++struct insn { ++ struct insn_field prefixes; /* ++ * Prefixes ++ * prefixes.bytes[3]: last prefix ++ */ ++ struct insn_field rex_prefix; /* REX prefix */ ++ struct insn_field vex_prefix; /* VEX prefix */ ++ struct insn_field opcode; /* ++ * opcode.bytes[0]: opcode1 ++ * opcode.bytes[1]: opcode2 ++ * opcode.bytes[2]: opcode3 ++ */ ++ struct insn_field modrm; ++ struct insn_field sib; ++ struct insn_field displacement; ++ union { ++ struct insn_field immediate; ++ struct insn_field moffset1; /* for 64bit MOV */ ++ struct insn_field immediate1; /* for 64bit imm or off16/32 */ ++ }; ++ union { ++ struct insn_field moffset2; /* for 64bit MOV */ ++ struct insn_field immediate2; /* for 64bit imm or seg16 */ ++ }; ++ ++ insn_attr_t attr; ++ unsigned char opnd_bytes; ++ unsigned char addr_bytes; ++ unsigned char length; ++ unsigned char x86_64; ++ ++ const insn_byte_t *kaddr; /* kernel address of insn to analyze */ ++ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ ++ const insn_byte_t *next_byte; ++}; ++ ++#define MAX_INSN_SIZE 15 ++ ++#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) ++#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) ++#define X86_MODRM_RM(modrm) ((modrm) & 0x07) ++ ++#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) ++#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) ++#define X86_SIB_BASE(sib) ((sib) & 0x07) ++ ++#define X86_REX_W(rex) ((rex) & 8) ++#define X86_REX_R(rex) ((rex) & 4) ++#define X86_REX_X(rex) ((rex) & 2) ++#define X86_REX_B(rex) ((rex) & 1) ++ ++/* VEX bit flags */ ++#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ ++#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ ++#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ ++#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ 
++#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ ++/* VEX bit fields */ ++#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ ++#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ ++#define X86_VEX2_M 1 /* VEX2.M always 1 */ ++#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ ++#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ ++#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ ++ ++extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); ++extern void insn_get_prefixes(struct insn *insn); ++extern void insn_get_opcode(struct insn *insn); ++extern void insn_get_modrm(struct insn *insn); ++extern void insn_get_sib(struct insn *insn); ++extern void insn_get_displacement(struct insn *insn); ++extern void insn_get_immediate(struct insn *insn); ++extern void insn_get_length(struct insn *insn); ++ ++/* Attribute will be determined after getting ModRM (for opcode groups) */ ++static inline void insn_get_attribute(struct insn *insn) ++{ ++ insn_get_modrm(insn); ++} ++ ++/* Instruction uses RIP-relative addressing */ ++extern int insn_rip_relative(struct insn *insn); ++ ++/* Init insn for kernel text */ ++static inline void kernel_insn_init(struct insn *insn, ++ const void *kaddr, int buf_len) ++{ ++#ifdef CONFIG_X86_64 ++ insn_init(insn, kaddr, buf_len, 1); ++#else /* CONFIG_X86_32 */ ++ insn_init(insn, kaddr, buf_len, 0); ++#endif ++} ++ ++static inline int insn_is_avx(struct insn *insn) ++{ ++ if (!insn->prefixes.got) ++ insn_get_prefixes(insn); ++ return (insn->vex_prefix.value != 0); ++} ++ ++static inline int insn_is_evex(struct insn *insn) ++{ ++ if (!insn->prefixes.got) ++ insn_get_prefixes(insn); ++ return (insn->vex_prefix.nbytes == 4); ++} ++ ++/* Ensure this instruction is decoded completely */ ++static inline int insn_complete(struct insn *insn) ++{ ++ return insn->opcode.got && insn->modrm.got && insn->sib.got && ++ insn->displacement.got && insn->immediate.got; ++} ++ ++static inline insn_byte_t insn_vex_m_bits(struct insn *insn) ++{ ++ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ ++ return X86_VEX2_M; ++ else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ ++ return X86_VEX3_M(insn->vex_prefix.bytes[1]); ++ else /* EVEX */ ++ return X86_EVEX_M(insn->vex_prefix.bytes[1]); ++} ++ ++static inline insn_byte_t insn_vex_p_bits(struct insn *insn) ++{ ++ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ ++ return X86_VEX_P(insn->vex_prefix.bytes[1]); ++ else ++ return X86_VEX_P(insn->vex_prefix.bytes[2]); ++} ++ ++/* Get the last prefix id from last prefix or VEX prefix */ ++static inline int insn_last_prefix_id(struct insn *insn) ++{ ++ if (insn_is_avx(insn)) ++ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ ++ ++ if (insn->prefixes.bytes[3]) ++ return inat_get_last_prefix_id(insn->prefixes.bytes[3]); ++ ++ return 0; ++} ++ ++/* Offset of each field from kaddr */ ++static inline int insn_offset_rex_prefix(struct insn *insn) ++{ ++ return insn->prefixes.nbytes; ++} ++static inline int insn_offset_vex_prefix(struct insn *insn) ++{ ++ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; ++} ++static inline int insn_offset_opcode(struct insn *insn) ++{ ++ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; ++} ++static inline int insn_offset_modrm(struct insn *insn) ++{ ++ return insn_offset_opcode(insn) + insn->opcode.nbytes; ++} ++static inline int insn_offset_sib(struct insn *insn) ++{ ++ return insn_offset_modrm(insn) + insn->modrm.nbytes; 
++} ++static inline int insn_offset_displacement(struct insn *insn) ++{ ++ return insn_offset_sib(insn) + insn->sib.nbytes; ++} ++static inline int insn_offset_immediate(struct insn *insn) ++{ ++ return insn_offset_displacement(insn) + insn->displacement.nbytes; ++} ++ ++#endif /* _ASM_X86_INSN_H */ +diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h +new file mode 100644 +index 0000000..9c9dc57 +--- /dev/null ++++ b/tools/objtool/arch/x86/include/asm/orc_types.h +@@ -0,0 +1,107 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++#ifndef _ORC_TYPES_H ++#define _ORC_TYPES_H ++ ++#include <linux/types.h> ++#include <linux/compiler.h> ++ ++/* ++ * The ORC_REG_* registers are base registers which are used to find other ++ * registers on the stack. ++ * ++ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the ++ * address of the previous frame: the caller's SP before it called the current ++ * function. ++ * ++ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in ++ * the current frame. ++ * ++ * The most commonly used base registers are SP and BP -- which the previous SP ++ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is ++ * usually based on. ++ * ++ * The rest of the base registers are needed for special cases like entry code ++ * and GCC realigned stacks. ++ */ ++#define ORC_REG_UNDEFINED 0 ++#define ORC_REG_PREV_SP 1 ++#define ORC_REG_DX 2 ++#define ORC_REG_DI 3 ++#define ORC_REG_BP 4 ++#define ORC_REG_SP 5 ++#define ORC_REG_R10 6 ++#define ORC_REG_R13 7 ++#define ORC_REG_BP_INDIRECT 8 ++#define ORC_REG_SP_INDIRECT 9 ++#define ORC_REG_MAX 15 ++ ++/* ++ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the ++ * caller's SP right before it made the call). Used for all callable ++ * functions, i.e. all C code and all callable asm functions. ++ * ++ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points ++ * to a fully populated pt_regs from a syscall, interrupt, or exception. ++ * ++ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset ++ * points to the iret return frame. ++ * ++ * The UNWIND_HINT macros are used only for the unwind_hint struct. They ++ * aren't used in struct orc_entry due to size and complexity constraints. ++ * Objtool converts them to real types when it converts the hints to orc ++ * entries. ++ */ ++#define ORC_TYPE_CALL 0 ++#define ORC_TYPE_REGS 1 ++#define ORC_TYPE_REGS_IRET 2 ++#define UNWIND_HINT_TYPE_SAVE 3 ++#define UNWIND_HINT_TYPE_RESTORE 4 ++ ++#ifndef __ASSEMBLY__ ++/* ++ * This struct is more or less a vastly simplified version of the DWARF Call ++ * Frame Information standard. 
It contains only the necessary parts of DWARF ++ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the ++ * unwinder how to find the previous SP and BP (and sometimes entry regs) on ++ * the stack for a given code address. Each instance of the struct corresponds ++ * to one or more code locations. ++ */ ++struct orc_entry { ++ s16 sp_offset; ++ s16 bp_offset; ++ unsigned sp_reg:4; ++ unsigned bp_reg:4; ++ unsigned type:2; ++} __packed; ++ ++/* ++ * This struct is used by asm and inline asm code to manually annotate the ++ * location of registers on the stack for the ORC unwinder. ++ * ++ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. ++ */ ++struct unwind_hint { ++ u32 ip; ++ s16 sp_offset; ++ u8 sp_reg; ++ u8 type; ++}; ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ORC_TYPES_H */ +diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk +deleted file mode 100644 +index a3d2c62..0000000 +--- a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk ++++ /dev/null +@@ -1,392 +0,0 @@ +-#!/bin/awk -f +-# gen-insn-attr-x86.awk: Instruction attribute table generator +-# Written by Masami Hiramatsu <mhiramat@redhat.com> +-# +-# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c +- +-# Awk implementation sanity check +-function check_awk_implement() { +- if (sprintf("%x", 0) != "0") +- return "Your awk has a printf-format problem." +- return "" +-} +- +-# Clear working vars +-function clear_vars() { +- delete table +- delete lptable2 +- delete lptable1 +- delete lptable3 +- eid = -1 # escape id +- gid = -1 # group id +- aid = -1 # AVX id +- tname = "" +-} +- +-BEGIN { +- # Implementation error checking +- awkchecked = check_awk_implement() +- if (awkchecked != "") { +- print "Error: " awkchecked > "/dev/stderr" +- print "Please try to use gawk." > "/dev/stderr" +- exit 1 +- } +- +- # Setup generating tables +- print "/* x86 opcode map generated from x86-opcode-map.txt */" +- print "/* Do not change this code. 
*/\n" +- ggid = 1 +- geid = 1 +- gaid = 0 +- delete etable +- delete gtable +- delete atable +- +- opnd_expr = "^[A-Za-z/]" +- ext_expr = "^\\(" +- sep_expr = "^\\|$" +- group_expr = "^Grp[0-9A-Za-z]+" +- +- imm_expr = "^[IJAOL][a-z]" +- imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" +- imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" +- imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" +- imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" +- imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" +- imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" +- imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" +- imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" +- imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" +- imm_flag["Ob"] = "INAT_MOFFSET" +- imm_flag["Ov"] = "INAT_MOFFSET" +- imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" +- +- modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" +- force64_expr = "\\([df]64\\)" +- rex_expr = "^REX(\\.[XRWB]+)*" +- fpu_expr = "^ESC" # TODO +- +- lprefix1_expr = "\\((66|!F3)\\)" +- lprefix2_expr = "\\(F3\\)" +- lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" +- lprefix_expr = "\\((66|F2|F3)\\)" +- max_lprefix = 4 +- +- # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript +- # accepts VEX prefix +- vexok_opcode_expr = "^[vk].*" +- vexok_expr = "\\(v1\\)" +- # All opcodes with (v) superscript supports *only* VEX prefix +- vexonly_expr = "\\(v\\)" +- # All opcodes with (ev) superscript supports *only* EVEX prefix +- evexonly_expr = "\\(ev\\)" +- +- prefix_expr = "\\(Prefix\\)" +- prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" +- prefix_num["REPNE"] = "INAT_PFX_REPNE" +- prefix_num["REP/REPE"] = "INAT_PFX_REPE" +- prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" +- prefix_num["XRELEASE"] = "INAT_PFX_REPE" +- prefix_num["LOCK"] = "INAT_PFX_LOCK" +- prefix_num["SEG=CS"] = "INAT_PFX_CS" +- prefix_num["SEG=DS"] = "INAT_PFX_DS" +- prefix_num["SEG=ES"] = "INAT_PFX_ES" +- prefix_num["SEG=FS"] = "INAT_PFX_FS" +- prefix_num["SEG=GS"] = "INAT_PFX_GS" +- prefix_num["SEG=SS"] = "INAT_PFX_SS" +- prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" +- prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" +- prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" +- prefix_num["EVEX"] = "INAT_PFX_EVEX" +- +- clear_vars() +-} +- +-function semantic_error(msg) { +- print "Semantic error at " NR ": " msg > "/dev/stderr" +- exit 1 +-} +- +-function debug(msg) { +- print "DEBUG: " msg +-} +- +-function array_size(arr, i,c) { +- c = 0 +- for (i in arr) +- c++ +- return c +-} +- +-/^Table:/ { +- print "/* " $0 " */" +- if (tname != "") +- semantic_error("Hit Table: before EndTable:."); +-} +- +-/^Referrer:/ { +- if (NF != 1) { +- # escape opcode table +- ref = "" +- for (i = 2; i <= NF; i++) +- ref = ref $i +- eid = escape[ref] +- tname = sprintf("inat_escape_table_%d", eid) +- } +-} +- +-/^AVXcode:/ { +- if (NF != 1) { +- # AVX/escape opcode table +- aid = $2 +- if (gaid <= aid) +- gaid = aid + 1 +- if (tname == "") # AVX only opcode table +- tname = sprintf("inat_avx_table_%d", $2) +- } +- if (aid == -1 && eid == -1) # primary opcode table +- tname = "inat_primary_table" +-} +- +-/^GrpTable:/ { +- print "/* " $0 " */" +- if (!($2 in group)) +- semantic_error("No group: " $2 ) +- gid = group[$2] +- tname = "inat_group_table_" gid +-} +- +-function print_table(tbl,name,fmt,n) +-{ +- print "const insn_attr_t " name " = {" +- for (i = 0; i < n; i++) { +- id = sprintf(fmt, i) +- if (tbl[id]) +- print " [" id "] = " tbl[id] "," +- } +- print "};" +-} +- +-/^EndTable/ { +- if (gid != -1) { +- # print group tables +- if 
(array_size(table) != 0) { +- print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", +- "0x%x", 8) +- gtable[gid,0] = tname +- } +- if (array_size(lptable1) != 0) { +- print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", +- "0x%x", 8) +- gtable[gid,1] = tname "_1" +- } +- if (array_size(lptable2) != 0) { +- print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", +- "0x%x", 8) +- gtable[gid,2] = tname "_2" +- } +- if (array_size(lptable3) != 0) { +- print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", +- "0x%x", 8) +- gtable[gid,3] = tname "_3" +- } +- } else { +- # print primary/escaped tables +- if (array_size(table) != 0) { +- print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", +- "0x%02x", 256) +- etable[eid,0] = tname +- if (aid >= 0) +- atable[aid,0] = tname +- } +- if (array_size(lptable1) != 0) { +- print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", +- "0x%02x", 256) +- etable[eid,1] = tname "_1" +- if (aid >= 0) +- atable[aid,1] = tname "_1" +- } +- if (array_size(lptable2) != 0) { +- print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", +- "0x%02x", 256) +- etable[eid,2] = tname "_2" +- if (aid >= 0) +- atable[aid,2] = tname "_2" +- } +- if (array_size(lptable3) != 0) { +- print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", +- "0x%02x", 256) +- etable[eid,3] = tname "_3" +- if (aid >= 0) +- atable[aid,3] = tname "_3" +- } +- } +- print "" +- clear_vars() +-} +- +-function add_flags(old,new) { +- if (old && new) +- return old " | " new +- else if (old) +- return old +- else +- return new +-} +- +-# convert operands to flags. +-function convert_operands(count,opnd, i,j,imm,mod) +-{ +- imm = null +- mod = null +- for (j = 1; j <= count; j++) { +- i = opnd[j] +- if (match(i, imm_expr) == 1) { +- if (!imm_flag[i]) +- semantic_error("Unknown imm opnd: " i) +- if (imm) { +- if (i != "Ib") +- semantic_error("Second IMM error") +- imm = add_flags(imm, "INAT_SCNDIMM") +- } else +- imm = imm_flag[i] +- } else if (match(i, modrm_expr)) +- mod = "INAT_MODRM" +- } +- return add_flags(imm, mod) +-} +- +-/^[0-9a-f]+\:/ { +- if (NR == 1) +- next +- # get index +- idx = "0x" substr($1, 1, index($1,":") - 1) +- if (idx in table) +- semantic_error("Redefine " idx " in " tname) +- +- # check if escaped opcode +- if ("escape" == $2) { +- if ($3 != "#") +- semantic_error("No escaped name") +- ref = "" +- for (i = 4; i <= NF; i++) +- ref = ref $i +- if (ref in escape) +- semantic_error("Redefine escape (" ref ")") +- escape[ref] = geid +- geid++ +- table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" +- next +- } +- +- variant = null +- # converts +- i = 2 +- while (i <= NF) { +- opcode = $(i++) +- delete opnds +- ext = null +- flags = null +- opnd = null +- # parse one opcode +- if (match($i, opnd_expr)) { +- opnd = $i +- count = split($(i++), opnds, ",") +- flags = convert_operands(count, opnds) +- } +- if (match($i, ext_expr)) +- ext = $(i++) +- if (match($i, sep_expr)) +- i++ +- else if (i < NF) +- semantic_error($i " is not a separator") +- +- # check if group opcode +- if (match(opcode, group_expr)) { +- if (!(opcode in group)) { +- group[opcode] = ggid +- ggid++ +- } +- flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") +- } +- # check force(or default) 64bit +- if (match(ext, force64_expr)) +- flags = add_flags(flags, "INAT_FORCE64") +- +- # check REX prefix +- if (match(opcode, rex_expr)) +- flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") +- +- # check coprocessor escape : TODO +- if (match(opcode, fpu_expr)) +- flags = add_flags(flags, "INAT_MODRM") +- +- 
# check VEX codes +- if (match(ext, evexonly_expr)) +- flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") +- else if (match(ext, vexonly_expr)) +- flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") +- else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) +- flags = add_flags(flags, "INAT_VEXOK") +- +- # check prefixes +- if (match(ext, prefix_expr)) { +- if (!prefix_num[opcode]) +- semantic_error("Unknown prefix: " opcode) +- flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") +- } +- if (length(flags) == 0) +- continue +- # check if last prefix +- if (match(ext, lprefix1_expr)) { +- lptable1[idx] = add_flags(lptable1[idx],flags) +- variant = "INAT_VARIANT" +- } +- if (match(ext, lprefix2_expr)) { +- lptable2[idx] = add_flags(lptable2[idx],flags) +- variant = "INAT_VARIANT" +- } +- if (match(ext, lprefix3_expr)) { +- lptable3[idx] = add_flags(lptable3[idx],flags) +- variant = "INAT_VARIANT" +- } +- if (!match(ext, lprefix_expr)){ +- table[idx] = add_flags(table[idx],flags) +- } +- } +- if (variant) +- table[idx] = add_flags(table[idx],variant) +-} +- +-END { +- if (awkchecked != "") +- exit 1 +- # print escape opcode map's array +- print "/* Escape opcode map array */" +- print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ +- "[INAT_LSTPFX_MAX + 1] = {" +- for (i = 0; i < geid; i++) +- for (j = 0; j < max_lprefix; j++) +- if (etable[i,j]) +- print " ["i"]["j"] = "etable[i,j]"," +- print "};\n" +- # print group opcode map's array +- print "/* Group opcode map array */" +- print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ +- "[INAT_LSTPFX_MAX + 1] = {" +- for (i = 0; i < ggid; i++) +- for (j = 0; j < max_lprefix; j++) +- if (gtable[i,j]) +- print " ["i"]["j"] = "gtable[i,j]"," +- print "};\n" +- # print AVX opcode map's array +- print "/* AVX opcode map array */" +- print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ +- "[INAT_LSTPFX_MAX + 1] = {" +- for (i = 0; i < gaid; i++) +- for (j = 0; j < max_lprefix; j++) +- if (atable[i,j]) +- print " ["i"]["j"] = "atable[i,j]"," +- print "};" +-} +- +diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/insn/inat.c +deleted file mode 100644 +index e4bf28e..0000000 +--- a/tools/objtool/arch/x86/insn/inat.c ++++ /dev/null +@@ -1,97 +0,0 @@ +-/* +- * x86 instruction attribute tables +- * +- * Written by Masami Hiramatsu <mhiramat@redhat.com> +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+- * +- */ +-#include "insn.h" +- +-/* Attribute tables are generated from opcode map */ +-#include "inat-tables.c" +- +-/* Attribute search APIs */ +-insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +-{ +- return inat_primary_table[opcode]; +-} +- +-int inat_get_last_prefix_id(insn_byte_t last_pfx) +-{ +- insn_attr_t lpfx_attr; +- +- lpfx_attr = inat_get_opcode_attribute(last_pfx); +- return inat_last_prefix_id(lpfx_attr); +-} +- +-insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, +- insn_attr_t esc_attr) +-{ +- const insn_attr_t *table; +- int n; +- +- n = inat_escape_id(esc_attr); +- +- table = inat_escape_tables[n][0]; +- if (!table) +- return 0; +- if (inat_has_variant(table[opcode]) && lpfx_id) { +- table = inat_escape_tables[n][lpfx_id]; +- if (!table) +- return 0; +- } +- return table[opcode]; +-} +- +-insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, +- insn_attr_t grp_attr) +-{ +- const insn_attr_t *table; +- int n; +- +- n = inat_group_id(grp_attr); +- +- table = inat_group_tables[n][0]; +- if (!table) +- return inat_group_common_attribute(grp_attr); +- if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { +- table = inat_group_tables[n][lpfx_id]; +- if (!table) +- return inat_group_common_attribute(grp_attr); +- } +- return table[X86_MODRM_REG(modrm)] | +- inat_group_common_attribute(grp_attr); +-} +- +-insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, +- insn_byte_t vex_p) +-{ +- const insn_attr_t *table; +- if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) +- return 0; +- /* At first, this checks the master table */ +- table = inat_avx_tables[vex_m][0]; +- if (!table) +- return 0; +- if (!inat_is_group(table[opcode]) && vex_p) { +- /* If this is not a group, get attribute directly */ +- table = inat_avx_tables[vex_m][vex_p]; +- if (!table) +- return 0; +- } +- return table[opcode]; +-} +- +diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h +deleted file mode 100644 +index 125ecd2..0000000 +--- a/tools/objtool/arch/x86/insn/inat.h ++++ /dev/null +@@ -1,234 +0,0 @@ +-#ifndef _ASM_X86_INAT_H +-#define _ASM_X86_INAT_H +-/* +- * x86 instruction attributes +- * +- * Written by Masami Hiramatsu <mhiramat@redhat.com> +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +- * +- */ +-#include "inat_types.h" +- +-/* +- * Internal bits. Don't use bitmasks directly, because these bits are +- * unstable. You should use checking functions. 
+- */ +- +-#define INAT_OPCODE_TABLE_SIZE 256 +-#define INAT_GROUP_TABLE_SIZE 8 +- +-/* Legacy last prefixes */ +-#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +-#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +-#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +-/* Other Legacy prefixes */ +-#define INAT_PFX_LOCK 4 /* 0xF0 */ +-#define INAT_PFX_CS 5 /* 0x2E */ +-#define INAT_PFX_DS 6 /* 0x3E */ +-#define INAT_PFX_ES 7 /* 0x26 */ +-#define INAT_PFX_FS 8 /* 0x64 */ +-#define INAT_PFX_GS 9 /* 0x65 */ +-#define INAT_PFX_SS 10 /* 0x36 */ +-#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +-/* x86-64 REX prefix */ +-#define INAT_PFX_REX 12 /* 0x4X */ +-/* AVX VEX prefixes */ +-#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +-#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +-#define INAT_PFX_EVEX 15 /* EVEX prefix */ +- +-#define INAT_LSTPFX_MAX 3 +-#define INAT_LGCPFX_MAX 11 +- +-/* Immediate size */ +-#define INAT_IMM_BYTE 1 +-#define INAT_IMM_WORD 2 +-#define INAT_IMM_DWORD 3 +-#define INAT_IMM_QWORD 4 +-#define INAT_IMM_PTR 5 +-#define INAT_IMM_VWORD32 6 +-#define INAT_IMM_VWORD 7 +- +-/* Legacy prefix */ +-#define INAT_PFX_OFFS 0 +-#define INAT_PFX_BITS 4 +-#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +-#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +-/* Escape opcodes */ +-#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +-#define INAT_ESC_BITS 2 +-#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +-#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +-/* Group opcodes (1-16) */ +-#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +-#define INAT_GRP_BITS 5 +-#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +-#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +-/* Immediates */ +-#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +-#define INAT_IMM_BITS 3 +-#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +-/* Flags */ +-#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +-#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +-#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +-#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +-#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +-#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +-#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +-#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +-#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) +-/* Attribute making macros for attribute tables */ +-#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +-#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +-#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +-#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) +- +-/* Attribute search APIs */ +-extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +-extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +-extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, +- int lpfx_id, +- insn_attr_t esc_attr); +-extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, +- int lpfx_id, +- insn_attr_t esc_attr); +-extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, +- insn_byte_t vex_m, +- insn_byte_t vex_pp); +- +-/* Attribute checking functions */ +-static inline int inat_is_legacy_prefix(insn_attr_t attr) +-{ +- attr &= INAT_PFX_MASK; +- return attr && attr <= INAT_LGCPFX_MAX; +-} +- +-static inline int inat_is_address_size_prefix(insn_attr_t attr) +-{ +- return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +-} +- +-static inline int inat_is_operand_size_prefix(insn_attr_t attr) +-{ +- return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +-} +- 
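/*
 * Illustrative sketch, not part of the patch: the attribute field
 * macros above round-trip by construction.  With the layout defined in
 * this header, INAT_GRP_OFFS is 6 (after 4 prefix bits and 2 escape
 * bits), so a group id packed by INAT_MAKE_GROUP() is recovered by the
 * same mask-and-shift that inat_group_id() performs.
 */
static inline int inat_group_roundtrip_example(void)
{
	insn_attr_t attr = INAT_MAKE_GROUP(5);	/* (5 << 6) | INAT_MODRM */

	return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;	/* == 5 */
}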
+-static inline int inat_is_rex_prefix(insn_attr_t attr) +-{ +- return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +-} +- +-static inline int inat_last_prefix_id(insn_attr_t attr) +-{ +- if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) +- return 0; +- else +- return attr & INAT_PFX_MASK; +-} +- +-static inline int inat_is_vex_prefix(insn_attr_t attr) +-{ +- attr &= INAT_PFX_MASK; +- return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || +- attr == INAT_PFX_EVEX; +-} +- +-static inline int inat_is_evex_prefix(insn_attr_t attr) +-{ +- return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; +-} +- +-static inline int inat_is_vex3_prefix(insn_attr_t attr) +-{ +- return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +-} +- +-static inline int inat_is_escape(insn_attr_t attr) +-{ +- return attr & INAT_ESC_MASK; +-} +- +-static inline int inat_escape_id(insn_attr_t attr) +-{ +- return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +-} +- +-static inline int inat_is_group(insn_attr_t attr) +-{ +- return attr & INAT_GRP_MASK; +-} +- +-static inline int inat_group_id(insn_attr_t attr) +-{ +- return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +-} +- +-static inline int inat_group_common_attribute(insn_attr_t attr) +-{ +- return attr & ~INAT_GRP_MASK; +-} +- +-static inline int inat_has_immediate(insn_attr_t attr) +-{ +- return attr & INAT_IMM_MASK; +-} +- +-static inline int inat_immediate_size(insn_attr_t attr) +-{ +- return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +-} +- +-static inline int inat_has_modrm(insn_attr_t attr) +-{ +- return attr & INAT_MODRM; +-} +- +-static inline int inat_is_force64(insn_attr_t attr) +-{ +- return attr & INAT_FORCE64; +-} +- +-static inline int inat_has_second_immediate(insn_attr_t attr) +-{ +- return attr & INAT_SCNDIMM; +-} +- +-static inline int inat_has_moffset(insn_attr_t attr) +-{ +- return attr & INAT_MOFFSET; +-} +- +-static inline int inat_has_variant(insn_attr_t attr) +-{ +- return attr & INAT_VARIANT; +-} +- +-static inline int inat_accept_vex(insn_attr_t attr) +-{ +- return attr & INAT_VEXOK; +-} +- +-static inline int inat_must_vex(insn_attr_t attr) +-{ +- return attr & (INAT_VEXONLY | INAT_EVEXONLY); +-} +- +-static inline int inat_must_evex(insn_attr_t attr) +-{ +- return attr & INAT_EVEXONLY; +-} +-#endif +diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/insn/inat_types.h +deleted file mode 100644 +index cb3c20c..0000000 +--- a/tools/objtool/arch/x86/insn/inat_types.h ++++ /dev/null +@@ -1,29 +0,0 @@ +-#ifndef _ASM_X86_INAT_TYPES_H +-#define _ASM_X86_INAT_TYPES_H +-/* +- * x86 instruction attributes +- * +- * Written by Masami Hiramatsu <mhiramat@redhat.com> +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+- * +- */ +- +-/* Instruction attributes */ +-typedef unsigned int insn_attr_t; +-typedef unsigned char insn_byte_t; +-typedef signed int insn_value_t; +- +-#endif +diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c +deleted file mode 100644 +index ca983e2..0000000 +--- a/tools/objtool/arch/x86/insn/insn.c ++++ /dev/null +@@ -1,606 +0,0 @@ +-/* +- * x86 instruction analysis +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +- * +- * Copyright (C) IBM Corporation, 2002, 2004, 2009 +- */ +- +-#ifdef __KERNEL__ +-#include <linux/string.h> +-#else +-#include <string.h> +-#endif +-#include "inat.h" +-#include "insn.h" +- +-/* Verify next sizeof(t) bytes can be on the same instruction */ +-#define validate_next(t, insn, n) \ +- ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) +- +-#define __get_next(t, insn) \ +- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) +- +-#define __peek_nbyte_next(t, insn, n) \ +- ({ t r = *(t*)((insn)->next_byte + n); r; }) +- +-#define get_next(t, insn) \ +- ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) +- +-#define peek_nbyte_next(t, insn, n) \ +- ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) +- +-#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) +- +-/** +- * insn_init() - initialize struct insn +- * @insn: &struct insn to be initialized +- * @kaddr: address (in kernel memory) of instruction (or copy thereof) +- * @x86_64: !0 for 64-bit kernel or 64-bit app +- */ +-void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +-{ +- /* +- * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid +- * even if the input buffer is long enough to hold them. +- */ +- if (buf_len > MAX_INSN_SIZE) +- buf_len = MAX_INSN_SIZE; +- +- memset(insn, 0, sizeof(*insn)); +- insn->kaddr = kaddr; +- insn->end_kaddr = kaddr + buf_len; +- insn->next_byte = kaddr; +- insn->x86_64 = x86_64 ? 1 : 0; +- insn->opnd_bytes = 4; +- if (x86_64) +- insn->addr_bytes = 8; +- else +- insn->addr_bytes = 4; +-} +- +-/** +- * insn_get_prefixes - scan x86 instruction prefix bytes +- * @insn: &struct insn containing instruction +- * +- * Populates the @insn->prefixes bitmap, and updates @insn->next_byte +- * to point to the (first) opcode. No effect if @insn->prefixes.got +- * is already set. 
+- */ +-void insn_get_prefixes(struct insn *insn) +-{ +- struct insn_field *prefixes = &insn->prefixes; +- insn_attr_t attr; +- insn_byte_t b, lb; +- int i, nb; +- +- if (prefixes->got) +- return; +- +- nb = 0; +- lb = 0; +- b = peek_next(insn_byte_t, insn); +- attr = inat_get_opcode_attribute(b); +- while (inat_is_legacy_prefix(attr)) { +- /* Skip if same prefix */ +- for (i = 0; i < nb; i++) +- if (prefixes->bytes[i] == b) +- goto found; +- if (nb == 4) +- /* Invalid instruction */ +- break; +- prefixes->bytes[nb++] = b; +- if (inat_is_address_size_prefix(attr)) { +- /* address size switches 2/4 or 4/8 */ +- if (insn->x86_64) +- insn->addr_bytes ^= 12; +- else +- insn->addr_bytes ^= 6; +- } else if (inat_is_operand_size_prefix(attr)) { +- /* oprand size switches 2/4 */ +- insn->opnd_bytes ^= 6; +- } +-found: +- prefixes->nbytes++; +- insn->next_byte++; +- lb = b; +- b = peek_next(insn_byte_t, insn); +- attr = inat_get_opcode_attribute(b); +- } +- /* Set the last prefix */ +- if (lb && lb != insn->prefixes.bytes[3]) { +- if (unlikely(insn->prefixes.bytes[3])) { +- /* Swap the last prefix */ +- b = insn->prefixes.bytes[3]; +- for (i = 0; i < nb; i++) +- if (prefixes->bytes[i] == lb) +- prefixes->bytes[i] = b; +- } +- insn->prefixes.bytes[3] = lb; +- } +- +- /* Decode REX prefix */ +- if (insn->x86_64) { +- b = peek_next(insn_byte_t, insn); +- attr = inat_get_opcode_attribute(b); +- if (inat_is_rex_prefix(attr)) { +- insn->rex_prefix.value = b; +- insn->rex_prefix.nbytes = 1; +- insn->next_byte++; +- if (X86_REX_W(b)) +- /* REX.W overrides opnd_size */ +- insn->opnd_bytes = 8; +- } +- } +- insn->rex_prefix.got = 1; +- +- /* Decode VEX prefix */ +- b = peek_next(insn_byte_t, insn); +- attr = inat_get_opcode_attribute(b); +- if (inat_is_vex_prefix(attr)) { +- insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); +- if (!insn->x86_64) { +- /* +- * In 32-bits mode, if the [7:6] bits (mod bits of +- * ModRM) on the second byte are not 11b, it is +- * LDS or LES or BOUND. +- */ +- if (X86_MODRM_MOD(b2) != 3) +- goto vex_end; +- } +- insn->vex_prefix.bytes[0] = b; +- insn->vex_prefix.bytes[1] = b2; +- if (inat_is_evex_prefix(attr)) { +- b2 = peek_nbyte_next(insn_byte_t, insn, 2); +- insn->vex_prefix.bytes[2] = b2; +- b2 = peek_nbyte_next(insn_byte_t, insn, 3); +- insn->vex_prefix.bytes[3] = b2; +- insn->vex_prefix.nbytes = 4; +- insn->next_byte += 4; +- if (insn->x86_64 && X86_VEX_W(b2)) +- /* VEX.W overrides opnd_size */ +- insn->opnd_bytes = 8; +- } else if (inat_is_vex3_prefix(attr)) { +- b2 = peek_nbyte_next(insn_byte_t, insn, 2); +- insn->vex_prefix.bytes[2] = b2; +- insn->vex_prefix.nbytes = 3; +- insn->next_byte += 3; +- if (insn->x86_64 && X86_VEX_W(b2)) +- /* VEX.W overrides opnd_size */ +- insn->opnd_bytes = 8; +- } else { +- /* +- * For VEX2, fake VEX3-like byte#2. +- * Makes it easier to decode vex.W, vex.vvvv, +- * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. +- */ +- insn->vex_prefix.bytes[2] = b2 & 0x7f; +- insn->vex_prefix.nbytes = 2; +- insn->next_byte += 2; +- } +- } +-vex_end: +- insn->vex_prefix.got = 1; +- +- prefixes->got = 1; +- +-err_out: +- return; +-} +- +-/** +- * insn_get_opcode - collect opcode(s) +- * @insn: &struct insn containing instruction +- * +- * Populates @insn->opcode, updates @insn->next_byte to point past the +- * opcode byte(s), and set @insn->attr (except for groups). +- * If necessary, first collects any preceding (prefix) bytes. +- * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got +- * is already 1. 
+- */ +-void insn_get_opcode(struct insn *insn) +-{ +- struct insn_field *opcode = &insn->opcode; +- insn_byte_t op; +- int pfx_id; +- if (opcode->got) +- return; +- if (!insn->prefixes.got) +- insn_get_prefixes(insn); +- +- /* Get first opcode */ +- op = get_next(insn_byte_t, insn); +- opcode->bytes[0] = op; +- opcode->nbytes = 1; +- +- /* Check if there is VEX prefix or not */ +- if (insn_is_avx(insn)) { +- insn_byte_t m, p; +- m = insn_vex_m_bits(insn); +- p = insn_vex_p_bits(insn); +- insn->attr = inat_get_avx_attribute(op, m, p); +- if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || +- (!inat_accept_vex(insn->attr) && +- !inat_is_group(insn->attr))) +- insn->attr = 0; /* This instruction is bad */ +- goto end; /* VEX has only 1 byte for opcode */ +- } +- +- insn->attr = inat_get_opcode_attribute(op); +- while (inat_is_escape(insn->attr)) { +- /* Get escaped opcode */ +- op = get_next(insn_byte_t, insn); +- opcode->bytes[opcode->nbytes++] = op; +- pfx_id = insn_last_prefix_id(insn); +- insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); +- } +- if (inat_must_vex(insn->attr)) +- insn->attr = 0; /* This instruction is bad */ +-end: +- opcode->got = 1; +- +-err_out: +- return; +-} +- +-/** +- * insn_get_modrm - collect ModRM byte, if any +- * @insn: &struct insn containing instruction +- * +- * Populates @insn->modrm and updates @insn->next_byte to point past the +- * ModRM byte, if any. If necessary, first collects the preceding bytes +- * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. +- */ +-void insn_get_modrm(struct insn *insn) +-{ +- struct insn_field *modrm = &insn->modrm; +- insn_byte_t pfx_id, mod; +- if (modrm->got) +- return; +- if (!insn->opcode.got) +- insn_get_opcode(insn); +- +- if (inat_has_modrm(insn->attr)) { +- mod = get_next(insn_byte_t, insn); +- modrm->value = mod; +- modrm->nbytes = 1; +- if (inat_is_group(insn->attr)) { +- pfx_id = insn_last_prefix_id(insn); +- insn->attr = inat_get_group_attribute(mod, pfx_id, +- insn->attr); +- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) +- insn->attr = 0; /* This is bad */ +- } +- } +- +- if (insn->x86_64 && inat_is_force64(insn->attr)) +- insn->opnd_bytes = 8; +- modrm->got = 1; +- +-err_out: +- return; +-} +- +- +-/** +- * insn_rip_relative() - Does instruction use RIP-relative addressing mode? +- * @insn: &struct insn containing instruction +- * +- * If necessary, first collects the instruction up to and including the +- * ModRM byte. No effect if @insn->x86_64 is 0. +- */ +-int insn_rip_relative(struct insn *insn) +-{ +- struct insn_field *modrm = &insn->modrm; +- +- if (!insn->x86_64) +- return 0; +- if (!modrm->got) +- insn_get_modrm(insn); +- /* +- * For rip-relative instructions, the mod field (top 2 bits) +- * is zero and the r/m field (bottom 3 bits) is 0x5. +- */ +- return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +-} +- +-/** +- * insn_get_sib() - Get the SIB byte of instruction +- * @insn: &struct insn containing instruction +- * +- * If necessary, first collects the instruction up to and including the +- * ModRM byte. 
+- */ +-void insn_get_sib(struct insn *insn) +-{ +- insn_byte_t modrm; +- +- if (insn->sib.got) +- return; +- if (!insn->modrm.got) +- insn_get_modrm(insn); +- if (insn->modrm.nbytes) { +- modrm = (insn_byte_t)insn->modrm.value; +- if (insn->addr_bytes != 2 && +- X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { +- insn->sib.value = get_next(insn_byte_t, insn); +- insn->sib.nbytes = 1; +- } +- } +- insn->sib.got = 1; +- +-err_out: +- return; +-} +- +- +-/** +- * insn_get_displacement() - Get the displacement of instruction +- * @insn: &struct insn containing instruction +- * +- * If necessary, first collects the instruction up to and including the +- * SIB byte. +- * Displacement value is sign-expanded. +- */ +-void insn_get_displacement(struct insn *insn) +-{ +- insn_byte_t mod, rm, base; +- +- if (insn->displacement.got) +- return; +- if (!insn->sib.got) +- insn_get_sib(insn); +- if (insn->modrm.nbytes) { +- /* +- * Interpreting the modrm byte: +- * mod = 00 - no displacement fields (exceptions below) +- * mod = 01 - 1-byte displacement field +- * mod = 10 - displacement field is 4 bytes, or 2 bytes if +- * address size = 2 (0x67 prefix in 32-bit mode) +- * mod = 11 - no memory operand +- * +- * If address size = 2... +- * mod = 00, r/m = 110 - displacement field is 2 bytes +- * +- * If address size != 2... +- * mod != 11, r/m = 100 - SIB byte exists +- * mod = 00, SIB base = 101 - displacement field is 4 bytes +- * mod = 00, r/m = 101 - rip-relative addressing, displacement +- * field is 4 bytes +- */ +- mod = X86_MODRM_MOD(insn->modrm.value); +- rm = X86_MODRM_RM(insn->modrm.value); +- base = X86_SIB_BASE(insn->sib.value); +- if (mod == 3) +- goto out; +- if (mod == 1) { +- insn->displacement.value = get_next(signed char, insn); +- insn->displacement.nbytes = 1; +- } else if (insn->addr_bytes == 2) { +- if ((mod == 0 && rm == 6) || mod == 2) { +- insn->displacement.value = +- get_next(short, insn); +- insn->displacement.nbytes = 2; +- } +- } else { +- if ((mod == 0 && rm == 5) || mod == 2 || +- (mod == 0 && base == 5)) { +- insn->displacement.value = get_next(int, insn); +- insn->displacement.nbytes = 4; +- } +- } +- } +-out: +- insn->displacement.got = 1; +- +-err_out: +- return; +-} +- +-/* Decode moffset16/32/64. Return 0 if failed */ +-static int __get_moffset(struct insn *insn) +-{ +- switch (insn->addr_bytes) { +- case 2: +- insn->moffset1.value = get_next(short, insn); +- insn->moffset1.nbytes = 2; +- break; +- case 4: +- insn->moffset1.value = get_next(int, insn); +- insn->moffset1.nbytes = 4; +- break; +- case 8: +- insn->moffset1.value = get_next(int, insn); +- insn->moffset1.nbytes = 4; +- insn->moffset2.value = get_next(int, insn); +- insn->moffset2.nbytes = 4; +- break; +- default: /* opnd_bytes must be modified manually */ +- goto err_out; +- } +- insn->moffset1.got = insn->moffset2.got = 1; +- +- return 1; +- +-err_out: +- return 0; +-} +- +-/* Decode imm v32(Iz). 
Return 0 if failed */ +-static int __get_immv32(struct insn *insn) +-{ +- switch (insn->opnd_bytes) { +- case 2: +- insn->immediate.value = get_next(short, insn); +- insn->immediate.nbytes = 2; +- break; +- case 4: +- case 8: +- insn->immediate.value = get_next(int, insn); +- insn->immediate.nbytes = 4; +- break; +- default: /* opnd_bytes must be modified manually */ +- goto err_out; +- } +- +- return 1; +- +-err_out: +- return 0; +-} +- +-/* Decode imm v64(Iv/Ov), Return 0 if failed */ +-static int __get_immv(struct insn *insn) +-{ +- switch (insn->opnd_bytes) { +- case 2: +- insn->immediate1.value = get_next(short, insn); +- insn->immediate1.nbytes = 2; +- break; +- case 4: +- insn->immediate1.value = get_next(int, insn); +- insn->immediate1.nbytes = 4; +- break; +- case 8: +- insn->immediate1.value = get_next(int, insn); +- insn->immediate1.nbytes = 4; +- insn->immediate2.value = get_next(int, insn); +- insn->immediate2.nbytes = 4; +- break; +- default: /* opnd_bytes must be modified manually */ +- goto err_out; +- } +- insn->immediate1.got = insn->immediate2.got = 1; +- +- return 1; +-err_out: +- return 0; +-} +- +-/* Decode ptr16:16/32(Ap) */ +-static int __get_immptr(struct insn *insn) +-{ +- switch (insn->opnd_bytes) { +- case 2: +- insn->immediate1.value = get_next(short, insn); +- insn->immediate1.nbytes = 2; +- break; +- case 4: +- insn->immediate1.value = get_next(int, insn); +- insn->immediate1.nbytes = 4; +- break; +- case 8: +- /* ptr16:64 is not exist (no segment) */ +- return 0; +- default: /* opnd_bytes must be modified manually */ +- goto err_out; +- } +- insn->immediate2.value = get_next(unsigned short, insn); +- insn->immediate2.nbytes = 2; +- insn->immediate1.got = insn->immediate2.got = 1; +- +- return 1; +-err_out: +- return 0; +-} +- +-/** +- * insn_get_immediate() - Get the immediates of instruction +- * @insn: &struct insn containing instruction +- * +- * If necessary, first collects the instruction up to and including the +- * displacement bytes. +- * Basically, most of immediates are sign-expanded. 
Unsigned-value can be +- * get by bit masking with ((1 << (nbytes * 8)) - 1) +- */ +-void insn_get_immediate(struct insn *insn) +-{ +- if (insn->immediate.got) +- return; +- if (!insn->displacement.got) +- insn_get_displacement(insn); +- +- if (inat_has_moffset(insn->attr)) { +- if (!__get_moffset(insn)) +- goto err_out; +- goto done; +- } +- +- if (!inat_has_immediate(insn->attr)) +- /* no immediates */ +- goto done; +- +- switch (inat_immediate_size(insn->attr)) { +- case INAT_IMM_BYTE: +- insn->immediate.value = get_next(signed char, insn); +- insn->immediate.nbytes = 1; +- break; +- case INAT_IMM_WORD: +- insn->immediate.value = get_next(short, insn); +- insn->immediate.nbytes = 2; +- break; +- case INAT_IMM_DWORD: +- insn->immediate.value = get_next(int, insn); +- insn->immediate.nbytes = 4; +- break; +- case INAT_IMM_QWORD: +- insn->immediate1.value = get_next(int, insn); +- insn->immediate1.nbytes = 4; +- insn->immediate2.value = get_next(int, insn); +- insn->immediate2.nbytes = 4; +- break; +- case INAT_IMM_PTR: +- if (!__get_immptr(insn)) +- goto err_out; +- break; +- case INAT_IMM_VWORD32: +- if (!__get_immv32(insn)) +- goto err_out; +- break; +- case INAT_IMM_VWORD: +- if (!__get_immv(insn)) +- goto err_out; +- break; +- default: +- /* Here, insn must have an immediate, but failed */ +- goto err_out; +- } +- if (inat_has_second_immediate(insn->attr)) { +- insn->immediate2.value = get_next(signed char, insn); +- insn->immediate2.nbytes = 1; +- } +-done: +- insn->immediate.got = 1; +- +-err_out: +- return; +-} +- +-/** +- * insn_get_length() - Get the length of instruction +- * @insn: &struct insn containing instruction +- * +- * If necessary, first collects the instruction up to and including the +- * immediates bytes. +- */ +-void insn_get_length(struct insn *insn) +-{ +- if (insn->length) +- return; +- if (!insn->immediate.got) +- insn_get_immediate(insn); +- insn->length = (unsigned char)((unsigned long)insn->next_byte +- - (unsigned long)insn->kaddr); +-} +diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h +deleted file mode 100644 +index e23578c..0000000 +--- a/tools/objtool/arch/x86/insn/insn.h ++++ /dev/null +@@ -1,211 +0,0 @@ +-#ifndef _ASM_X86_INSN_H +-#define _ASM_X86_INSN_H +-/* +- * x86 instruction analysis +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+- * +- * Copyright (C) IBM Corporation, 2009 +- */ +- +-/* insn_attr_t is defined in inat.h */ +-#include "inat.h" +- +-struct insn_field { +- union { +- insn_value_t value; +- insn_byte_t bytes[4]; +- }; +- /* !0 if we've run insn_get_xxx() for this field */ +- unsigned char got; +- unsigned char nbytes; +-}; +- +-struct insn { +- struct insn_field prefixes; /* +- * Prefixes +- * prefixes.bytes[3]: last prefix +- */ +- struct insn_field rex_prefix; /* REX prefix */ +- struct insn_field vex_prefix; /* VEX prefix */ +- struct insn_field opcode; /* +- * opcode.bytes[0]: opcode1 +- * opcode.bytes[1]: opcode2 +- * opcode.bytes[2]: opcode3 +- */ +- struct insn_field modrm; +- struct insn_field sib; +- struct insn_field displacement; +- union { +- struct insn_field immediate; +- struct insn_field moffset1; /* for 64bit MOV */ +- struct insn_field immediate1; /* for 64bit imm or off16/32 */ +- }; +- union { +- struct insn_field moffset2; /* for 64bit MOV */ +- struct insn_field immediate2; /* for 64bit imm or seg16 */ +- }; +- +- insn_attr_t attr; +- unsigned char opnd_bytes; +- unsigned char addr_bytes; +- unsigned char length; +- unsigned char x86_64; +- +- const insn_byte_t *kaddr; /* kernel address of insn to analyze */ +- const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ +- const insn_byte_t *next_byte; +-}; +- +-#define MAX_INSN_SIZE 15 +- +-#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +-#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +-#define X86_MODRM_RM(modrm) ((modrm) & 0x07) +- +-#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +-#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +-#define X86_SIB_BASE(sib) ((sib) & 0x07) +- +-#define X86_REX_W(rex) ((rex) & 8) +-#define X86_REX_R(rex) ((rex) & 4) +-#define X86_REX_X(rex) ((rex) & 2) +-#define X86_REX_B(rex) ((rex) & 1) +- +-/* VEX bit flags */ +-#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +-#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +-#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +-#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +-#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +-/* VEX bit fields */ +-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +-#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +-#define X86_VEX2_M 1 /* VEX2.M always 1 */ +-#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +-#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +-#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ +- +-extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +-extern void insn_get_prefixes(struct insn *insn); +-extern void insn_get_opcode(struct insn *insn); +-extern void insn_get_modrm(struct insn *insn); +-extern void insn_get_sib(struct insn *insn); +-extern void insn_get_displacement(struct insn *insn); +-extern void insn_get_immediate(struct insn *insn); +-extern void insn_get_length(struct insn *insn); +- +-/* Attribute will be determined after getting ModRM (for opcode groups) */ +-static inline void insn_get_attribute(struct insn *insn) +-{ +- insn_get_modrm(insn); +-} +- +-/* Instruction uses RIP-relative addressing */ +-extern int insn_rip_relative(struct insn *insn); +- +-/* Init insn for kernel text */ +-static inline void kernel_insn_init(struct insn *insn, +- const void *kaddr, int buf_len) +-{ +-#ifdef CONFIG_X86_64 +- insn_init(insn, kaddr, buf_len, 1); +-#else /* CONFIG_X86_32 */ +- insn_init(insn, kaddr, buf_len, 0); +-#endif +-} +- 
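/*
 * Illustrative usage sketch, not part of the patch.  It relies only on
 * the declarations above (insn_init(), insn_get_length(), struct insn)
 * and assumes the standalone build objtool uses: decode one x86-64
 * instruction from a byte buffer and return its length.
 */
static inline int insn_length_example(void)
{
	/* 48 89 e5 = mov %rsp,%rbp: REX.W prefix, opcode, ModRM */
	const unsigned char buf[3] = { 0x48, 0x89, 0xe5 };
	struct insn insn;

	insn_init(&insn, buf, sizeof(buf), 1);	/* 1 = 64-bit mode */
	insn_get_length(&insn);	/* collects prefixes through immediates */
	return insn.length;	/* 3 for this buffer */
}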
+-static inline int insn_is_avx(struct insn *insn) +-{ +- if (!insn->prefixes.got) +- insn_get_prefixes(insn); +- return (insn->vex_prefix.value != 0); +-} +- +-static inline int insn_is_evex(struct insn *insn) +-{ +- if (!insn->prefixes.got) +- insn_get_prefixes(insn); +- return (insn->vex_prefix.nbytes == 4); +-} +- +-/* Ensure this instruction is decoded completely */ +-static inline int insn_complete(struct insn *insn) +-{ +- return insn->opcode.got && insn->modrm.got && insn->sib.got && +- insn->displacement.got && insn->immediate.got; +-} +- +-static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +-{ +- if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ +- return X86_VEX2_M; +- else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ +- return X86_VEX3_M(insn->vex_prefix.bytes[1]); +- else /* EVEX */ +- return X86_EVEX_M(insn->vex_prefix.bytes[1]); +-} +- +-static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +-{ +- if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ +- return X86_VEX_P(insn->vex_prefix.bytes[1]); +- else +- return X86_VEX_P(insn->vex_prefix.bytes[2]); +-} +- +-/* Get the last prefix id from last prefix or VEX prefix */ +-static inline int insn_last_prefix_id(struct insn *insn) +-{ +- if (insn_is_avx(insn)) +- return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ +- +- if (insn->prefixes.bytes[3]) +- return inat_get_last_prefix_id(insn->prefixes.bytes[3]); +- +- return 0; +-} +- +-/* Offset of each field from kaddr */ +-static inline int insn_offset_rex_prefix(struct insn *insn) +-{ +- return insn->prefixes.nbytes; +-} +-static inline int insn_offset_vex_prefix(struct insn *insn) +-{ +- return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +-} +-static inline int insn_offset_opcode(struct insn *insn) +-{ +- return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +-} +-static inline int insn_offset_modrm(struct insn *insn) +-{ +- return insn_offset_opcode(insn) + insn->opcode.nbytes; +-} +-static inline int insn_offset_sib(struct insn *insn) +-{ +- return insn_offset_modrm(insn) + insn->modrm.nbytes; +-} +-static inline int insn_offset_displacement(struct insn *insn) +-{ +- return insn_offset_sib(insn) + insn->sib.nbytes; +-} +-static inline int insn_offset_immediate(struct insn *insn) +-{ +- return insn_offset_displacement(insn) + insn->displacement.nbytes; +-} +- +-#endif /* _ASM_X86_INSN_H */ +diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt +deleted file mode 100644 +index 767be7c..0000000 +--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt ++++ /dev/null +@@ -1,1063 +0,0 @@ +-# x86 Opcode Maps +-# +-# This is (mostly) based on following documentations. +-# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +-# (#326018-047US, June 2013) +-# +-#<Opcode maps> +-# Table: table-name +-# Referrer: escaped-name +-# AVXcode: avx-code +-# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +-# (or) +-# opcode: escape # escaped-name +-# EndTable +-# +-# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +-# mnemonics that begin with lowercase 'k' accept a VEX prefix +-# +-#<group maps> +-# GrpTable: GrpXXX +-# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +-# EndTable +-# +-# AVX Superscripts +-# (ev): this opcode requires EVEX prefix. +-# (evo): this opcode is changed by EVEX prefix (EVEX opcode) +-# (v): this opcode requires VEX prefix. 
+-# (v1): this opcode only supports 128bit VEX. +-# +-# Last Prefix Superscripts +-# - (66): the last prefix is 0x66 +-# - (F3): the last prefix is 0xF3 +-# - (F2): the last prefix is 0xF2 +-# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +-# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. +- +-Table: one byte opcode +-Referrer: +-AVXcode: +-# 0x00 - 0x0f +-00: ADD Eb,Gb +-01: ADD Ev,Gv +-02: ADD Gb,Eb +-03: ADD Gv,Ev +-04: ADD AL,Ib +-05: ADD rAX,Iz +-06: PUSH ES (i64) +-07: POP ES (i64) +-08: OR Eb,Gb +-09: OR Ev,Gv +-0a: OR Gb,Eb +-0b: OR Gv,Ev +-0c: OR AL,Ib +-0d: OR rAX,Iz +-0e: PUSH CS (i64) +-0f: escape # 2-byte escape +-# 0x10 - 0x1f +-10: ADC Eb,Gb +-11: ADC Ev,Gv +-12: ADC Gb,Eb +-13: ADC Gv,Ev +-14: ADC AL,Ib +-15: ADC rAX,Iz +-16: PUSH SS (i64) +-17: POP SS (i64) +-18: SBB Eb,Gb +-19: SBB Ev,Gv +-1a: SBB Gb,Eb +-1b: SBB Gv,Ev +-1c: SBB AL,Ib +-1d: SBB rAX,Iz +-1e: PUSH DS (i64) +-1f: POP DS (i64) +-# 0x20 - 0x2f +-20: AND Eb,Gb +-21: AND Ev,Gv +-22: AND Gb,Eb +-23: AND Gv,Ev +-24: AND AL,Ib +-25: AND rAx,Iz +-26: SEG=ES (Prefix) +-27: DAA (i64) +-28: SUB Eb,Gb +-29: SUB Ev,Gv +-2a: SUB Gb,Eb +-2b: SUB Gv,Ev +-2c: SUB AL,Ib +-2d: SUB rAX,Iz +-2e: SEG=CS (Prefix) +-2f: DAS (i64) +-# 0x30 - 0x3f +-30: XOR Eb,Gb +-31: XOR Ev,Gv +-32: XOR Gb,Eb +-33: XOR Gv,Ev +-34: XOR AL,Ib +-35: XOR rAX,Iz +-36: SEG=SS (Prefix) +-37: AAA (i64) +-38: CMP Eb,Gb +-39: CMP Ev,Gv +-3a: CMP Gb,Eb +-3b: CMP Gv,Ev +-3c: CMP AL,Ib +-3d: CMP rAX,Iz +-3e: SEG=DS (Prefix) +-3f: AAS (i64) +-# 0x40 - 0x4f +-40: INC eAX (i64) | REX (o64) +-41: INC eCX (i64) | REX.B (o64) +-42: INC eDX (i64) | REX.X (o64) +-43: INC eBX (i64) | REX.XB (o64) +-44: INC eSP (i64) | REX.R (o64) +-45: INC eBP (i64) | REX.RB (o64) +-46: INC eSI (i64) | REX.RX (o64) +-47: INC eDI (i64) | REX.RXB (o64) +-48: DEC eAX (i64) | REX.W (o64) +-49: DEC eCX (i64) | REX.WB (o64) +-4a: DEC eDX (i64) | REX.WX (o64) +-4b: DEC eBX (i64) | REX.WXB (o64) +-4c: DEC eSP (i64) | REX.WR (o64) +-4d: DEC eBP (i64) | REX.WRB (o64) +-4e: DEC eSI (i64) | REX.WRX (o64) +-4f: DEC eDI (i64) | REX.WRXB (o64) +-# 0x50 - 0x5f +-50: PUSH rAX/r8 (d64) +-51: PUSH rCX/r9 (d64) +-52: PUSH rDX/r10 (d64) +-53: PUSH rBX/r11 (d64) +-54: PUSH rSP/r12 (d64) +-55: PUSH rBP/r13 (d64) +-56: PUSH rSI/r14 (d64) +-57: PUSH rDI/r15 (d64) +-58: POP rAX/r8 (d64) +-59: POP rCX/r9 (d64) +-5a: POP rDX/r10 (d64) +-5b: POP rBX/r11 (d64) +-5c: POP rSP/r12 (d64) +-5d: POP rBP/r13 (d64) +-5e: POP rSI/r14 (d64) +-5f: POP rDI/r15 (d64) +-# 0x60 - 0x6f +-60: PUSHA/PUSHAD (i64) +-61: POPA/POPAD (i64) +-62: BOUND Gv,Ma (i64) | EVEX (Prefix) +-63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +-64: SEG=FS (Prefix) +-65: SEG=GS (Prefix) +-66: Operand-Size (Prefix) +-67: Address-Size (Prefix) +-68: PUSH Iz (d64) +-69: IMUL Gv,Ev,Iz +-6a: PUSH Ib (d64) +-6b: IMUL Gv,Ev,Ib +-6c: INS/INSB Yb,DX +-6d: INS/INSW/INSD Yz,DX +-6e: OUTS/OUTSB DX,Xb +-6f: OUTS/OUTSW/OUTSD DX,Xz +-# 0x70 - 0x7f +-70: JO Jb +-71: JNO Jb +-72: JB/JNAE/JC Jb +-73: JNB/JAE/JNC Jb +-74: JZ/JE Jb +-75: JNZ/JNE Jb +-76: JBE/JNA Jb +-77: JNBE/JA Jb +-78: JS Jb +-79: JNS Jb +-7a: JP/JPE Jb +-7b: JNP/JPO Jb +-7c: JL/JNGE Jb +-7d: JNL/JGE Jb +-7e: JLE/JNG Jb +-7f: JNLE/JG Jb +-# 0x80 - 0x8f +-80: Grp1 Eb,Ib (1A) +-81: Grp1 Ev,Iz (1A) +-82: Grp1 Eb,Ib (1A),(i64) +-83: Grp1 Ev,Ib (1A) +-84: TEST Eb,Gb +-85: TEST Ev,Gv +-86: XCHG Eb,Gb +-87: XCHG Ev,Gv +-88: MOV Eb,Gb +-89: MOV Ev,Gv +-8a: MOV Gb,Eb +-8b: MOV Gv,Ev +-8c: MOV Ev,Sw +-8d: LEA Gv,M +-8e: MOV Sw,Ew +-8f: Grp1A (1A) | POP Ev (d64) +-# 0x90 - 0x9f +-90: NOP | 
PAUSE (F3) | XCHG r8,rAX +-91: XCHG rCX/r9,rAX +-92: XCHG rDX/r10,rAX +-93: XCHG rBX/r11,rAX +-94: XCHG rSP/r12,rAX +-95: XCHG rBP/r13,rAX +-96: XCHG rSI/r14,rAX +-97: XCHG rDI/r15,rAX +-98: CBW/CWDE/CDQE +-99: CWD/CDQ/CQO +-9a: CALLF Ap (i64) +-9b: FWAIT/WAIT +-9c: PUSHF/D/Q Fv (d64) +-9d: POPF/D/Q Fv (d64) +-9e: SAHF +-9f: LAHF +-# 0xa0 - 0xaf +-a0: MOV AL,Ob +-a1: MOV rAX,Ov +-a2: MOV Ob,AL +-a3: MOV Ov,rAX +-a4: MOVS/B Yb,Xb +-a5: MOVS/W/D/Q Yv,Xv +-a6: CMPS/B Xb,Yb +-a7: CMPS/W/D Xv,Yv +-a8: TEST AL,Ib +-a9: TEST rAX,Iz +-aa: STOS/B Yb,AL +-ab: STOS/W/D/Q Yv,rAX +-ac: LODS/B AL,Xb +-ad: LODS/W/D/Q rAX,Xv +-ae: SCAS/B AL,Yb +-# Note: The May 2011 Intel manual shows Xv for the second parameter of the +-# next instruction but Yv is correct +-af: SCAS/W/D/Q rAX,Yv +-# 0xb0 - 0xbf +-b0: MOV AL/R8L,Ib +-b1: MOV CL/R9L,Ib +-b2: MOV DL/R10L,Ib +-b3: MOV BL/R11L,Ib +-b4: MOV AH/R12L,Ib +-b5: MOV CH/R13L,Ib +-b6: MOV DH/R14L,Ib +-b7: MOV BH/R15L,Ib +-b8: MOV rAX/r8,Iv +-b9: MOV rCX/r9,Iv +-ba: MOV rDX/r10,Iv +-bb: MOV rBX/r11,Iv +-bc: MOV rSP/r12,Iv +-bd: MOV rBP/r13,Iv +-be: MOV rSI/r14,Iv +-bf: MOV rDI/r15,Iv +-# 0xc0 - 0xcf +-c0: Grp2 Eb,Ib (1A) +-c1: Grp2 Ev,Ib (1A) +-c2: RETN Iw (f64) +-c3: RETN +-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +-c6: Grp11A Eb,Ib (1A) +-c7: Grp11B Ev,Iz (1A) +-c8: ENTER Iw,Ib +-c9: LEAVE (d64) +-ca: RETF Iw +-cb: RETF +-cc: INT3 +-cd: INT Ib +-ce: INTO (i64) +-cf: IRET/D/Q +-# 0xd0 - 0xdf +-d0: Grp2 Eb,1 (1A) +-d1: Grp2 Ev,1 (1A) +-d2: Grp2 Eb,CL (1A) +-d3: Grp2 Ev,CL (1A) +-d4: AAM Ib (i64) +-d5: AAD Ib (i64) +-d6: +-d7: XLAT/XLATB +-d8: ESC +-d9: ESC +-da: ESC +-db: ESC +-dc: ESC +-dd: ESC +-de: ESC +-df: ESC +-# 0xe0 - 0xef +-# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +-# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +-# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. +-e0: LOOPNE/LOOPNZ Jb (f64) +-e1: LOOPE/LOOPZ Jb (f64) +-e2: LOOP Jb (f64) +-e3: JrCXZ Jb (f64) +-e4: IN AL,Ib +-e5: IN eAX,Ib +-e6: OUT Ib,AL +-e7: OUT Ib,eAX +-# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +-# in "near" jumps and calls is 16-bit. For CALL, +-# push of return address is 16-bit wide, RSP is decremented by 2 +-# but is not truncated to 16 bits, unlike RIP. +-e8: CALL Jz (f64) +-e9: JMP-near Jz (f64) +-ea: JMP-far Ap (i64) +-eb: JMP-short Jb (f64) +-ec: IN AL,DX +-ed: IN eAX,DX +-ee: OUT DX,AL +-ef: OUT DX,eAX +-# 0xf0 - 0xff +-f0: LOCK (Prefix) +-f1: +-f2: REPNE (Prefix) | XACQUIRE (Prefix) +-f3: REP/REPE (Prefix) | XRELEASE (Prefix) +-f4: HLT +-f5: CMC +-f6: Grp3_1 Eb (1A) +-f7: Grp3_2 Ev (1A) +-f8: CLC +-f9: STC +-fa: CLI +-fb: STI +-fc: CLD +-fd: STD +-fe: Grp4 (1A) +-ff: Grp5 (1A) +-EndTable +- +-Table: 2-byte opcode (0x0f) +-Referrer: 2-byte escape +-AVXcode: 1 +-# 0x0f 0x00-0x0f +-00: Grp6 (1A) +-01: Grp7 (1A) +-02: LAR Gv,Ew +-03: LSL Gv,Ew +-04: +-05: SYSCALL (o64) +-06: CLTS +-07: SYSRET (o64) +-08: INVD +-09: WBINVD +-0a: +-0b: UD2 (1B) +-0c: +-# AMD's prefetch group. Intel supports prefetchw(/1) only. +-0d: GrpP +-0e: FEMMS +-# 3DNow! uses the last imm byte as opcode extension. +-0f: 3DNow! Pq,Qq,Ib +-# 0x0f 0x10-0x1f +-# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +-# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +-# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
+-# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +-# Reference A.1 +-10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +-11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +-12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +-13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +-14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +-15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +-16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +-17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +-18: Grp16 (1A) +-19: +-# Intel SDM opcode map does not list MPX instructions. For now using Gv for +-# bnd registers and Ev for everything else is OK because the instruction +-# decoder does not use the information except as an indication that there is +-# a ModR/M byte. +-1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +-1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv +-1c: +-1d: +-1e: +-1f: NOP Ev +-# 0x0f 0x20-0x2f +-20: MOV Rd,Cd +-21: MOV Rd,Dd +-22: MOV Cd,Rd +-23: MOV Dd,Rd +-24: +-25: +-26: +-27: +-28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +-29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +-2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +-2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +-2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +-2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +-2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +-2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +-# 0x0f 0x30-0x3f +-30: WRMSR +-31: RDTSC +-32: RDMSR +-33: RDPMC +-34: SYSENTER +-35: SYSEXIT +-36: +-37: GETSEC +-38: escape # 3-byte escape 1 +-39: +-3a: escape # 3-byte escape 2 +-3b: +-3c: +-3d: +-3e: +-3f: +-# 0x0f 0x40-0x4f +-40: CMOVO Gv,Ev +-41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +-42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) +-43: CMOVAE/NB/NC Gv,Ev +-44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +-45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +-46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +-47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) +-48: CMOVS Gv,Ev +-49: CMOVNS Gv,Ev +-4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +-4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk +-4c: CMOVL/NGE Gv,Ev +-4d: CMOVNL/GE Gv,Ev +-4e: CMOVLE/NG Gv,Ev +-4f: CMOVNLE/G Gv,Ev +-# 0x0f 0x50-0x5f +-50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +-51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +-52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +-53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +-54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +-55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +-56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +-57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +-58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +-59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +-5a: vcvtps2pd Vpd,Wps | vcvtpd2ps 
Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +-5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +-5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +-5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +-5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +-5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +-# 0x0f 0x60-0x6f +-60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +-61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +-62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +-63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +-64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +-65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +-66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +-67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +-68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +-69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +-6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +-6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +-6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +-6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +-6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +-6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) +-# 0x0f 0x70-0x7f +-70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +-71: Grp12 (1A) +-72: Grp13 (1A) +-73: Grp14 (1A) +-74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +-75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +-76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +-# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +-77: emms | vzeroupper | vzeroall +-78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +-79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +-7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +-7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) +-7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +-7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +-7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +-7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) +-# 0x0f 0x80-0x8f +-# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+-80: JO Jz (f64) +-81: JNO Jz (f64) +-82: JB/JC/JNAE Jz (f64) +-83: JAE/JNB/JNC Jz (f64) +-84: JE/JZ Jz (f64) +-85: JNE/JNZ Jz (f64) +-86: JBE/JNA Jz (f64) +-87: JA/JNBE Jz (f64) +-88: JS Jz (f64) +-89: JNS Jz (f64) +-8a: JP/JPE Jz (f64) +-8b: JNP/JPO Jz (f64) +-8c: JL/JNGE Jz (f64) +-8d: JNL/JGE Jz (f64) +-8e: JLE/JNG Jz (f64) +-8f: JNLE/JG Jz (f64) +-# 0x0f 0x90-0x9f +-90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +-91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +-92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +-93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) +-94: SETE/Z Eb +-95: SETNE/NZ Eb +-96: SETBE/NA Eb +-97: SETA/NBE Eb +-98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +-99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) +-9a: SETP/PE Eb +-9b: SETNP/PO Eb +-9c: SETL/NGE Eb +-9d: SETNL/GE Eb +-9e: SETLE/NG Eb +-9f: SETNLE/G Eb +-# 0x0f 0xa0-0xaf +-a0: PUSH FS (d64) +-a1: POP FS (d64) +-a2: CPUID +-a3: BT Ev,Gv +-a4: SHLD Ev,Gv,Ib +-a5: SHLD Ev,Gv,CL +-a6: GrpPDLK +-a7: GrpRNG +-a8: PUSH GS (d64) +-a9: POP GS (d64) +-aa: RSM +-ab: BTS Ev,Gv +-ac: SHRD Ev,Gv,Ib +-ad: SHRD Ev,Gv,CL +-ae: Grp15 (1A),(1C) +-af: IMUL Gv,Ev +-# 0x0f 0xb0-0xbf +-b0: CMPXCHG Eb,Gb +-b1: CMPXCHG Ev,Gv +-b2: LSS Gv,Mp +-b3: BTR Ev,Gv +-b4: LFS Gv,Mp +-b5: LGS Gv,Mp +-b6: MOVZX Gv,Eb +-b7: MOVZX Gv,Ew +-b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +-b9: Grp10 (1A) +-ba: Grp8 Ev,Ib (1A) +-bb: BTC Ev,Gv +-bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +-bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +-be: MOVSX Gv,Eb +-bf: MOVSX Gv,Ew +-# 0x0f 0xc0-0xcf +-c0: XADD Eb,Gb +-c1: XADD Ev,Gv +-c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +-c3: movnti My,Gy +-c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +-c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +-c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) +-c7: Grp9 (1A) +-c8: BSWAP RAX/EAX/R8/R8D +-c9: BSWAP RCX/ECX/R9/R9D +-ca: BSWAP RDX/EDX/R10/R10D +-cb: BSWAP RBX/EBX/R11/R11D +-cc: BSWAP RSP/ESP/R12/R12D +-cd: BSWAP RBP/EBP/R13/R13D +-ce: BSWAP RSI/ESI/R14/R14D +-cf: BSWAP RDI/EDI/R15/R15D +-# 0x0f 0xd0-0xdf +-d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +-d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +-d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +-d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +-d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +-d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +-d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +-d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +-d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +-d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +-da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +-db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) +-dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +-dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +-de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +-df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) +-# 0x0f 0xe0-0xef +-e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +-e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +-e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +-e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +-e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +-e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +-e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) +-e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +-e8: psubsb Pq,Qq | 
vpsubsb Vx,Hx,Wx (66),(v1) +-e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +-ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +-eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) +-ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +-ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +-ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +-ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) +-# 0x0f 0xf0-0xff +-f0: vlddqu Vx,Mx (F2) +-f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +-f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +-f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +-f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +-f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +-f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +-f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +-f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +-f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +-fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +-fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +-fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +-fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +-fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +-ff: +-EndTable +- +-Table: 3-byte opcode 1 (0x0f 0x38) +-Referrer: 3-byte escape 1 +-AVXcode: 2 +-# 0x0f 0x38 0x00-0x0f +-00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +-01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +-02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +-03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +-04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +-05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +-06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +-07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +-08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +-09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +-0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +-0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +-0c: vpermilps Vx,Hx,Wx (66),(v) +-0d: vpermilpd Vx,Hx,Wx (66),(v) +-0e: vtestps Vx,Wx (66),(v) +-0f: vtestpd Vx,Wx (66),(v) +-# 0x0f 0x38 0x10-0x1f +-10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +-11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +-12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +-13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +-14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +-15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +-16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) +-17: vptest Vx,Wx (66) +-18: vbroadcastss Vx,Wd (66),(v) +-19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +-1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +-1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) +-1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +-1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +-1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +-1f: vpabsq Vx,Wx (66),(ev) +-# 0x0f 0x38 0x20-0x2f +-20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +-21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +-22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +-23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +-24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +-25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +-26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +-27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +-28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +-29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux 
(F3),(ev) +-2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) +-2b: vpackusdw Vx,Hx,Wx (66),(v1) +-2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +-2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) +-2e: vmaskmovps Mx,Hx,Vx (66),(v) +-2f: vmaskmovpd Mx,Hx,Vx (66),(v) +-# 0x0f 0x38 0x30-0x3f +-30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +-31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +-32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +-33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +-34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +-35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +-36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) +-37: vpcmpgtq Vx,Hx,Wx (66),(v1) +-38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +-39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +-3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +-3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) +-3c: vpmaxsb Vx,Hx,Wx (66),(v1) +-3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) +-3e: vpmaxuw Vx,Hx,Wx (66),(v1) +-3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) +-# 0x0f 0x38 0x40-0x8f +-40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) +-41: vphminposuw Vdq,Wdq (66),(v1) +-42: vgetexpps/d Vx,Wx (66),(ev) +-43: vgetexpss/d Vx,Hx,Wx (66),(ev) +-44: vplzcntd/q Vx,Wx (66),(ev) +-45: vpsrlvd/q Vx,Hx,Wx (66),(v) +-46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) +-47: vpsllvd/q Vx,Hx,Wx (66),(v) +-# Skip 0x48-0x4b +-4c: vrcp14ps/d Vpd,Wpd (66),(ev) +-4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +-4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +-4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +-# Skip 0x50-0x57 +-58: vpbroadcastd Vx,Wx (66),(v) +-59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +-5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +-5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +-# Skip 0x5c-0x63 +-64: vpblendmd/q Vx,Hx,Wx (66),(ev) +-65: vblendmps/d Vx,Hx,Wx (66),(ev) +-66: vpblendmb/w Vx,Hx,Wx (66),(ev) +-# Skip 0x67-0x74 +-75: vpermi2b/w Vx,Hx,Wx (66),(ev) +-76: vpermi2d/q Vx,Hx,Wx (66),(ev) +-77: vpermi2ps/d Vx,Hx,Wx (66),(ev) +-78: vpbroadcastb Vx,Wx (66),(v) +-79: vpbroadcastw Vx,Wx (66),(v) +-7a: vpbroadcastb Vx,Rv (66),(ev) +-7b: vpbroadcastw Vx,Rv (66),(ev) +-7c: vpbroadcastd/q Vx,Rv (66),(ev) +-7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +-7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +-7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) +-80: INVEPT Gy,Mdq (66) +-81: INVPID Gy,Mdq (66) +-82: INVPCID Gy,Mdq (66) +-83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +-88: vexpandps/d Vpd,Wpd (66),(ev) +-89: vpexpandd/q Vx,Wx (66),(ev) +-8a: vcompressps/d Wx,Vx (66),(ev) +-8b: vpcompressd/q Wx,Vx (66),(ev) +-8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +-8d: vpermb/w Vx,Hx,Wx (66),(ev) +-8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +-# 0x0f 0x38 0x90-0xbf (FMA) +-90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +-91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) +-92: vgatherdps/d Vx,Hx,Wx (66),(v) +-93: vgatherqps/d Vx,Hx,Wx (66),(v) +-94: +-95: +-96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +-97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +-98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +-99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +-9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +-9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +-9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +-9d: vfnmadd132ss/d Vx,Hx,Wx 
(66),(v),(v1) +-9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +-9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +-a0: vpscatterdd/q Wx,Vx (66),(ev) +-a1: vpscatterqd/q Wx,Vx (66),(ev) +-a2: vscatterdps/d Wx,Vx (66),(ev) +-a3: vscatterqps/d Wx,Vx (66),(ev) +-a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +-a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +-a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +-a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +-aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +-ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +-ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +-ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +-ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +-af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +-b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +-b5: vpmadd52huq Vx,Hx,Wx (66),(ev) +-b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +-b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +-b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +-b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +-ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +-bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +-bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +-bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +-be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +-bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +-# 0x0f 0x38 0xc0-0xff +-c4: vpconflictd/q Vx,Wx (66),(ev) +-c6: Grp18 (1A) +-c7: Grp19 (1A) +-c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) +-c9: sha1msg1 Vdq,Wdq +-ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +-cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +-cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +-cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) +-db: VAESIMC Vdq,Wdq (66),(v1) +-dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +-dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +-de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +-df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +-f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +-f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +-f2: ANDN Gy,By,Ey (v) +-f3: Grp17 (1A) +-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +-f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +-f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +-EndTable +- +-Table: 3-byte opcode 2 (0x0f 0x3a) +-Referrer: 3-byte escape 2 +-AVXcode: 3 +-# 0x0f 0x3a 0x00-0xff +-00: vpermq Vqq,Wqq,Ib (66),(v) +-01: vpermpd Vqq,Wqq,Ib (66),(v) +-02: vpblendd Vx,Hx,Wx,Ib (66),(v) +-03: valignd/q Vx,Hx,Wx,Ib (66),(ev) +-04: vpermilps Vx,Wx,Ib (66),(v) +-05: vpermilpd Vx,Wx,Ib (66),(v) +-06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +-07: +-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +-09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +-0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) +-0c: vblendps Vx,Hx,Wx,Ib (66) +-0d: vblendpd Vx,Hx,Wx,Ib (66) +-0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +-0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +-14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +-15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +-16: vpextrd/q Ey,Vdq,Ib (66),(v1) +-17: vextractps Ed,Vdq,Ib (66),(v1) +-18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +-19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +-1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +-1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) +-1d: vcvtps2ph Wx,Vx,Ib (66),(v) +-1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +-1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) +-20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +-21: vinsertps Vdq,Hdq,Udq/Md,Ib 
(66),(v1) +-22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +-23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +-25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +-26: vgetmantps/d Vx,Wx,Ib (66),(ev) +-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +-30: kshiftrb/w Vk,Uk,Ib (66),(v) +-31: kshiftrd/q Vk,Uk,Ib (66),(v) +-32: kshiftlb/w Vk,Uk,Ib (66),(v) +-33: kshiftld/q Vk,Uk,Ib (66),(v) +-38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +-39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +-3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +-3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +-3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +-3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) +-40: vdpps Vx,Hx,Wx,Ib (66) +-41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +-42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +-43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +-44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +-46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +-4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +-4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +-4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +-50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +-51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +-54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +-55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +-56: vreduceps/d Vx,Wx,Ib (66),(ev) +-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +-60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +-61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +-62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +-63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +-66: vfpclassps/d Vk,Wx,Ib (66),(ev) +-67: vfpclassss/d Vk,Wx,Ib (66),(ev) +-cc: sha1rnds4 Vdq,Wdq,Ib +-df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +-f0: RORX Gy,Ey,Ib (F2),(v) +-EndTable +- +-GrpTable: Grp1 +-0: ADD +-1: OR +-2: ADC +-3: SBB +-4: AND +-5: SUB +-6: XOR +-7: CMP +-EndTable +- +-GrpTable: Grp1A +-0: POP +-EndTable +- +-GrpTable: Grp2 +-0: ROL +-1: ROR +-2: RCL +-3: RCR +-4: SHL/SAL +-5: SHR +-6: +-7: SAR +-EndTable +- +-GrpTable: Grp3_1 +-0: TEST Eb,Ib +-1: +-2: NOT Eb +-3: NEG Eb +-4: MUL AL,Eb +-5: IMUL AL,Eb +-6: DIV AL,Eb +-7: IDIV AL,Eb +-EndTable +- +-GrpTable: Grp3_2 +-0: TEST Ev,Iz +-1: +-2: NOT Ev +-3: NEG Ev +-4: MUL rAX,Ev +-5: IMUL rAX,Ev +-6: DIV rAX,Ev +-7: IDIV rAX,Ev +-EndTable +- +-GrpTable: Grp4 +-0: INC Eb +-1: DEC Eb +-EndTable +- +-GrpTable: Grp5 +-0: INC Ev +-1: DEC Ev +-# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+-2: CALLN Ev (f64) +-3: CALLF Ep +-4: JMPN Ev (f64) +-5: JMPF Mp +-6: PUSH Ev (d64) +-7: +-EndTable +- +-GrpTable: Grp6 +-0: SLDT Rv/Mw +-1: STR Rv/Mw +-2: LLDT Ew +-3: LTR Ew +-4: VERR Ew +-5: VERW Ew +-EndTable +- +-GrpTable: Grp7 +-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +-3: LIDT Ms +-4: SMSW Mw/Rv +-5: rdpkru (110),(11B) | wrpkru (111),(11B) +-6: LMSW Ew +-7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +-EndTable +- +-GrpTable: Grp8 +-4: BT +-5: BTS +-6: BTR +-7: BTC +-EndTable +- +-GrpTable: Grp9 +-1: CMPXCHG8B/16B Mq/Mdq +-3: xrstors +-4: xsavec +-5: xsaves +-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +-7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +-EndTable +- +-GrpTable: Grp10 +-EndTable +- +-# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +-GrpTable: Grp11A +-0: MOV Eb,Ib +-7: XABORT Ib (000),(11B) +-EndTable +- +-GrpTable: Grp11B +-0: MOV Eb,Iz +-7: XBEGIN Jz (000),(11B) +-EndTable +- +-GrpTable: Grp12 +-2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +-4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +-6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +-EndTable +- +-GrpTable: Grp13 +-0: vprord/q Hx,Wx,Ib (66),(ev) +-1: vprold/q Hx,Wx,Ib (66),(ev) +-2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +-4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) +-6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +-EndTable +- +-GrpTable: Grp14 +-2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +-3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +-6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +-7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +-EndTable +- +-GrpTable: Grp15 +-0: fxsave | RDFSBASE Ry (F3),(11B) +-1: fxstor | RDGSBASE Ry (F3),(11B) +-2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +-3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +-4: XSAVE +-5: XRSTOR | lfence (11B) +-6: XSAVEOPT | clwb (66) | mfence (11B) +-7: clflush | clflushopt (66) | sfence (11B) +-EndTable +- +-GrpTable: Grp16 +-0: prefetch NTA +-1: prefetch T0 +-2: prefetch T1 +-3: prefetch T2 +-EndTable +- +-GrpTable: Grp17 +-1: BLSR By,Ey (v) +-2: BLSMSK By,Ey (v) +-3: BLSI By,Ey (v) +-EndTable +- +-GrpTable: Grp18 +-1: vgatherpf0dps/d Wx (66),(ev) +-2: vgatherpf1dps/d Wx (66),(ev) +-5: vscatterpf0dps/d Wx (66),(ev) +-6: vscatterpf1dps/d Wx (66),(ev) +-EndTable +- +-GrpTable: Grp19 +-1: vgatherpf0qps/d Wx (66),(ev) +-2: vgatherpf1qps/d Wx (66),(ev) +-5: vscatterpf0qps/d Wx (66),(ev) +-6: vscatterpf1qps/d Wx (66),(ev) +-EndTable +- +-# AMD's Prefetch Group +-GrpTable: GrpP +-0: PREFETCH +-1: PREFETCHW +-EndTable +- +-GrpTable: GrpPDLK +-0: MONTMUL +-1: XSHA1 +-2: XSHA2 +-EndTable +- +-GrpTable: GrpRNG +-0: xstore-rng +-1: xcrypt-ecb +-2: xcrypt-cbc +-4: xcrypt-cfb +-5: xcrypt-ofb +-EndTable +diff --git a/tools/objtool/arch/x86/lib/inat.c b/tools/objtool/arch/x86/lib/inat.c +new file mode 100644 +index 0000000..c1f01a8 +--- /dev/null ++++ b/tools/objtool/arch/x86/lib/inat.c +@@ -0,0 +1,97 @@ ++/* ++ * x86 instruction attribute tables ++ * ++ * Written by Masami Hiramatsu <mhiramat@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free 
Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ */ ++#include <asm/insn.h> ++ ++/* Attribute tables are generated from opcode map */ ++#include "inat-tables.c" ++ ++/* Attribute search APIs */ ++insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) ++{ ++ return inat_primary_table[opcode]; ++} ++ ++int inat_get_last_prefix_id(insn_byte_t last_pfx) ++{ ++ insn_attr_t lpfx_attr; ++ ++ lpfx_attr = inat_get_opcode_attribute(last_pfx); ++ return inat_last_prefix_id(lpfx_attr); ++} ++ ++insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, ++ insn_attr_t esc_attr) ++{ ++ const insn_attr_t *table; ++ int n; ++ ++ n = inat_escape_id(esc_attr); ++ ++ table = inat_escape_tables[n][0]; ++ if (!table) ++ return 0; ++ if (inat_has_variant(table[opcode]) && lpfx_id) { ++ table = inat_escape_tables[n][lpfx_id]; ++ if (!table) ++ return 0; ++ } ++ return table[opcode]; ++} ++ ++insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, ++ insn_attr_t grp_attr) ++{ ++ const insn_attr_t *table; ++ int n; ++ ++ n = inat_group_id(grp_attr); ++ ++ table = inat_group_tables[n][0]; ++ if (!table) ++ return inat_group_common_attribute(grp_attr); ++ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { ++ table = inat_group_tables[n][lpfx_id]; ++ if (!table) ++ return inat_group_common_attribute(grp_attr); ++ } ++ return table[X86_MODRM_REG(modrm)] | ++ inat_group_common_attribute(grp_attr); ++} ++ ++insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, ++ insn_byte_t vex_p) ++{ ++ const insn_attr_t *table; ++ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) ++ return 0; ++ /* At first, this checks the master table */ ++ table = inat_avx_tables[vex_m][0]; ++ if (!table) ++ return 0; ++ if (!inat_is_group(table[opcode]) && vex_p) { ++ /* If this is not a group, get attribute directly */ ++ table = inat_avx_tables[vex_m][vex_p]; ++ if (!table) ++ return 0; ++ } ++ return table[opcode]; ++} ++ +diff --git a/tools/objtool/arch/x86/lib/insn.c b/tools/objtool/arch/x86/lib/insn.c +new file mode 100644 +index 0000000..1088eb8 +--- /dev/null ++++ b/tools/objtool/arch/x86/lib/insn.c +@@ -0,0 +1,606 @@ ++/* ++ * x86 instruction analysis ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Copyright (C) IBM Corporation, 2002, 2004, 2009 ++ */ ++ ++#ifdef __KERNEL__ ++#include <linux/string.h> ++#else ++#include <string.h> ++#endif ++#include <asm/inat.h> ++#include <asm/insn.h> ++ ++/* Verify next sizeof(t) bytes can be on the same instruction */ ++#define validate_next(t, insn, n) \ ++ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) ++ ++#define __get_next(t, insn) \ ++ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) ++ ++#define __peek_nbyte_next(t, insn, n) \ ++ ({ t r = *(t*)((insn)->next_byte + n); r; }) ++ ++#define get_next(t, insn) \ ++ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) ++ ++#define peek_nbyte_next(t, insn, n) \ ++ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) ++ ++#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) ++ ++/** ++ * insn_init() - initialize struct insn ++ * @insn: &struct insn to be initialized ++ * @kaddr: address (in kernel memory) of instruction (or copy thereof) ++ * @x86_64: !0 for 64-bit kernel or 64-bit app ++ */ ++void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) ++{ ++ /* ++ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid ++ * even if the input buffer is long enough to hold them. ++ */ ++ if (buf_len > MAX_INSN_SIZE) ++ buf_len = MAX_INSN_SIZE; ++ ++ memset(insn, 0, sizeof(*insn)); ++ insn->kaddr = kaddr; ++ insn->end_kaddr = kaddr + buf_len; ++ insn->next_byte = kaddr; ++ insn->x86_64 = x86_64 ? 1 : 0; ++ insn->opnd_bytes = 4; ++ if (x86_64) ++ insn->addr_bytes = 8; ++ else ++ insn->addr_bytes = 4; ++} ++ ++/** ++ * insn_get_prefixes - scan x86 instruction prefix bytes ++ * @insn: &struct insn containing instruction ++ * ++ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte ++ * to point to the (first) opcode. No effect if @insn->prefixes.got ++ * is already set. 
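++ * In 64-bit mode a REX prefix, and in either mode a VEX/EVEX prefix,
++ * is also decoded here and recorded in @insn->rex_prefix and
++ * @insn->vex_prefix respectively.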
++ */ ++void insn_get_prefixes(struct insn *insn) ++{ ++ struct insn_field *prefixes = &insn->prefixes; ++ insn_attr_t attr; ++ insn_byte_t b, lb; ++ int i, nb; ++ ++ if (prefixes->got) ++ return; ++ ++ nb = 0; ++ lb = 0; ++ b = peek_next(insn_byte_t, insn); ++ attr = inat_get_opcode_attribute(b); ++ while (inat_is_legacy_prefix(attr)) { ++ /* Skip if same prefix */ ++ for (i = 0; i < nb; i++) ++ if (prefixes->bytes[i] == b) ++ goto found; ++ if (nb == 4) ++ /* Invalid instruction */ ++ break; ++ prefixes->bytes[nb++] = b; ++ if (inat_is_address_size_prefix(attr)) { ++ /* address size switches 2/4 or 4/8 */ ++ if (insn->x86_64) ++ insn->addr_bytes ^= 12; ++ else ++ insn->addr_bytes ^= 6; ++ } else if (inat_is_operand_size_prefix(attr)) { ++ /* oprand size switches 2/4 */ ++ insn->opnd_bytes ^= 6; ++ } ++found: ++ prefixes->nbytes++; ++ insn->next_byte++; ++ lb = b; ++ b = peek_next(insn_byte_t, insn); ++ attr = inat_get_opcode_attribute(b); ++ } ++ /* Set the last prefix */ ++ if (lb && lb != insn->prefixes.bytes[3]) { ++ if (unlikely(insn->prefixes.bytes[3])) { ++ /* Swap the last prefix */ ++ b = insn->prefixes.bytes[3]; ++ for (i = 0; i < nb; i++) ++ if (prefixes->bytes[i] == lb) ++ prefixes->bytes[i] = b; ++ } ++ insn->prefixes.bytes[3] = lb; ++ } ++ ++ /* Decode REX prefix */ ++ if (insn->x86_64) { ++ b = peek_next(insn_byte_t, insn); ++ attr = inat_get_opcode_attribute(b); ++ if (inat_is_rex_prefix(attr)) { ++ insn->rex_prefix.value = b; ++ insn->rex_prefix.nbytes = 1; ++ insn->next_byte++; ++ if (X86_REX_W(b)) ++ /* REX.W overrides opnd_size */ ++ insn->opnd_bytes = 8; ++ } ++ } ++ insn->rex_prefix.got = 1; ++ ++ /* Decode VEX prefix */ ++ b = peek_next(insn_byte_t, insn); ++ attr = inat_get_opcode_attribute(b); ++ if (inat_is_vex_prefix(attr)) { ++ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); ++ if (!insn->x86_64) { ++ /* ++ * In 32-bits mode, if the [7:6] bits (mod bits of ++ * ModRM) on the second byte are not 11b, it is ++ * LDS or LES or BOUND. ++ */ ++ if (X86_MODRM_MOD(b2) != 3) ++ goto vex_end; ++ } ++ insn->vex_prefix.bytes[0] = b; ++ insn->vex_prefix.bytes[1] = b2; ++ if (inat_is_evex_prefix(attr)) { ++ b2 = peek_nbyte_next(insn_byte_t, insn, 2); ++ insn->vex_prefix.bytes[2] = b2; ++ b2 = peek_nbyte_next(insn_byte_t, insn, 3); ++ insn->vex_prefix.bytes[3] = b2; ++ insn->vex_prefix.nbytes = 4; ++ insn->next_byte += 4; ++ if (insn->x86_64 && X86_VEX_W(b2)) ++ /* VEX.W overrides opnd_size */ ++ insn->opnd_bytes = 8; ++ } else if (inat_is_vex3_prefix(attr)) { ++ b2 = peek_nbyte_next(insn_byte_t, insn, 2); ++ insn->vex_prefix.bytes[2] = b2; ++ insn->vex_prefix.nbytes = 3; ++ insn->next_byte += 3; ++ if (insn->x86_64 && X86_VEX_W(b2)) ++ /* VEX.W overrides opnd_size */ ++ insn->opnd_bytes = 8; ++ } else { ++ /* ++ * For VEX2, fake VEX3-like byte#2. ++ * Makes it easier to decode vex.W, vex.vvvv, ++ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. ++ */ ++ insn->vex_prefix.bytes[2] = b2 & 0x7f; ++ insn->vex_prefix.nbytes = 2; ++ insn->next_byte += 2; ++ } ++ } ++vex_end: ++ insn->vex_prefix.got = 1; ++ ++ prefixes->got = 1; ++ ++err_out: ++ return; ++} ++ ++/** ++ * insn_get_opcode - collect opcode(s) ++ * @insn: &struct insn containing instruction ++ * ++ * Populates @insn->opcode, updates @insn->next_byte to point past the ++ * opcode byte(s), and set @insn->attr (except for groups). ++ * If necessary, first collects any preceding (prefix) bytes. ++ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got ++ * is already 1. 
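++ * For escaped opcodes (0x0f, 0x0f 0x38, 0x0f 0x3a) every escape byte
++ * is collected; VEX/EVEX-encoded instructions carry only a single
++ * opcode byte, looked up through the AVX attribute tables.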
++ */ ++void insn_get_opcode(struct insn *insn) ++{ ++ struct insn_field *opcode = &insn->opcode; ++ insn_byte_t op; ++ int pfx_id; ++ if (opcode->got) ++ return; ++ if (!insn->prefixes.got) ++ insn_get_prefixes(insn); ++ ++ /* Get first opcode */ ++ op = get_next(insn_byte_t, insn); ++ opcode->bytes[0] = op; ++ opcode->nbytes = 1; ++ ++ /* Check if there is VEX prefix or not */ ++ if (insn_is_avx(insn)) { ++ insn_byte_t m, p; ++ m = insn_vex_m_bits(insn); ++ p = insn_vex_p_bits(insn); ++ insn->attr = inat_get_avx_attribute(op, m, p); ++ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || ++ (!inat_accept_vex(insn->attr) && ++ !inat_is_group(insn->attr))) ++ insn->attr = 0; /* This instruction is bad */ ++ goto end; /* VEX has only 1 byte for opcode */ ++ } ++ ++ insn->attr = inat_get_opcode_attribute(op); ++ while (inat_is_escape(insn->attr)) { ++ /* Get escaped opcode */ ++ op = get_next(insn_byte_t, insn); ++ opcode->bytes[opcode->nbytes++] = op; ++ pfx_id = insn_last_prefix_id(insn); ++ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); ++ } ++ if (inat_must_vex(insn->attr)) ++ insn->attr = 0; /* This instruction is bad */ ++end: ++ opcode->got = 1; ++ ++err_out: ++ return; ++} ++ ++/** ++ * insn_get_modrm - collect ModRM byte, if any ++ * @insn: &struct insn containing instruction ++ * ++ * Populates @insn->modrm and updates @insn->next_byte to point past the ++ * ModRM byte, if any. If necessary, first collects the preceding bytes ++ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. ++ */ ++void insn_get_modrm(struct insn *insn) ++{ ++ struct insn_field *modrm = &insn->modrm; ++ insn_byte_t pfx_id, mod; ++ if (modrm->got) ++ return; ++ if (!insn->opcode.got) ++ insn_get_opcode(insn); ++ ++ if (inat_has_modrm(insn->attr)) { ++ mod = get_next(insn_byte_t, insn); ++ modrm->value = mod; ++ modrm->nbytes = 1; ++ if (inat_is_group(insn->attr)) { ++ pfx_id = insn_last_prefix_id(insn); ++ insn->attr = inat_get_group_attribute(mod, pfx_id, ++ insn->attr); ++ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) ++ insn->attr = 0; /* This is bad */ ++ } ++ } ++ ++ if (insn->x86_64 && inat_is_force64(insn->attr)) ++ insn->opnd_bytes = 8; ++ modrm->got = 1; ++ ++err_out: ++ return; ++} ++ ++ ++/** ++ * insn_rip_relative() - Does instruction use RIP-relative addressing mode? ++ * @insn: &struct insn containing instruction ++ * ++ * If necessary, first collects the instruction up to and including the ++ * ModRM byte. No effect if @insn->x86_64 is 0. ++ */ ++int insn_rip_relative(struct insn *insn) ++{ ++ struct insn_field *modrm = &insn->modrm; ++ ++ if (!insn->x86_64) ++ return 0; ++ if (!modrm->got) ++ insn_get_modrm(insn); ++ /* ++ * For rip-relative instructions, the mod field (top 2 bits) ++ * is zero and the r/m field (bottom 3 bits) is 0x5. ++ */ ++ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); ++} ++ ++/** ++ * insn_get_sib() - Get the SIB byte of instruction ++ * @insn: &struct insn containing instruction ++ * ++ * If necessary, first collects the instruction up to and including the ++ * ModRM byte. 
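++ * A SIB byte is present only when ModRM.mod != 3 and ModRM.rm == 4,
++ * and never with 16-bit addressing.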
++ */ ++void insn_get_sib(struct insn *insn) ++{ ++ insn_byte_t modrm; ++ ++ if (insn->sib.got) ++ return; ++ if (!insn->modrm.got) ++ insn_get_modrm(insn); ++ if (insn->modrm.nbytes) { ++ modrm = (insn_byte_t)insn->modrm.value; ++ if (insn->addr_bytes != 2 && ++ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { ++ insn->sib.value = get_next(insn_byte_t, insn); ++ insn->sib.nbytes = 1; ++ } ++ } ++ insn->sib.got = 1; ++ ++err_out: ++ return; ++} ++ ++ ++/** ++ * insn_get_displacement() - Get the displacement of instruction ++ * @insn: &struct insn containing instruction ++ * ++ * If necessary, first collects the instruction up to and including the ++ * SIB byte. ++ * Displacement value is sign-expanded. ++ */ ++void insn_get_displacement(struct insn *insn) ++{ ++ insn_byte_t mod, rm, base; ++ ++ if (insn->displacement.got) ++ return; ++ if (!insn->sib.got) ++ insn_get_sib(insn); ++ if (insn->modrm.nbytes) { ++ /* ++ * Interpreting the modrm byte: ++ * mod = 00 - no displacement fields (exceptions below) ++ * mod = 01 - 1-byte displacement field ++ * mod = 10 - displacement field is 4 bytes, or 2 bytes if ++ * address size = 2 (0x67 prefix in 32-bit mode) ++ * mod = 11 - no memory operand ++ * ++ * If address size = 2... ++ * mod = 00, r/m = 110 - displacement field is 2 bytes ++ * ++ * If address size != 2... ++ * mod != 11, r/m = 100 - SIB byte exists ++ * mod = 00, SIB base = 101 - displacement field is 4 bytes ++ * mod = 00, r/m = 101 - rip-relative addressing, displacement ++ * field is 4 bytes ++ */ ++ mod = X86_MODRM_MOD(insn->modrm.value); ++ rm = X86_MODRM_RM(insn->modrm.value); ++ base = X86_SIB_BASE(insn->sib.value); ++ if (mod == 3) ++ goto out; ++ if (mod == 1) { ++ insn->displacement.value = get_next(signed char, insn); ++ insn->displacement.nbytes = 1; ++ } else if (insn->addr_bytes == 2) { ++ if ((mod == 0 && rm == 6) || mod == 2) { ++ insn->displacement.value = ++ get_next(short, insn); ++ insn->displacement.nbytes = 2; ++ } ++ } else { ++ if ((mod == 0 && rm == 5) || mod == 2 || ++ (mod == 0 && base == 5)) { ++ insn->displacement.value = get_next(int, insn); ++ insn->displacement.nbytes = 4; ++ } ++ } ++ } ++out: ++ insn->displacement.got = 1; ++ ++err_out: ++ return; ++} ++ ++/* Decode moffset16/32/64. Return 0 if failed */ ++static int __get_moffset(struct insn *insn) ++{ ++ switch (insn->addr_bytes) { ++ case 2: ++ insn->moffset1.value = get_next(short, insn); ++ insn->moffset1.nbytes = 2; ++ break; ++ case 4: ++ insn->moffset1.value = get_next(int, insn); ++ insn->moffset1.nbytes = 4; ++ break; ++ case 8: ++ insn->moffset1.value = get_next(int, insn); ++ insn->moffset1.nbytes = 4; ++ insn->moffset2.value = get_next(int, insn); ++ insn->moffset2.nbytes = 4; ++ break; ++ default: /* opnd_bytes must be modified manually */ ++ goto err_out; ++ } ++ insn->moffset1.got = insn->moffset2.got = 1; ++ ++ return 1; ++ ++err_out: ++ return 0; ++} ++ ++/* Decode imm v32(Iz). 
Return 0 if failed */ ++static int __get_immv32(struct insn *insn) ++{ ++ switch (insn->opnd_bytes) { ++ case 2: ++ insn->immediate.value = get_next(short, insn); ++ insn->immediate.nbytes = 2; ++ break; ++ case 4: ++ case 8: ++ insn->immediate.value = get_next(int, insn); ++ insn->immediate.nbytes = 4; ++ break; ++ default: /* opnd_bytes must be modified manually */ ++ goto err_out; ++ } ++ ++ return 1; ++ ++err_out: ++ return 0; ++} ++ ++/* Decode imm v64(Iv/Ov), Return 0 if failed */ ++static int __get_immv(struct insn *insn) ++{ ++ switch (insn->opnd_bytes) { ++ case 2: ++ insn->immediate1.value = get_next(short, insn); ++ insn->immediate1.nbytes = 2; ++ break; ++ case 4: ++ insn->immediate1.value = get_next(int, insn); ++ insn->immediate1.nbytes = 4; ++ break; ++ case 8: ++ insn->immediate1.value = get_next(int, insn); ++ insn->immediate1.nbytes = 4; ++ insn->immediate2.value = get_next(int, insn); ++ insn->immediate2.nbytes = 4; ++ break; ++ default: /* opnd_bytes must be modified manually */ ++ goto err_out; ++ } ++ insn->immediate1.got = insn->immediate2.got = 1; ++ ++ return 1; ++err_out: ++ return 0; ++} ++ ++/* Decode ptr16:16/32(Ap) */ ++static int __get_immptr(struct insn *insn) ++{ ++ switch (insn->opnd_bytes) { ++ case 2: ++ insn->immediate1.value = get_next(short, insn); ++ insn->immediate1.nbytes = 2; ++ break; ++ case 4: ++ insn->immediate1.value = get_next(int, insn); ++ insn->immediate1.nbytes = 4; ++ break; ++ case 8: ++ /* ptr16:64 is not exist (no segment) */ ++ return 0; ++ default: /* opnd_bytes must be modified manually */ ++ goto err_out; ++ } ++ insn->immediate2.value = get_next(unsigned short, insn); ++ insn->immediate2.nbytes = 2; ++ insn->immediate1.got = insn->immediate2.got = 1; ++ ++ return 1; ++err_out: ++ return 0; ++} ++ ++/** ++ * insn_get_immediate() - Get the immediates of instruction ++ * @insn: &struct insn containing instruction ++ * ++ * If necessary, first collects the instruction up to and including the ++ * displacement bytes. ++ * Basically, most of immediates are sign-expanded. 
Unsigned-value can be ++ * get by bit masking with ((1 << (nbytes * 8)) - 1) ++ */ ++void insn_get_immediate(struct insn *insn) ++{ ++ if (insn->immediate.got) ++ return; ++ if (!insn->displacement.got) ++ insn_get_displacement(insn); ++ ++ if (inat_has_moffset(insn->attr)) { ++ if (!__get_moffset(insn)) ++ goto err_out; ++ goto done; ++ } ++ ++ if (!inat_has_immediate(insn->attr)) ++ /* no immediates */ ++ goto done; ++ ++ switch (inat_immediate_size(insn->attr)) { ++ case INAT_IMM_BYTE: ++ insn->immediate.value = get_next(signed char, insn); ++ insn->immediate.nbytes = 1; ++ break; ++ case INAT_IMM_WORD: ++ insn->immediate.value = get_next(short, insn); ++ insn->immediate.nbytes = 2; ++ break; ++ case INAT_IMM_DWORD: ++ insn->immediate.value = get_next(int, insn); ++ insn->immediate.nbytes = 4; ++ break; ++ case INAT_IMM_QWORD: ++ insn->immediate1.value = get_next(int, insn); ++ insn->immediate1.nbytes = 4; ++ insn->immediate2.value = get_next(int, insn); ++ insn->immediate2.nbytes = 4; ++ break; ++ case INAT_IMM_PTR: ++ if (!__get_immptr(insn)) ++ goto err_out; ++ break; ++ case INAT_IMM_VWORD32: ++ if (!__get_immv32(insn)) ++ goto err_out; ++ break; ++ case INAT_IMM_VWORD: ++ if (!__get_immv(insn)) ++ goto err_out; ++ break; ++ default: ++ /* Here, insn must have an immediate, but failed */ ++ goto err_out; ++ } ++ if (inat_has_second_immediate(insn->attr)) { ++ insn->immediate2.value = get_next(signed char, insn); ++ insn->immediate2.nbytes = 1; ++ } ++done: ++ insn->immediate.got = 1; ++ ++err_out: ++ return; ++} ++ ++/** ++ * insn_get_length() - Get the length of instruction ++ * @insn: &struct insn containing instruction ++ * ++ * If necessary, first collects the instruction up to and including the ++ * immediates bytes. ++ */ ++void insn_get_length(struct insn *insn) ++{ ++ if (insn->length) ++ return; ++ if (!insn->immediate.got) ++ insn_get_immediate(insn); ++ insn->length = (unsigned char)((unsigned long)insn->next_byte ++ - (unsigned long)insn->kaddr); ++} +diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt +new file mode 100644 +index 0000000..aa2270d +--- /dev/null ++++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt +@@ -0,0 +1,1072 @@ ++# x86 Opcode Maps ++# ++# This is (mostly) based on following documentations. ++# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C ++# (#326018-047US, June 2013) ++# ++#<Opcode maps> ++# Table: table-name ++# Referrer: escaped-name ++# AVXcode: avx-code ++# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] ++# (or) ++# opcode: escape # escaped-name ++# EndTable ++# ++# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix ++# mnemonics that begin with lowercase 'k' accept a VEX prefix ++# ++#<group maps> ++# GrpTable: GrpXXX ++# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] ++# EndTable ++# ++# AVX Superscripts ++# (ev): this opcode requires EVEX prefix. ++# (evo): this opcode is changed by EVEX prefix (EVEX opcode) ++# (v): this opcode requires VEX prefix. ++# (v1): this opcode only supports 128bit VEX. ++# ++# Last Prefix Superscripts ++# - (66): the last prefix is 0x66 ++# - (F3): the last prefix is 0xF3 ++# - (F2): the last prefix is 0xF2 ++# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) ++# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. 
++ ++Table: one byte opcode ++Referrer: ++AVXcode: ++# 0x00 - 0x0f ++00: ADD Eb,Gb ++01: ADD Ev,Gv ++02: ADD Gb,Eb ++03: ADD Gv,Ev ++04: ADD AL,Ib ++05: ADD rAX,Iz ++06: PUSH ES (i64) ++07: POP ES (i64) ++08: OR Eb,Gb ++09: OR Ev,Gv ++0a: OR Gb,Eb ++0b: OR Gv,Ev ++0c: OR AL,Ib ++0d: OR rAX,Iz ++0e: PUSH CS (i64) ++0f: escape # 2-byte escape ++# 0x10 - 0x1f ++10: ADC Eb,Gb ++11: ADC Ev,Gv ++12: ADC Gb,Eb ++13: ADC Gv,Ev ++14: ADC AL,Ib ++15: ADC rAX,Iz ++16: PUSH SS (i64) ++17: POP SS (i64) ++18: SBB Eb,Gb ++19: SBB Ev,Gv ++1a: SBB Gb,Eb ++1b: SBB Gv,Ev ++1c: SBB AL,Ib ++1d: SBB rAX,Iz ++1e: PUSH DS (i64) ++1f: POP DS (i64) ++# 0x20 - 0x2f ++20: AND Eb,Gb ++21: AND Ev,Gv ++22: AND Gb,Eb ++23: AND Gv,Ev ++24: AND AL,Ib ++25: AND rAx,Iz ++26: SEG=ES (Prefix) ++27: DAA (i64) ++28: SUB Eb,Gb ++29: SUB Ev,Gv ++2a: SUB Gb,Eb ++2b: SUB Gv,Ev ++2c: SUB AL,Ib ++2d: SUB rAX,Iz ++2e: SEG=CS (Prefix) ++2f: DAS (i64) ++# 0x30 - 0x3f ++30: XOR Eb,Gb ++31: XOR Ev,Gv ++32: XOR Gb,Eb ++33: XOR Gv,Ev ++34: XOR AL,Ib ++35: XOR rAX,Iz ++36: SEG=SS (Prefix) ++37: AAA (i64) ++38: CMP Eb,Gb ++39: CMP Ev,Gv ++3a: CMP Gb,Eb ++3b: CMP Gv,Ev ++3c: CMP AL,Ib ++3d: CMP rAX,Iz ++3e: SEG=DS (Prefix) ++3f: AAS (i64) ++# 0x40 - 0x4f ++40: INC eAX (i64) | REX (o64) ++41: INC eCX (i64) | REX.B (o64) ++42: INC eDX (i64) | REX.X (o64) ++43: INC eBX (i64) | REX.XB (o64) ++44: INC eSP (i64) | REX.R (o64) ++45: INC eBP (i64) | REX.RB (o64) ++46: INC eSI (i64) | REX.RX (o64) ++47: INC eDI (i64) | REX.RXB (o64) ++48: DEC eAX (i64) | REX.W (o64) ++49: DEC eCX (i64) | REX.WB (o64) ++4a: DEC eDX (i64) | REX.WX (o64) ++4b: DEC eBX (i64) | REX.WXB (o64) ++4c: DEC eSP (i64) | REX.WR (o64) ++4d: DEC eBP (i64) | REX.WRB (o64) ++4e: DEC eSI (i64) | REX.WRX (o64) ++4f: DEC eDI (i64) | REX.WRXB (o64) ++# 0x50 - 0x5f ++50: PUSH rAX/r8 (d64) ++51: PUSH rCX/r9 (d64) ++52: PUSH rDX/r10 (d64) ++53: PUSH rBX/r11 (d64) ++54: PUSH rSP/r12 (d64) ++55: PUSH rBP/r13 (d64) ++56: PUSH rSI/r14 (d64) ++57: PUSH rDI/r15 (d64) ++58: POP rAX/r8 (d64) ++59: POP rCX/r9 (d64) ++5a: POP rDX/r10 (d64) ++5b: POP rBX/r11 (d64) ++5c: POP rSP/r12 (d64) ++5d: POP rBP/r13 (d64) ++5e: POP rSI/r14 (d64) ++5f: POP rDI/r15 (d64) ++# 0x60 - 0x6f ++60: PUSHA/PUSHAD (i64) ++61: POPA/POPAD (i64) ++62: BOUND Gv,Ma (i64) | EVEX (Prefix) ++63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) ++64: SEG=FS (Prefix) ++65: SEG=GS (Prefix) ++66: Operand-Size (Prefix) ++67: Address-Size (Prefix) ++68: PUSH Iz (d64) ++69: IMUL Gv,Ev,Iz ++6a: PUSH Ib (d64) ++6b: IMUL Gv,Ev,Ib ++6c: INS/INSB Yb,DX ++6d: INS/INSW/INSD Yz,DX ++6e: OUTS/OUTSB DX,Xb ++6f: OUTS/OUTSW/OUTSD DX,Xz ++# 0x70 - 0x7f ++70: JO Jb ++71: JNO Jb ++72: JB/JNAE/JC Jb ++73: JNB/JAE/JNC Jb ++74: JZ/JE Jb ++75: JNZ/JNE Jb ++76: JBE/JNA Jb ++77: JNBE/JA Jb ++78: JS Jb ++79: JNS Jb ++7a: JP/JPE Jb ++7b: JNP/JPO Jb ++7c: JL/JNGE Jb ++7d: JNL/JGE Jb ++7e: JLE/JNG Jb ++7f: JNLE/JG Jb ++# 0x80 - 0x8f ++80: Grp1 Eb,Ib (1A) ++81: Grp1 Ev,Iz (1A) ++82: Grp1 Eb,Ib (1A),(i64) ++83: Grp1 Ev,Ib (1A) ++84: TEST Eb,Gb ++85: TEST Ev,Gv ++86: XCHG Eb,Gb ++87: XCHG Ev,Gv ++88: MOV Eb,Gb ++89: MOV Ev,Gv ++8a: MOV Gb,Eb ++8b: MOV Gv,Ev ++8c: MOV Ev,Sw ++8d: LEA Gv,M ++8e: MOV Sw,Ew ++8f: Grp1A (1A) | POP Ev (d64) ++# 0x90 - 0x9f ++90: NOP | PAUSE (F3) | XCHG r8,rAX ++91: XCHG rCX/r9,rAX ++92: XCHG rDX/r10,rAX ++93: XCHG rBX/r11,rAX ++94: XCHG rSP/r12,rAX ++95: XCHG rBP/r13,rAX ++96: XCHG rSI/r14,rAX ++97: XCHG rDI/r15,rAX ++98: CBW/CWDE/CDQE ++99: CWD/CDQ/CQO ++9a: CALLF Ap (i64) ++9b: FWAIT/WAIT ++9c: PUSHF/D/Q Fv (d64) ++9d: POPF/D/Q Fv (d64) ++9e: SAHF 
++9f: LAHF ++# 0xa0 - 0xaf ++a0: MOV AL,Ob ++a1: MOV rAX,Ov ++a2: MOV Ob,AL ++a3: MOV Ov,rAX ++a4: MOVS/B Yb,Xb ++a5: MOVS/W/D/Q Yv,Xv ++a6: CMPS/B Xb,Yb ++a7: CMPS/W/D Xv,Yv ++a8: TEST AL,Ib ++a9: TEST rAX,Iz ++aa: STOS/B Yb,AL ++ab: STOS/W/D/Q Yv,rAX ++ac: LODS/B AL,Xb ++ad: LODS/W/D/Q rAX,Xv ++ae: SCAS/B AL,Yb ++# Note: The May 2011 Intel manual shows Xv for the second parameter of the ++# next instruction but Yv is correct ++af: SCAS/W/D/Q rAX,Yv ++# 0xb0 - 0xbf ++b0: MOV AL/R8L,Ib ++b1: MOV CL/R9L,Ib ++b2: MOV DL/R10L,Ib ++b3: MOV BL/R11L,Ib ++b4: MOV AH/R12L,Ib ++b5: MOV CH/R13L,Ib ++b6: MOV DH/R14L,Ib ++b7: MOV BH/R15L,Ib ++b8: MOV rAX/r8,Iv ++b9: MOV rCX/r9,Iv ++ba: MOV rDX/r10,Iv ++bb: MOV rBX/r11,Iv ++bc: MOV rSP/r12,Iv ++bd: MOV rBP/r13,Iv ++be: MOV rSI/r14,Iv ++bf: MOV rDI/r15,Iv ++# 0xc0 - 0xcf ++c0: Grp2 Eb,Ib (1A) ++c1: Grp2 Ev,Ib (1A) ++c2: RETN Iw (f64) ++c3: RETN ++c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) ++c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) ++c6: Grp11A Eb,Ib (1A) ++c7: Grp11B Ev,Iz (1A) ++c8: ENTER Iw,Ib ++c9: LEAVE (d64) ++ca: RETF Iw ++cb: RETF ++cc: INT3 ++cd: INT Ib ++ce: INTO (i64) ++cf: IRET/D/Q ++# 0xd0 - 0xdf ++d0: Grp2 Eb,1 (1A) ++d1: Grp2 Ev,1 (1A) ++d2: Grp2 Eb,CL (1A) ++d3: Grp2 Ev,CL (1A) ++d4: AAM Ib (i64) ++d5: AAD Ib (i64) ++d6: ++d7: XLAT/XLATB ++d8: ESC ++d9: ESC ++da: ESC ++db: ESC ++dc: ESC ++dd: ESC ++de: ESC ++df: ESC ++# 0xe0 - 0xef ++# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix ++# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation ++# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. ++e0: LOOPNE/LOOPNZ Jb (f64) ++e1: LOOPE/LOOPZ Jb (f64) ++e2: LOOP Jb (f64) ++e3: JrCXZ Jb (f64) ++e4: IN AL,Ib ++e5: IN eAX,Ib ++e6: OUT Ib,AL ++e7: OUT Ib,eAX ++# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset ++# in "near" jumps and calls is 16-bit. For CALL, ++# push of return address is 16-bit wide, RSP is decremented by 2 ++# but is not truncated to 16 bits, unlike RIP. ++e8: CALL Jz (f64) ++e9: JMP-near Jz (f64) ++ea: JMP-far Ap (i64) ++eb: JMP-short Jb (f64) ++ec: IN AL,DX ++ed: IN eAX,DX ++ee: OUT DX,AL ++ef: OUT DX,eAX ++# 0xf0 - 0xff ++f0: LOCK (Prefix) ++f1: ++f2: REPNE (Prefix) | XACQUIRE (Prefix) ++f3: REP/REPE (Prefix) | XRELEASE (Prefix) ++f4: HLT ++f5: CMC ++f6: Grp3_1 Eb (1A) ++f7: Grp3_2 Ev (1A) ++f8: CLC ++f9: STC ++fa: CLI ++fb: STI ++fc: CLD ++fd: STD ++fe: Grp4 (1A) ++ff: Grp5 (1A) ++EndTable ++ ++Table: 2-byte opcode (0x0f) ++Referrer: 2-byte escape ++AVXcode: 1 ++# 0x0f 0x00-0x0f ++00: Grp6 (1A) ++01: Grp7 (1A) ++02: LAR Gv,Ew ++03: LSL Gv,Ew ++04: ++05: SYSCALL (o64) ++06: CLTS ++07: SYSRET (o64) ++08: INVD ++09: WBINVD ++0a: ++0b: UD2 (1B) ++0c: ++# AMD's prefetch group. Intel supports prefetchw(/1) only. ++0d: GrpP ++0e: FEMMS ++# 3DNow! uses the last imm byte as opcode extension. ++0f: 3DNow! Pq,Qq,Ib ++# 0x0f 0x10-0x1f ++# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands ++# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. ++# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
++# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming ++# Reference A.1 ++10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) ++11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) ++12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) ++13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) ++14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) ++15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) ++16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) ++17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) ++18: Grp16 (1A) ++19: ++# Intel SDM opcode map does not list MPX instructions. For now using Gv for ++# bnd registers and Ev for everything else is OK because the instruction ++# decoder does not use the information except as an indication that there is ++# a ModR/M byte. ++1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev ++1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv ++1c: ++1d: ++1e: ++1f: NOP Ev ++# 0x0f 0x20-0x2f ++20: MOV Rd,Cd ++21: MOV Rd,Dd ++22: MOV Cd,Rd ++23: MOV Dd,Rd ++24: ++25: ++26: ++27: ++28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) ++29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) ++2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) ++2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) ++2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) ++2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) ++2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) ++2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) ++# 0x0f 0x30-0x3f ++30: WRMSR ++31: RDTSC ++32: RDMSR ++33: RDPMC ++34: SYSENTER ++35: SYSEXIT ++36: ++37: GETSEC ++38: escape # 3-byte escape 1 ++39: ++3a: escape # 3-byte escape 2 ++3b: ++3c: ++3d: ++3e: ++3f: ++# 0x0f 0x40-0x4f ++40: CMOVO Gv,Ev ++41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) ++42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) ++43: CMOVAE/NB/NC Gv,Ev ++44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) ++45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) ++46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) ++47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) ++48: CMOVS Gv,Ev ++49: CMOVNS Gv,Ev ++4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) ++4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk ++4c: CMOVL/NGE Gv,Ev ++4d: CMOVNL/GE Gv,Ev ++4e: CMOVLE/NG Gv,Ev ++4f: CMOVNLE/G Gv,Ev ++# 0x0f 0x50-0x5f ++50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) ++51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) ++52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) ++53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) ++54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) ++55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) ++56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) ++57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) ++58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) ++59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) ++5a: vcvtps2pd Vpd,Wps | vcvtpd2ps 
Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) ++5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) ++5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) ++5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) ++5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) ++5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) ++# 0x0f 0x60-0x6f ++60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) ++61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) ++62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) ++63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) ++64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) ++65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) ++66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) ++67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) ++68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) ++69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) ++6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) ++6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) ++6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) ++6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) ++6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) ++6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) ++# 0x0f 0x70-0x7f ++70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) ++71: Grp12 (1A) ++72: Grp13 (1A) ++73: Grp14 (1A) ++74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) ++75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) ++76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) ++# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. ++77: emms | vzeroupper | vzeroall ++78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) ++79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) ++7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) ++7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) ++7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) ++7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) ++7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) ++7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) ++# 0x0f 0x80-0x8f ++# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
++80: JO Jz (f64) ++81: JNO Jz (f64) ++82: JB/JC/JNAE Jz (f64) ++83: JAE/JNB/JNC Jz (f64) ++84: JE/JZ Jz (f64) ++85: JNE/JNZ Jz (f64) ++86: JBE/JNA Jz (f64) ++87: JA/JNBE Jz (f64) ++88: JS Jz (f64) ++89: JNS Jz (f64) ++8a: JP/JPE Jz (f64) ++8b: JNP/JPO Jz (f64) ++8c: JL/JNGE Jz (f64) ++8d: JNL/JGE Jz (f64) ++8e: JLE/JNG Jz (f64) ++8f: JNLE/JG Jz (f64) ++# 0x0f 0x90-0x9f ++90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) ++91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) ++92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) ++93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) ++94: SETE/Z Eb ++95: SETNE/NZ Eb ++96: SETBE/NA Eb ++97: SETA/NBE Eb ++98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) ++99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) ++9a: SETP/PE Eb ++9b: SETNP/PO Eb ++9c: SETL/NGE Eb ++9d: SETNL/GE Eb ++9e: SETLE/NG Eb ++9f: SETNLE/G Eb ++# 0x0f 0xa0-0xaf ++a0: PUSH FS (d64) ++a1: POP FS (d64) ++a2: CPUID ++a3: BT Ev,Gv ++a4: SHLD Ev,Gv,Ib ++a5: SHLD Ev,Gv,CL ++a6: GrpPDLK ++a7: GrpRNG ++a8: PUSH GS (d64) ++a9: POP GS (d64) ++aa: RSM ++ab: BTS Ev,Gv ++ac: SHRD Ev,Gv,Ib ++ad: SHRD Ev,Gv,CL ++ae: Grp15 (1A),(1C) ++af: IMUL Gv,Ev ++# 0x0f 0xb0-0xbf ++b0: CMPXCHG Eb,Gb ++b1: CMPXCHG Ev,Gv ++b2: LSS Gv,Mp ++b3: BTR Ev,Gv ++b4: LFS Gv,Mp ++b5: LGS Gv,Mp ++b6: MOVZX Gv,Eb ++b7: MOVZX Gv,Ew ++b8: JMPE (!F3) | POPCNT Gv,Ev (F3) ++b9: Grp10 (1A) ++ba: Grp8 Ev,Ib (1A) ++bb: BTC Ev,Gv ++bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) ++bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) ++be: MOVSX Gv,Eb ++bf: MOVSX Gv,Ew ++# 0x0f 0xc0-0xcf ++c0: XADD Eb,Gb ++c1: XADD Ev,Gv ++c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) ++c3: movnti My,Gy ++c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) ++c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) ++c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) ++c7: Grp9 (1A) ++c8: BSWAP RAX/EAX/R8/R8D ++c9: BSWAP RCX/ECX/R9/R9D ++ca: BSWAP RDX/EDX/R10/R10D ++cb: BSWAP RBX/EBX/R11/R11D ++cc: BSWAP RSP/ESP/R12/R12D ++cd: BSWAP RBP/EBP/R13/R13D ++ce: BSWAP RSI/ESI/R14/R14D ++cf: BSWAP RDI/EDI/R15/R15D ++# 0x0f 0xd0-0xdf ++d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) ++d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) ++d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) ++d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) ++d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) ++d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) ++d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) ++d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) ++d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) ++d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) ++da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) ++db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) ++dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) ++dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) ++de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) ++df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) ++# 0x0f 0xe0-0xef ++e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) ++e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) ++e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) ++e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) ++e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) ++e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) ++e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) ++e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) ++e8: psubsb Pq,Qq | 
vpsubsb Vx,Hx,Wx (66),(v1) ++e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ++ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) ++eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ++ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ++ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ++ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) ++ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) ++# 0x0f 0xf0-0xff ++f0: vlddqu Vx,Mx (F2) ++f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) ++f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) ++f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) ++f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) ++f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) ++f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) ++f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) ++f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) ++f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) ++fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) ++fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) ++fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) ++fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) ++fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) ++ff: UD0 ++EndTable ++ ++Table: 3-byte opcode 1 (0x0f 0x38) ++Referrer: 3-byte escape 1 ++AVXcode: 2 ++# 0x0f 0x38 0x00-0x0f ++00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) ++01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) ++02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) ++03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) ++04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) ++05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) ++06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) ++07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) ++08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) ++09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) ++0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) ++0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) ++0c: vpermilps Vx,Hx,Wx (66),(v) ++0d: vpermilpd Vx,Hx,Wx (66),(v) ++0e: vtestps Vx,Wx (66),(v) ++0f: vtestpd Vx,Wx (66),(v) ++# 0x0f 0x38 0x10-0x1f ++10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) ++11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) ++12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) ++13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) ++14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) ++15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) ++16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) ++17: vptest Vx,Wx (66) ++18: vbroadcastss Vx,Wd (66),(v) ++19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) ++1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) ++1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) ++1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) ++1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) ++1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) ++1f: vpabsq Vx,Wx (66),(ev) ++# 0x0f 0x38 0x20-0x2f ++20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) ++21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) ++22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) ++23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) ++24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) ++25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) ++26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) ++27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) ++28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) ++29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m 
Vk,Ux (F3),(ev) ++2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) ++2b: vpackusdw Vx,Hx,Wx (66),(v1) ++2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) ++2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) ++2e: vmaskmovps Mx,Hx,Vx (66),(v) ++2f: vmaskmovpd Mx,Hx,Vx (66),(v) ++# 0x0f 0x38 0x30-0x3f ++30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) ++31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) ++32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) ++33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) ++34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) ++35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) ++36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) ++37: vpcmpgtq Vx,Hx,Wx (66),(v1) ++38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) ++39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) ++3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) ++3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) ++3c: vpmaxsb Vx,Hx,Wx (66),(v1) ++3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) ++3e: vpmaxuw Vx,Hx,Wx (66),(v1) ++3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) ++# 0x0f 0x38 0x40-0x8f ++40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) ++41: vphminposuw Vdq,Wdq (66),(v1) ++42: vgetexpps/d Vx,Wx (66),(ev) ++43: vgetexpss/d Vx,Hx,Wx (66),(ev) ++44: vplzcntd/q Vx,Wx (66),(ev) ++45: vpsrlvd/q Vx,Hx,Wx (66),(v) ++46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) ++47: vpsllvd/q Vx,Hx,Wx (66),(v) ++# Skip 0x48-0x4b ++4c: vrcp14ps/d Vpd,Wpd (66),(ev) ++4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) ++4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) ++4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) ++# Skip 0x50-0x57 ++58: vpbroadcastd Vx,Wx (66),(v) ++59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) ++5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) ++5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) ++# Skip 0x5c-0x63 ++64: vpblendmd/q Vx,Hx,Wx (66),(ev) ++65: vblendmps/d Vx,Hx,Wx (66),(ev) ++66: vpblendmb/w Vx,Hx,Wx (66),(ev) ++# Skip 0x67-0x74 ++75: vpermi2b/w Vx,Hx,Wx (66),(ev) ++76: vpermi2d/q Vx,Hx,Wx (66),(ev) ++77: vpermi2ps/d Vx,Hx,Wx (66),(ev) ++78: vpbroadcastb Vx,Wx (66),(v) ++79: vpbroadcastw Vx,Wx (66),(v) ++7a: vpbroadcastb Vx,Rv (66),(ev) ++7b: vpbroadcastw Vx,Rv (66),(ev) ++7c: vpbroadcastd/q Vx,Rv (66),(ev) ++7d: vpermt2b/w Vx,Hx,Wx (66),(ev) ++7e: vpermt2d/q Vx,Hx,Wx (66),(ev) ++7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) ++80: INVEPT Gy,Mdq (66) ++81: INVVPID Gy,Mdq (66) ++82: INVPCID Gy,Mdq (66) ++83: vpmultishiftqb Vx,Hx,Wx (66),(ev) ++88: vexpandps/d Vpd,Wpd (66),(ev) ++89: vpexpandd/q Vx,Wx (66),(ev) ++8a: vcompressps/d Wx,Vx (66),(ev) ++8b: vpcompressd/q Wx,Vx (66),(ev) ++8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) ++8d: vpermb/w Vx,Hx,Wx (66),(ev) ++8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) ++# 0x0f 0x38 0x90-0xbf (FMA) ++90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) ++91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) ++92: vgatherdps/d Vx,Hx,Wx (66),(v) ++93: vgatherqps/d Vx,Hx,Wx (66),(v) ++94: ++95: ++96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) ++97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) ++98: vfmadd132ps/d Vx,Hx,Wx (66),(v) ++99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) ++9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) ++9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) ++9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) ++9d: vfnmadd132ss/d Vx,Hx,Wx 
(66),(v),(v1) ++9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) ++9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) ++a0: vpscatterdd/q Wx,Vx (66),(ev) ++a1: vpscatterqd/q Wx,Vx (66),(ev) ++a2: vscatterdps/d Wx,Vx (66),(ev) ++a3: vscatterqps/d Wx,Vx (66),(ev) ++a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) ++a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) ++a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) ++a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ++aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) ++ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) ++ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ++ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ++ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) ++af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) ++b4: vpmadd52luq Vx,Hx,Wx (66),(ev) ++b5: vpmadd52huq Vx,Hx,Wx (66),(ev) ++b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) ++b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) ++b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) ++b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) ++ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) ++bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) ++bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) ++bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) ++be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) ++bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) ++# 0x0f 0x38 0xc0-0xff ++c4: vpconflictd/q Vx,Wx (66),(ev) ++c6: Grp18 (1A) ++c7: Grp19 (1A) ++c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) ++c9: sha1msg1 Vdq,Wdq ++ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) ++cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) ++cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) ++cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) ++db: VAESIMC Vdq,Wdq (66),(v1) ++dc: VAESENC Vdq,Hdq,Wdq (66),(v1) ++dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) ++de: VAESDEC Vdq,Hdq,Wdq (66),(v1) ++df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) ++f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) ++f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) ++f2: ANDN Gy,By,Ey (v) ++f3: Grp17 (1A) ++f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) ++f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) ++f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) ++EndTable ++ ++Table: 3-byte opcode 2 (0x0f 0x3a) ++Referrer: 3-byte escape 2 ++AVXcode: 3 ++# 0x0f 0x3a 0x00-0xff ++00: vpermq Vqq,Wqq,Ib (66),(v) ++01: vpermpd Vqq,Wqq,Ib (66),(v) ++02: vpblendd Vx,Hx,Wx,Ib (66),(v) ++03: valignd/q Vx,Hx,Wx,Ib (66),(ev) ++04: vpermilps Vx,Wx,Ib (66),(v) ++05: vpermilpd Vx,Wx,Ib (66),(v) ++06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) ++07: ++08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) ++09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) ++0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) ++0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) ++0c: vblendps Vx,Hx,Wx,Ib (66) ++0d: vblendpd Vx,Hx,Wx,Ib (66) ++0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) ++0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) ++14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) ++15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) ++16: vpextrd/q Ey,Vdq,Ib (66),(v1) ++17: vextractps Ed,Vdq,Ib (66),(v1) ++18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) ++19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) ++1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) ++1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) ++1d: vcvtps2ph Wx,Vx,Ib (66),(v) ++1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) ++1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) ++20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) ++21: vinsertps Vdq,Hdq,Udq/Md,Ib 
(66),(v1) ++22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) ++23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) ++25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) ++26: vgetmantps/d Vx,Wx,Ib (66),(ev) ++27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) ++30: kshiftrb/w Vk,Uk,Ib (66),(v) ++31: kshiftrd/q Vk,Uk,Ib (66),(v) ++32: kshiftlb/w Vk,Uk,Ib (66),(v) ++33: kshiftld/q Vk,Uk,Ib (66),(v) ++38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) ++39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) ++3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) ++3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) ++3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) ++3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) ++40: vdpps Vx,Hx,Wx,Ib (66) ++41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) ++42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) ++43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) ++44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) ++46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) ++4a: vblendvps Vx,Hx,Wx,Lx (66),(v) ++4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) ++4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) ++50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) ++51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) ++54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) ++55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) ++56: vreduceps/d Vx,Wx,Ib (66),(ev) ++57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) ++60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) ++61: vpcmpestri Vdq,Wdq,Ib (66),(v1) ++62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) ++63: vpcmpistri Vdq,Wdq,Ib (66),(v1) ++66: vfpclassps/d Vk,Wx,Ib (66),(ev) ++67: vfpclassss/d Vk,Wx,Ib (66),(ev) ++cc: sha1rnds4 Vdq,Wdq,Ib ++df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) ++f0: RORX Gy,Ey,Ib (F2),(v) ++EndTable ++ ++GrpTable: Grp1 ++0: ADD ++1: OR ++2: ADC ++3: SBB ++4: AND ++5: SUB ++6: XOR ++7: CMP ++EndTable ++ ++GrpTable: Grp1A ++0: POP ++EndTable ++ ++GrpTable: Grp2 ++0: ROL ++1: ROR ++2: RCL ++3: RCR ++4: SHL/SAL ++5: SHR ++6: ++7: SAR ++EndTable ++ ++GrpTable: Grp3_1 ++0: TEST Eb,Ib ++1: ++2: NOT Eb ++3: NEG Eb ++4: MUL AL,Eb ++5: IMUL AL,Eb ++6: DIV AL,Eb ++7: IDIV AL,Eb ++EndTable ++ ++GrpTable: Grp3_2 ++0: TEST Ev,Iz ++1: ++2: NOT Ev ++3: NEG Ev ++4: MUL rAX,Ev ++5: IMUL rAX,Ev ++6: DIV rAX,Ev ++7: IDIV rAX,Ev ++EndTable ++ ++GrpTable: Grp4 ++0: INC Eb ++1: DEC Eb ++EndTable ++ ++GrpTable: Grp5 ++0: INC Ev ++1: DEC Ev ++# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
++2: CALLN Ev (f64) ++3: CALLF Ep ++4: JMPN Ev (f64) ++5: JMPF Mp ++6: PUSH Ev (d64) ++7: ++EndTable ++ ++GrpTable: Grp6 ++0: SLDT Rv/Mw ++1: STR Rv/Mw ++2: LLDT Ew ++3: LTR Ew ++4: VERR Ew ++5: VERW Ew ++EndTable ++ ++GrpTable: Grp7 ++0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) ++1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) ++2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) ++3: LIDT Ms ++4: SMSW Mw/Rv ++5: rdpkru (110),(11B) | wrpkru (111),(11B) ++6: LMSW Ew ++7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) ++EndTable ++ ++GrpTable: Grp8 ++4: BT ++5: BTS ++6: BTR ++7: BTC ++EndTable ++ ++GrpTable: Grp9 ++1: CMPXCHG8B/16B Mq/Mdq ++3: xrstors ++4: xsavec ++5: xsaves ++6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) ++7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) ++EndTable ++ ++GrpTable: Grp10 ++# all are UD1 ++0: UD1 ++1: UD1 ++2: UD1 ++3: UD1 ++4: UD1 ++5: UD1 ++6: UD1 ++7: UD1 ++EndTable ++ ++# Grp11A and Grp11B are expressed as Grp11 in Intel SDM ++GrpTable: Grp11A ++0: MOV Eb,Ib ++7: XABORT Ib (000),(11B) ++EndTable ++ ++GrpTable: Grp11B ++0: MOV Eb,Iz ++7: XBEGIN Jz (000),(11B) ++EndTable ++ ++GrpTable: Grp12 ++2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) ++4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) ++6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) ++EndTable ++ ++GrpTable: Grp13 ++0: vprord/q Hx,Wx,Ib (66),(ev) ++1: vprold/q Hx,Wx,Ib (66),(ev) ++2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) ++4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) ++6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) ++EndTable ++ ++GrpTable: Grp14 ++2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) ++3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) ++6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) ++7: vpslldq Hx,Ux,Ib (66),(11B),(v1) ++EndTable ++ ++GrpTable: Grp15 ++0: fxsave | RDFSBASE Ry (F3),(11B) ++1: fxstor | RDGSBASE Ry (F3),(11B) ++2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) ++3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) ++4: XSAVE | ptwrite Ey (F3),(11B) ++5: XRSTOR | lfence (11B) ++6: XSAVEOPT | clwb (66) | mfence (11B) ++7: clflush | clflushopt (66) | sfence (11B) ++EndTable ++ ++GrpTable: Grp16 ++0: prefetch NTA ++1: prefetch T0 ++2: prefetch T1 ++3: prefetch T2 ++EndTable ++ ++GrpTable: Grp17 ++1: BLSR By,Ey (v) ++2: BLSMSK By,Ey (v) ++3: BLSI By,Ey (v) ++EndTable ++ ++GrpTable: Grp18 ++1: vgatherpf0dps/d Wx (66),(ev) ++2: vgatherpf1dps/d Wx (66),(ev) ++5: vscatterpf0dps/d Wx (66),(ev) ++6: vscatterpf1dps/d Wx (66),(ev) ++EndTable ++ ++GrpTable: Grp19 ++1: vgatherpf0qps/d Wx (66),(ev) ++2: vgatherpf1qps/d Wx (66),(ev) ++5: vscatterpf0qps/d Wx (66),(ev) ++6: vscatterpf1qps/d Wx (66),(ev) ++EndTable ++ ++# AMD's Prefetch Group ++GrpTable: GrpP ++0: PREFETCH ++1: PREFETCHW ++EndTable ++ ++GrpTable: GrpPDLK ++0: MONTMUL ++1: XSHA1 ++2: XSHA2 ++EndTable ++ ++GrpTable: GrpRNG ++0: xstore-rng ++1: xcrypt-ecb ++2: xcrypt-cbc ++4: xcrypt-cfb ++5: xcrypt-ofb ++EndTable +diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk +new file mode 100644 +index 0000000..b02a36b +--- /dev/null ++++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk +@@ -0,0 +1,393 @@ ++#!/bin/awk -f ++# SPDX-License-Identifier: GPL-2.0 ++# gen-insn-attr-x86.awk: Instruction attribute table 
generator ++# Written by Masami Hiramatsu <mhiramat@redhat.com> ++# ++# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c ++ ++# Awk implementation sanity check ++function check_awk_implement() { ++ if (sprintf("%x", 0) != "0") ++ return "Your awk has a printf-format problem." ++ return "" ++} ++ ++# Clear working vars ++function clear_vars() { ++ delete table ++ delete lptable2 ++ delete lptable1 ++ delete lptable3 ++ eid = -1 # escape id ++ gid = -1 # group id ++ aid = -1 # AVX id ++ tname = "" ++} ++ ++BEGIN { ++ # Implementation error checking ++ awkchecked = check_awk_implement() ++ if (awkchecked != "") { ++ print "Error: " awkchecked > "/dev/stderr" ++ print "Please try to use gawk." > "/dev/stderr" ++ exit 1 ++ } ++ ++ # Setup generating tables ++ print "/* x86 opcode map generated from x86-opcode-map.txt */" ++ print "/* Do not change this code. */\n" ++ ggid = 1 ++ geid = 1 ++ gaid = 0 ++ delete etable ++ delete gtable ++ delete atable ++ ++ opnd_expr = "^[A-Za-z/]" ++ ext_expr = "^\\(" ++ sep_expr = "^\\|$" ++ group_expr = "^Grp[0-9A-Za-z]+" ++ ++ imm_expr = "^[IJAOL][a-z]" ++ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" ++ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" ++ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" ++ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" ++ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" ++ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" ++ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" ++ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" ++ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" ++ imm_flag["Ob"] = "INAT_MOFFSET" ++ imm_flag["Ov"] = "INAT_MOFFSET" ++ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" ++ ++ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" ++ force64_expr = "\\([df]64\\)" ++ rex_expr = "^REX(\\.[XRWB]+)*" ++ fpu_expr = "^ESC" # TODO ++ ++ lprefix1_expr = "\\((66|!F3)\\)" ++ lprefix2_expr = "\\(F3\\)" ++ lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" ++ lprefix_expr = "\\((66|F2|F3)\\)" ++ max_lprefix = 4 ++ ++ # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript ++ # accepts VEX prefix ++ vexok_opcode_expr = "^[vk].*" ++ vexok_expr = "\\(v1\\)" ++ # All opcodes with (v) superscript supports *only* VEX prefix ++ vexonly_expr = "\\(v\\)" ++ # All opcodes with (ev) superscript supports *only* EVEX prefix ++ evexonly_expr = "\\(ev\\)" ++ ++ prefix_expr = "\\(Prefix\\)" ++ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" ++ prefix_num["REPNE"] = "INAT_PFX_REPNE" ++ prefix_num["REP/REPE"] = "INAT_PFX_REPE" ++ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" ++ prefix_num["XRELEASE"] = "INAT_PFX_REPE" ++ prefix_num["LOCK"] = "INAT_PFX_LOCK" ++ prefix_num["SEG=CS"] = "INAT_PFX_CS" ++ prefix_num["SEG=DS"] = "INAT_PFX_DS" ++ prefix_num["SEG=ES"] = "INAT_PFX_ES" ++ prefix_num["SEG=FS"] = "INAT_PFX_FS" ++ prefix_num["SEG=GS"] = "INAT_PFX_GS" ++ prefix_num["SEG=SS"] = "INAT_PFX_SS" ++ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" ++ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" ++ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" ++ prefix_num["EVEX"] = "INAT_PFX_EVEX" ++ ++ clear_vars() ++} ++ ++function semantic_error(msg) { ++ print "Semantic error at " NR ": " msg > "/dev/stderr" ++ exit 1 ++} ++ ++function debug(msg) { ++ print "DEBUG: " msg ++} ++ ++function array_size(arr, i,c) { ++ c = 0 ++ for (i in arr) ++ c++ ++ return c ++} ++ ++/^Table:/ { ++ print "/* " $0 " */" ++ if (tname != "") ++ semantic_error("Hit Table: before EndTable:."); ++} ++ ++/^Referrer:/ { ++ if (NF != 1) { ++ # escape opcode 
table ++ ref = "" ++ for (i = 2; i <= NF; i++) ++ ref = ref $i ++ eid = escape[ref] ++ tname = sprintf("inat_escape_table_%d", eid) ++ } ++} ++ ++/^AVXcode:/ { ++ if (NF != 1) { ++ # AVX/escape opcode table ++ aid = $2 ++ if (gaid <= aid) ++ gaid = aid + 1 ++ if (tname == "") # AVX only opcode table ++ tname = sprintf("inat_avx_table_%d", $2) ++ } ++ if (aid == -1 && eid == -1) # primary opcode table ++ tname = "inat_primary_table" ++} ++ ++/^GrpTable:/ { ++ print "/* " $0 " */" ++ if (!($2 in group)) ++ semantic_error("No group: " $2 ) ++ gid = group[$2] ++ tname = "inat_group_table_" gid ++} ++ ++function print_table(tbl,name,fmt,n) ++{ ++ print "const insn_attr_t " name " = {" ++ for (i = 0; i < n; i++) { ++ id = sprintf(fmt, i) ++ if (tbl[id]) ++ print " [" id "] = " tbl[id] "," ++ } ++ print "};" ++} ++ ++/^EndTable/ { ++ if (gid != -1) { ++ # print group tables ++ if (array_size(table) != 0) { ++ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", ++ "0x%x", 8) ++ gtable[gid,0] = tname ++ } ++ if (array_size(lptable1) != 0) { ++ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", ++ "0x%x", 8) ++ gtable[gid,1] = tname "_1" ++ } ++ if (array_size(lptable2) != 0) { ++ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", ++ "0x%x", 8) ++ gtable[gid,2] = tname "_2" ++ } ++ if (array_size(lptable3) != 0) { ++ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", ++ "0x%x", 8) ++ gtable[gid,3] = tname "_3" ++ } ++ } else { ++ # print primary/escaped tables ++ if (array_size(table) != 0) { ++ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", ++ "0x%02x", 256) ++ etable[eid,0] = tname ++ if (aid >= 0) ++ atable[aid,0] = tname ++ } ++ if (array_size(lptable1) != 0) { ++ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", ++ "0x%02x", 256) ++ etable[eid,1] = tname "_1" ++ if (aid >= 0) ++ atable[aid,1] = tname "_1" ++ } ++ if (array_size(lptable2) != 0) { ++ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", ++ "0x%02x", 256) ++ etable[eid,2] = tname "_2" ++ if (aid >= 0) ++ atable[aid,2] = tname "_2" ++ } ++ if (array_size(lptable3) != 0) { ++ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", ++ "0x%02x", 256) ++ etable[eid,3] = tname "_3" ++ if (aid >= 0) ++ atable[aid,3] = tname "_3" ++ } ++ } ++ print "" ++ clear_vars() ++} ++ ++function add_flags(old,new) { ++ if (old && new) ++ return old " | " new ++ else if (old) ++ return old ++ else ++ return new ++} ++ ++# convert operands to flags. 
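The add_flags() helper above joins flag macro names with " | ", so the generated C initializers read as OR-ed attribute masks, and the convert_operands() rule that follows turns each operand column (for example "Ev,Iz") into immediate-size and ModRM flags. As a rough, self-contained C sketch of what the emitted inat tables let a decoder do — note the MY_* flag values and the two table entries are hypothetical stand-ins, not the kernel's real INAT_* encodings:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t insn_attr_t;

    /* Hypothetical flag encodings standing in for the INAT_* macros. */
    #define MY_MODRM        0x01u        /* instruction has a ModRM byte */
    #define MY_IMM_BYTE     (1u << 4)    /* 1-byte immediate follows     */
    #define MY_IMM_VWORD32  (2u << 4)    /* 16/32-bit immediate follows  */
    #define MY_IMM_MASK     (0xfu << 4)

    /* Two illustrative entries standing in for inat_primary_table[256]. */
    static const insn_attr_t table[256] = {
        [0x81] = MY_MODRM | MY_IMM_VWORD32,  /* e.g. ADD Ev,Iz */
        [0x83] = MY_MODRM | MY_IMM_BYTE,     /* e.g. ADD Ev,Ib */
    };

    int main(void)
    {
        insn_attr_t attr = table[0x83];

        /* A decoder masks out the attributes to size the instruction. */
        printf("has modrm: %d, imm class: %u\n",
               !!(attr & MY_MODRM),
               (unsigned)((attr & MY_IMM_MASK) >> 4));
        return 0;
    }

Compiled standalone this prints "has modrm: 1, imm class: 1" for opcode 0x83; the real decoder applies the same lookup-and-mask pattern across the primary, escape, group and AVX tables this script emits.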
++function convert_operands(count,opnd, i,j,imm,mod) ++{ ++ imm = null ++ mod = null ++ for (j = 1; j <= count; j++) { ++ i = opnd[j] ++ if (match(i, imm_expr) == 1) { ++ if (!imm_flag[i]) ++ semantic_error("Unknown imm opnd: " i) ++ if (imm) { ++ if (i != "Ib") ++ semantic_error("Second IMM error") ++ imm = add_flags(imm, "INAT_SCNDIMM") ++ } else ++ imm = imm_flag[i] ++ } else if (match(i, modrm_expr)) ++ mod = "INAT_MODRM" ++ } ++ return add_flags(imm, mod) ++} ++ ++/^[0-9a-f]+\:/ { ++ if (NR == 1) ++ next ++ # get index ++ idx = "0x" substr($1, 1, index($1,":") - 1) ++ if (idx in table) ++ semantic_error("Redefine " idx " in " tname) ++ ++ # check if escaped opcode ++ if ("escape" == $2) { ++ if ($3 != "#") ++ semantic_error("No escaped name") ++ ref = "" ++ for (i = 4; i <= NF; i++) ++ ref = ref $i ++ if (ref in escape) ++ semantic_error("Redefine escape (" ref ")") ++ escape[ref] = geid ++ geid++ ++ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" ++ next ++ } ++ ++ variant = null ++ # converts ++ i = 2 ++ while (i <= NF) { ++ opcode = $(i++) ++ delete opnds ++ ext = null ++ flags = null ++ opnd = null ++ # parse one opcode ++ if (match($i, opnd_expr)) { ++ opnd = $i ++ count = split($(i++), opnds, ",") ++ flags = convert_operands(count, opnds) ++ } ++ if (match($i, ext_expr)) ++ ext = $(i++) ++ if (match($i, sep_expr)) ++ i++ ++ else if (i < NF) ++ semantic_error($i " is not a separator") ++ ++ # check if group opcode ++ if (match(opcode, group_expr)) { ++ if (!(opcode in group)) { ++ group[opcode] = ggid ++ ggid++ ++ } ++ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") ++ } ++ # check force(or default) 64bit ++ if (match(ext, force64_expr)) ++ flags = add_flags(flags, "INAT_FORCE64") ++ ++ # check REX prefix ++ if (match(opcode, rex_expr)) ++ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") ++ ++ # check coprocessor escape : TODO ++ if (match(opcode, fpu_expr)) ++ flags = add_flags(flags, "INAT_MODRM") ++ ++ # check VEX codes ++ if (match(ext, evexonly_expr)) ++ flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") ++ else if (match(ext, vexonly_expr)) ++ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") ++ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) ++ flags = add_flags(flags, "INAT_VEXOK") ++ ++ # check prefixes ++ if (match(ext, prefix_expr)) { ++ if (!prefix_num[opcode]) ++ semantic_error("Unknown prefix: " opcode) ++ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") ++ } ++ if (length(flags) == 0) ++ continue ++ # check if last prefix ++ if (match(ext, lprefix1_expr)) { ++ lptable1[idx] = add_flags(lptable1[idx],flags) ++ variant = "INAT_VARIANT" ++ } ++ if (match(ext, lprefix2_expr)) { ++ lptable2[idx] = add_flags(lptable2[idx],flags) ++ variant = "INAT_VARIANT" ++ } ++ if (match(ext, lprefix3_expr)) { ++ lptable3[idx] = add_flags(lptable3[idx],flags) ++ variant = "INAT_VARIANT" ++ } ++ if (!match(ext, lprefix_expr)){ ++ table[idx] = add_flags(table[idx],flags) ++ } ++ } ++ if (variant) ++ table[idx] = add_flags(table[idx],variant) ++} ++ ++END { ++ if (awkchecked != "") ++ exit 1 ++ # print escape opcode map's array ++ print "/* Escape opcode map array */" ++ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ ++ "[INAT_LSTPFX_MAX + 1] = {" ++ for (i = 0; i < geid; i++) ++ for (j = 0; j < max_lprefix; j++) ++ if (etable[i,j]) ++ print " ["i"]["j"] = "etable[i,j]"," ++ print "};\n" ++ # print group opcode map's array ++ print "/* Group opcode map array */" ++ print "const 
insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ ++ "[INAT_LSTPFX_MAX + 1] = {" ++ for (i = 0; i < ggid; i++) ++ for (j = 0; j < max_lprefix; j++) ++ if (gtable[i,j]) ++ print " ["i"]["j"] = "gtable[i,j]"," ++ print "};\n" ++ # print AVX opcode map's array ++ print "/* AVX opcode map array */" ++ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ ++ "[INAT_LSTPFX_MAX + 1] = {" ++ for (i = 0; i < gaid; i++) ++ for (j = 0; j < max_lprefix; j++) ++ if (atable[i,j]) ++ print " ["i"]["j"] = "atable[i,j]"," ++ print "};" ++} ++ +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index 365c34e..694abc6 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -29,7 +29,7 @@ + #include "builtin.h" + #include "check.h" + +-bool nofp; ++bool no_fp, no_unreachable, retpoline, module; + + static const char * const check_usage[] = { + "objtool check [<options>] file.o", +@@ -37,7 +37,10 @@ static const char * const check_usage[] = { + }; + + const struct option check_options[] = { +- OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), ++ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), ++ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), ++ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_END(), + }; + +@@ -52,5 +55,5 @@ int cmd_check(int argc, const char **argv) + + objname = argv[0]; + +- return check(objname, nofp); ++ return check(objname, false); + } +diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c +new file mode 100644 +index 0000000..77ea2b9 +--- /dev/null ++++ b/tools/objtool/builtin-orc.c +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++/* ++ * objtool orc: ++ * ++ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip ++ * sections to it, which is used by the in-kernel ORC unwinder. ++ * ++ * This command is a superset of "objtool check". 
++ */ ++ ++#include <string.h> ++#include "builtin.h" ++#include "check.h" ++ ++ ++static const char *orc_usage[] = { ++ "objtool orc generate [<options>] file.o", ++ "objtool orc dump file.o", ++ NULL, ++}; ++ ++int cmd_orc(int argc, const char **argv) ++{ ++ const char *objname; ++ ++ argc--; argv++; ++ if (argc <= 0) ++ usage_with_options(orc_usage, check_options); ++ ++ if (!strncmp(argv[0], "gen", 3)) { ++ argc = parse_options(argc, argv, check_options, orc_usage, 0); ++ if (argc != 1) ++ usage_with_options(orc_usage, check_options); ++ ++ objname = argv[0]; ++ ++ return check(objname, true); ++ } ++ ++ if (!strcmp(argv[0], "dump")) { ++ if (argc != 2) ++ usage_with_options(orc_usage, check_options); ++ ++ objname = argv[1]; ++ ++ return orc_dump(objname); ++ } ++ ++ usage_with_options(orc_usage, check_options); ++ ++ return 0; ++} +diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h +index 34d2ba7..28ff40e 100644 +--- a/tools/objtool/builtin.h ++++ b/tools/objtool/builtin.h +@@ -17,6 +17,12 @@ + #ifndef _BUILTIN_H + #define _BUILTIN_H + ++#include <subcmd/parse-options.h> ++ ++extern const struct option check_options[]; ++extern bool no_fp, no_unreachable, retpoline, module; ++ + extern int cmd_check(int argc, const char **argv); ++extern int cmd_orc(int argc, const char **argv); + + #endif /* _BUILTIN_H */ +diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h +new file mode 100644 +index 0000000..2fe883c +--- /dev/null ++++ b/tools/objtool/cfi.h +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#ifndef _OBJTOOL_CFI_H ++#define _OBJTOOL_CFI_H ++ ++#define CFI_UNDEFINED -1 ++#define CFI_CFA -2 ++#define CFI_SP_INDIRECT -3 ++#define CFI_BP_INDIRECT -4 ++ ++#define CFI_AX 0 ++#define CFI_DX 1 ++#define CFI_CX 2 ++#define CFI_BX 3 ++#define CFI_SI 4 ++#define CFI_DI 5 ++#define CFI_BP 6 ++#define CFI_SP 7 ++#define CFI_R8 8 ++#define CFI_R9 9 ++#define CFI_R10 10 ++#define CFI_R11 11 ++#define CFI_R12 12 ++#define CFI_R13 13 ++#define CFI_R14 14 ++#define CFI_R15 15 ++#define CFI_RA 16 ++#define CFI_NUM_REGS 17 ++ ++struct cfi_reg { ++ int base; ++ int offset; ++}; ++ ++struct cfi_state { ++ struct cfi_reg cfa; ++ struct cfi_reg regs[CFI_NUM_REGS]; ++}; ++ ++#endif /* _OBJTOOL_CFI_H */ +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index b7a0af5..c8b8b71 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -18,6 +18,7 @@ + #include <string.h> + #include <stdlib.h> + ++#include "builtin.h" + #include "check.h" + #include "elf.h" + #include "special.h" +@@ -25,12 +26,7 @@ + #include "warn.h" + + #include <linux/hashtable.h> +- +-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +- +-#define STATE_FP_SAVED 0x1 +-#define STATE_FP_SETUP 0x2 +-#define STATE_FENTRY 0x4 ++#include <linux/kernel.h> + + struct alternative { + struct list_head list; +@@ -38,10 +34,10 @@ struct alternative { + }; + + const char *objname; +-static bool nofp; ++struct cfi_state initial_func_cfi; + +-static struct instruction *find_insn(struct objtool_file *file, +- struct section *sec, unsigned long offset) ++struct instruction *find_insn(struct objtool_file *file, ++ struct section *sec, unsigned long offset) + { + struct instruction *insn; + +@@ -57,28 +53,12 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, + { + struct instruction *next = list_next_entry(insn, list); + +- if (&next->list == &file->insn_list || next->sec != insn->sec) ++ if (!next || &next->list == &file->insn_list || next->sec != insn->sec) + return NULL; + + return next; + } + +-static bool gcov_enabled(struct objtool_file *file) +-{ +- struct section *sec; +- struct symbol *sym; +- +- list_for_each_entry(sec, &file->elf->sections, list) +- list_for_each_entry(sym, &sec->symbol_list, list) +- if (!strncmp(sym->name, "__gcov_.", 8)) +- return true; +- +- return false; +-} +- +-#define for_each_insn(file, insn) \ +- list_for_each_entry(insn, &file->insn_list, list) +- + #define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn && &insn->list != &file->insn_list && \ +@@ -95,6 +75,9 @@ static bool gcov_enabled(struct objtool_file *file) + #define sec_for_each_insn_from(file, insn) \ + for (; insn; insn = next_insn_same_sec(file, insn)) + ++#define sec_for_each_insn_continue(file, insn) \ ++ for (insn = next_insn_same_sec(file, insn); insn; \ ++ insn = next_insn_same_sec(file, insn)) + + /* + * Check if the function has been manually whitelisted with the +@@ -104,7 +87,6 @@ static bool gcov_enabled(struct objtool_file *file) + static bool ignore_func(struct objtool_file *file, struct symbol *func) + { + struct rela *rela; +- struct instruction *insn; + + /* check for STACK_FRAME_NON_STANDARD */ + if (file->whitelist && file->whitelist->rela) +@@ -117,11 +99,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func) + return true; + } + +- /* check if it has a context switching instruction */ +- func_for_each_insn(file, func, insn) +- if (insn->type == INSN_CONTEXT_SWITCH) +- return true; +- + return false; + } 
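The cfi.h header above gives objtool its model of unwind state: the CFA (canonical frame address) and each callee-saved register are tracked as a (base register, offset) pair. Below is a minimal sketch of how the classic frame-pointer prologue moves that state; the struct layout and CFI_* values are taken from cfi.h above, but the do_push_rbp()/do_mov_rsp_rbp() helpers are hypothetical simplifications of the update_insn_state() logic added further down, not objtool's actual code:

    #include <stdio.h>

    #define CFI_UNDEFINED -1
    #define CFI_CFA       -2
    #define CFI_BP         6
    #define CFI_SP         7
    #define CFI_NUM_REGS  17

    struct cfi_reg   { int base; int offset; };
    struct cfi_state { struct cfi_reg cfa; struct cfi_reg regs[CFI_NUM_REGS]; };

    /* push %rbp: the frame grows by 8 and %rbp is saved at CFA-8-old_offset. */
    static void do_push_rbp(struct cfi_state *s)
    {
        s->cfa.offset += 8;
        s->regs[CFI_BP].base = CFI_CFA;
        s->regs[CFI_BP].offset = -s->cfa.offset;
    }

    /* mov %rsp,%rbp: the CFA is now tracked relative to %rbp. */
    static void do_mov_rsp_rbp(struct cfi_state *s)
    {
        s->cfa.base = CFI_BP;
    }

    int main(void)
    {
        /* Just after CALL: CFA is %rsp+8 (return address on the stack). */
        struct cfi_state s = { .cfa = { CFI_SP, 8 } };
        int i;

        for (i = 0; i < CFI_NUM_REGS; i++)
            s.regs[i].base = CFI_UNDEFINED;

        do_push_rbp(&s);     /* push %rbp      */
        do_mov_rsp_rbp(&s);  /* mov %rsp,%rbp  */

        printf("cfa = (base %d, offset %d), rbp saved at %d\n",
               s.cfa.base, s.cfa.offset, s.regs[CFI_BP].offset);
        return 0;
    }

After the two steps the state satisfies the shape that the has_valid_stack_frame() test introduced below looks for: cfa.base == CFI_BP with %rbp saved at CFA-16.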
+ +@@ -159,7 +136,8 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, + "complete_and_exit", + "kvm_spurious_fault", + "__reiserfs_panic", +- "lbug_with_loc" ++ "lbug_with_loc", ++ "fortify_panic", + }; + + if (func->bind == STB_WEAK) +@@ -234,6 +212,20 @@ static int dead_end_function(struct objtool_file *file, struct symbol *func) + return __dead_end_function(file, func, 0); + } + ++static void clear_insn_state(struct insn_state *state) ++{ ++ int i; ++ ++ memset(state, 0, sizeof(*state)); ++ state->cfa.base = CFI_UNDEFINED; ++ for (i = 0; i < CFI_NUM_REGS; i++) { ++ state->regs[i].base = CFI_UNDEFINED; ++ state->vals[i].base = CFI_UNDEFINED; ++ } ++ state->drap_reg = CFI_UNDEFINED; ++ state->drap_offset = -1; ++} ++ + /* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. +@@ -246,30 +238,42 @@ static int decode_instructions(struct objtool_file *file) + struct instruction *insn; + int ret; + +- list_for_each_entry(sec, &file->elf->sections, list) { ++ for_each_sec(file, sec) { + + if (!(sec->sh.sh_flags & SHF_EXECINSTR)) + continue; + ++ if (strcmp(sec->name, ".altinstr_replacement") && ++ strcmp(sec->name, ".altinstr_aux") && ++ strncmp(sec->name, ".discard.", 9)) ++ sec->text = true; ++ + for (offset = 0; offset < sec->len; offset += insn->len) { + insn = malloc(sizeof(*insn)); ++ if (!insn) { ++ WARN("malloc failed"); ++ return -1; ++ } + memset(insn, 0, sizeof(*insn)); +- + INIT_LIST_HEAD(&insn->alts); ++ clear_insn_state(&insn->state); ++ + insn->sec = sec; + insn->offset = offset; + + ret = arch_decode_instruction(file->elf, sec, offset, + sec->len - offset, + &insn->len, &insn->type, +- &insn->immediate); ++ &insn->immediate, ++ &insn->stack_op); + if (ret) +- return ret; ++ goto err; + + if (!insn->type || insn->type > INSN_LAST) { + WARN_FUNC("invalid instruction type %d", + insn->sec, insn->offset, insn->type); +- return -1; ++ ret = -1; ++ goto err; + } + + hash_add(file->insn_hash, &insn->hash, insn->offset); +@@ -293,10 +297,14 @@ static int decode_instructions(struct objtool_file *file) + } + + return 0; ++ ++err: ++ free(insn); ++ return ret; + } + + /* +- * Find all uses of the unreachable() macro, which are code path dead ends. ++ * Mark "ud2" instructions and manually annotated dead ends. + */ + static int add_dead_ends(struct objtool_file *file) + { +@@ -305,13 +313,24 @@ static int add_dead_ends(struct objtool_file *file) + struct instruction *insn; + bool found; + +- sec = find_section_by_name(file->elf, ".rela__unreachable"); ++ /* ++ * By default, "ud2" is a dead end unless otherwise annotated, because ++ * GCC 7 inserts it for certain divide-by-zero cases. ++ */ ++ for_each_insn(file, insn) ++ if (insn->type == INSN_BUG) ++ insn->dead_end = true; ++ ++ /* ++ * Check for manually annotated dead ends. ++ */ ++ sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); + if (!sec) +- return 0; ++ goto reachable; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { +- WARN("unexpected relocation symbol type in .rela__unreachable"); ++ WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + insn = find_insn(file, rela->sym->sec, rela->addend); +@@ -340,6 +359,48 @@ static int add_dead_ends(struct objtool_file *file) + insn->dead_end = true; + } + ++reachable: ++ /* ++ * These manually annotated reachable checks are needed for GCC 4.4, ++ * where the Linux unreachable() macro isn't supported. 
In that case ++ * GCC doesn't know the "ud2" is fatal, so it generates code as if it's ++ * not a dead end. ++ */ ++ sec = find_section_by_name(file->elf, ".rela.discard.reachable"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in %s", sec->name); ++ return -1; ++ } ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (insn) ++ insn = list_prev_entry(insn, list); ++ else if (rela->addend == rela->sym->sec->len) { ++ found = false; ++ list_for_each_entry_reverse(insn, &file->insn_list, list) { ++ if (insn->sec == rela->sym->sec) { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) { ++ WARN("can't find reachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ } else { ++ WARN("can't find reachable insn at %s+0x%x", ++ rela->sym->sec->name, rela->addend); ++ return -1; ++ } ++ ++ insn->dead_end = false; ++ } ++ + return 0; + } + +@@ -352,7 +413,7 @@ static void add_ignores(struct objtool_file *file) + struct section *sec; + struct symbol *func; + +- list_for_each_entry(sec, &file->elf->sections, list) { ++ for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; +@@ -361,7 +422,7 @@ static void add_ignores(struct objtool_file *file) + continue; + + func_for_each_insn(file, func, insn) +- insn->visited = true; ++ insn->ignore = true; + } + } + } +@@ -415,8 +476,7 @@ static int add_jump_destinations(struct objtool_file *file) + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + +- /* skip ignores */ +- if (insn->visited) ++ if (insn->ignore) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, +@@ -436,6 +496,7 @@ static int add_jump_destinations(struct objtool_file *file) + * disguise, so convert them accordingly. + */ + insn->type = INSN_JUMP_DYNAMIC; ++ insn->retpoline_safe = true; + continue; + } else { + /* sibling call */ +@@ -483,18 +544,15 @@ static int add_call_destinations(struct objtool_file *file) + dest_off = insn->offset + insn->len + insn->immediate; + insn->call_dest = find_symbol_by_offset(insn->sec, + dest_off); +- /* +- * FIXME: Thanks to retpolines, it's now considered +- * normal for a function to call within itself. So +- * disable this warning for now. 
+- */ +-#if 0 +- if (!insn->call_dest) { +- WARN_FUNC("can't find call dest symbol at offset 0x%lx", +- insn->sec, insn->offset, dest_off); ++ ++ if (!insn->call_dest && !insn->ignore) { ++ WARN_FUNC("unsupported intra-function call", ++ insn->sec, insn->offset); ++ if (retpoline) ++ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); + return -1; + } +-#endif ++ + } else if (rela->sym->type == STT_SECTION) { + insn->call_dest = find_symbol_by_offset(rela->sym->sec, + rela->addend+4); +@@ -538,7 +596,7 @@ static int handle_group_alt(struct objtool_file *file, + struct instruction *orig_insn, + struct instruction **new_insn) + { +- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; ++ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; + unsigned long dest_off; + + last_orig_insn = NULL; +@@ -554,25 +612,30 @@ static int handle_group_alt(struct objtool_file *file, + last_orig_insn = insn; + } + +- if (!next_insn_same_sec(file, last_orig_insn)) { +- WARN("%s: don't know how to handle alternatives at end of section", +- special_alt->orig_sec->name); +- return -1; +- } +- +- fake_jump = malloc(sizeof(*fake_jump)); +- if (!fake_jump) { +- WARN("malloc failed"); +- return -1; ++ if (next_insn_same_sec(file, last_orig_insn)) { ++ fake_jump = malloc(sizeof(*fake_jump)); ++ if (!fake_jump) { ++ WARN("malloc failed"); ++ return -1; ++ } ++ memset(fake_jump, 0, sizeof(*fake_jump)); ++ INIT_LIST_HEAD(&fake_jump->alts); ++ clear_insn_state(&fake_jump->state); ++ ++ fake_jump->sec = special_alt->new_sec; ++ fake_jump->offset = -1; ++ fake_jump->type = INSN_JUMP_UNCONDITIONAL; ++ fake_jump->jump_dest = list_next_entry(last_orig_insn, list); ++ fake_jump->ignore = true; + } +- memset(fake_jump, 0, sizeof(*fake_jump)); +- INIT_LIST_HEAD(&fake_jump->alts); +- fake_jump->sec = special_alt->new_sec; +- fake_jump->offset = -1; +- fake_jump->type = INSN_JUMP_UNCONDITIONAL; +- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + + if (!special_alt->new_len) { ++ if (!fake_jump) { ++ WARN("%s: empty alternative at end of section", ++ special_alt->orig_sec->name); ++ return -1; ++ } ++ + *new_insn = fake_jump; + return 0; + } +@@ -585,6 +648,8 @@ static int handle_group_alt(struct objtool_file *file, + + last_new_insn = insn; + ++ insn->ignore = orig_insn->ignore_alts; ++ + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; +@@ -593,8 +658,14 @@ static int handle_group_alt(struct objtool_file *file, + continue; + + dest_off = insn->offset + insn->len + insn->immediate; +- if (dest_off == special_alt->new_off + special_alt->new_len) ++ if (dest_off == special_alt->new_off + special_alt->new_len) { ++ if (!fake_jump) { ++ WARN("%s: alternative jump to end of section", ++ special_alt->orig_sec->name); ++ return -1; ++ } + insn->jump_dest = fake_jump; ++ } + + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", +@@ -609,7 +680,8 @@ static int handle_group_alt(struct objtool_file *file, + return -1; + } + +- list_add(&fake_jump->list, &last_new_insn->list); ++ if (fake_jump) ++ list_add(&fake_jump->list, &last_new_insn->list); + + return 0; + } +@@ -656,6 +728,7 @@ static int add_special_section_alts(struct objtool_file *file) + return ret; + + list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { ++ + orig_insn = find_insn(file, special_alt->orig_sec, + special_alt->orig_off); + if (!orig_insn) { +@@ -665,10 
+738,6 @@ static int add_special_section_alts(struct objtool_file *file) + goto out; + } + +- /* Ignore retpoline alternatives. */ +- if (orig_insn->ignore_alts) +- continue; +- + new_insn = NULL; + if (!special_alt->group || special_alt->new_len) { + new_insn = find_insn(file, special_alt->new_sec, +@@ -784,8 +853,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, + * This is a fairly uncommon pattern which is new for GCC 6. As of this + * writing, there are 11 occurrences of it in the allmodconfig kernel. + * ++ * As of GCC 7 there are quite a few more of these and the 'in between' code ++ * is significant. Esp. with KASAN enabled some of the code between the mov ++ * and jmpq uses .rodata itself, which can confuse things. ++ * + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, + * ensure the same register is used in the mov and jump instructions. ++ * ++ * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + */ + static struct rela *find_switch_table(struct objtool_file *file, + struct symbol *func, +@@ -807,12 +882,25 @@ static struct rela *find_switch_table(struct objtool_file *file, + text_rela->addend + 4); + if (!rodata_rela) + return NULL; ++ + file->ignore_unreachables = true; + return rodata_rela; + } + + /* case 3 */ +- func_for_each_insn_continue_reverse(file, func, insn) { ++ /* ++ * Backward search using the @first_jump_src links, these help avoid ++ * much of the 'in between' code. Which avoids us getting confused by ++ * it. ++ */ ++ for (insn = list_prev_entry(insn, list); ++ ++ &insn->list != &file->insn_list && ++ insn->sec == func->sec && ++ insn->offset >= func->offset; ++ ++ insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { ++ + if (insn->type == INSN_JUMP_DYNAMIC) + break; + +@@ -836,20 +924,42 @@ static struct rela *find_switch_table(struct objtool_file *file, + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + +- return find_rela_by_dest(file->rodata, text_rela->addend); ++ rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); ++ if (!rodata_rela) ++ continue; ++ ++ return rodata_rela; + } + + return NULL; + } + ++ + static int add_func_switch_tables(struct objtool_file *file, + struct symbol *func) + { +- struct instruction *insn, *prev_jump = NULL; ++ struct instruction *insn, *last = NULL, *prev_jump = NULL; + struct rela *rela, *prev_rela = NULL; + int ret; + + func_for_each_insn(file, func, insn) { ++ if (!last) ++ last = insn; ++ ++ /* ++ * Store back-pointers for unconditional forward jumps such ++ * that find_switch_table() can back-track using those and ++ * avoid some potentially confusing code. 
++ */ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && ++ insn->offset > last->offset && ++ insn->jump_dest->offset > insn->offset && ++ !insn->jump_dest->first_jump_src) { ++ ++ insn->jump_dest->first_jump_src = insn; ++ last = insn->jump_dest; ++ } ++ + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + +@@ -896,7 +1006,7 @@ static int add_switch_table_alts(struct objtool_file *file) + if (!file->rodata || !file->rodata->rela) + return 0; + +- list_for_each_entry(sec, &file->elf->sections, list) { ++ for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; +@@ -910,6 +1020,134 @@ static int add_switch_table_alts(struct objtool_file *file) + return 0; + } + ++static int read_unwind_hints(struct objtool_file *file) ++{ ++ struct section *sec, *relasec; ++ struct rela *rela; ++ struct unwind_hint *hint; ++ struct instruction *insn; ++ struct cfi_reg *cfa; ++ int i; ++ ++ sec = find_section_by_name(file->elf, ".discard.unwind_hints"); ++ if (!sec) ++ return 0; ++ ++ relasec = sec->rela; ++ if (!relasec) { ++ WARN("missing .rela.discard.unwind_hints section"); ++ return -1; ++ } ++ ++ if (sec->len % sizeof(struct unwind_hint)) { ++ WARN("struct unwind_hint size mismatch"); ++ return -1; ++ } ++ ++ file->hints = true; ++ ++ for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { ++ hint = (struct unwind_hint *)sec->data->d_buf + i; ++ ++ rela = find_rela_by_dest(sec, i * sizeof(*hint)); ++ if (!rela) { ++ WARN("can't find rela for unwind_hints[%d]", i); ++ return -1; ++ } ++ ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (!insn) { ++ WARN("can't find insn for unwind_hints[%d]", i); ++ return -1; ++ } ++ ++ cfa = &insn->state.cfa; ++ ++ if (hint->type == UNWIND_HINT_TYPE_SAVE) { ++ insn->save = true; ++ continue; ++ ++ } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { ++ insn->restore = true; ++ insn->hint = true; ++ continue; ++ } ++ ++ insn->hint = true; ++ ++ switch (hint->sp_reg) { ++ case ORC_REG_UNDEFINED: ++ cfa->base = CFI_UNDEFINED; ++ break; ++ case ORC_REG_SP: ++ cfa->base = CFI_SP; ++ break; ++ case ORC_REG_BP: ++ cfa->base = CFI_BP; ++ break; ++ case ORC_REG_SP_INDIRECT: ++ cfa->base = CFI_SP_INDIRECT; ++ break; ++ case ORC_REG_R10: ++ cfa->base = CFI_R10; ++ break; ++ case ORC_REG_R13: ++ cfa->base = CFI_R13; ++ break; ++ case ORC_REG_DI: ++ cfa->base = CFI_DI; ++ break; ++ case ORC_REG_DX: ++ cfa->base = CFI_DX; ++ break; ++ default: ++ WARN_FUNC("unsupported unwind_hint sp base reg %d", ++ insn->sec, insn->offset, hint->sp_reg); ++ return -1; ++ } ++ ++ cfa->offset = hint->sp_offset; ++ insn->state.type = hint->type; ++ } ++ ++ return 0; ++} ++ ++static int read_retpoline_hints(struct objtool_file *file) ++{ ++ struct section *sec; ++ struct instruction *insn; ++ struct rela *rela; ++ ++ sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); ++ if (!sec) ++ return 0; ++ ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ if (rela->sym->type != STT_SECTION) { ++ WARN("unexpected relocation symbol type in %s", sec->name); ++ return -1; ++ } ++ ++ insn = find_insn(file, rela->sym->sec, rela->addend); ++ if (!insn) { ++ WARN("bad .discard.retpoline_safe entry"); ++ return -1; ++ } ++ ++ if (insn->type != INSN_JUMP_DYNAMIC && ++ insn->type != INSN_CALL_DYNAMIC) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ++ insn->retpoline_safe = true; ++ } ++ ++ return 0; ++} ++ + static int 
decode_sections(struct objtool_file *file) + { + int ret; +@@ -932,11 +1170,11 @@ static int decode_sections(struct objtool_file *file) + if (ret) + return ret; + +- ret = add_call_destinations(file); ++ ret = add_special_section_alts(file); + if (ret) + return ret; + +- ret = add_special_section_alts(file); ++ ret = add_call_destinations(file); + if (ret) + return ret; + +@@ -944,6 +1182,14 @@ static int decode_sections(struct objtool_file *file) + if (ret) + return ret; + ++ ret = read_unwind_hints(file); ++ if (ret) ++ return ret; ++ ++ ret = read_retpoline_hints(file); ++ if (ret) ++ return ret; ++ + return 0; + } + +@@ -957,125 +1203,647 @@ static bool is_fentry_call(struct instruction *insn) + return false; + } + +-static bool has_modified_stack_frame(struct instruction *insn) ++static bool has_modified_stack_frame(struct insn_state *state) ++{ ++ int i; ++ ++ if (state->cfa.base != initial_func_cfi.cfa.base || ++ state->cfa.offset != initial_func_cfi.cfa.offset || ++ state->stack_size != initial_func_cfi.cfa.offset || ++ state->drap) ++ return true; ++ ++ for (i = 0; i < CFI_NUM_REGS; i++) ++ if (state->regs[i].base != initial_func_cfi.regs[i].base || ++ state->regs[i].offset != initial_func_cfi.regs[i].offset) ++ return true; ++ ++ return false; ++} ++ ++static bool has_valid_stack_frame(struct insn_state *state) ++{ ++ if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA && ++ state->regs[CFI_BP].offset == -16) ++ return true; ++ ++ if (state->drap && state->regs[CFI_BP].base == CFI_BP) ++ return true; ++ ++ return false; ++} ++ ++static int update_insn_state_regs(struct instruction *insn, struct insn_state *state) + { +- return (insn->state & STATE_FP_SAVED) || +- (insn->state & STATE_FP_SETUP); ++ struct cfi_reg *cfa = &state->cfa; ++ struct stack_op *op = &insn->stack_op; ++ ++ if (cfa->base != CFI_SP) ++ return 0; ++ ++ /* push */ ++ if (op->dest.type == OP_DEST_PUSH) ++ cfa->offset += 8; ++ ++ /* pop */ ++ if (op->src.type == OP_SRC_POP) ++ cfa->offset -= 8; ++ ++ /* add immediate to sp */ ++ if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && ++ op->dest.reg == CFI_SP && op->src.reg == CFI_SP) ++ cfa->offset -= op->src.offset; ++ ++ return 0; + } + +-static bool has_valid_stack_frame(struct instruction *insn) ++static void save_reg(struct insn_state *state, unsigned char reg, int base, ++ int offset) + { +- return (insn->state & STATE_FP_SAVED) && +- (insn->state & STATE_FP_SETUP); ++ if (arch_callee_saved_reg(reg) && ++ state->regs[reg].base == CFI_UNDEFINED) { ++ state->regs[reg].base = base; ++ state->regs[reg].offset = offset; ++ } + } + +-static unsigned int frame_state(unsigned long state) ++static void restore_reg(struct insn_state *state, unsigned char reg) + { +- return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); ++ state->regs[reg].base = CFI_UNDEFINED; ++ state->regs[reg].offset = 0; + } + + /* +- * Follow the branch starting at the given instruction, and recursively follow +- * any other branches (jumps). Meanwhile, track the frame pointer state at +- * each instruction and validate all the rules described in +- * tools/objtool/Documentation/stack-validation.txt. ++ * A note about DRAP stack alignment: ++ * ++ * GCC has the concept of a DRAP register, which is used to help keep track of ++ * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP ++ * register. 
The typical DRAP pattern is: ++ * ++ * 4c 8d 54 24 08 lea 0x8(%rsp),%r10 ++ * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp ++ * 41 ff 72 f8 pushq -0x8(%r10) ++ * 55 push %rbp ++ * 48 89 e5 mov %rsp,%rbp ++ * (more pushes) ++ * 41 52 push %r10 ++ * ... ++ * 41 5a pop %r10 ++ * (more pops) ++ * 5d pop %rbp ++ * 49 8d 62 f8 lea -0x8(%r10),%rsp ++ * c3 retq ++ * ++ * There are some variations in the epilogues, like: ++ * ++ * 5b pop %rbx ++ * 41 5a pop %r10 ++ * 41 5c pop %r12 ++ * 41 5d pop %r13 ++ * 41 5e pop %r14 ++ * c9 leaveq ++ * 49 8d 62 f8 lea -0x8(%r10),%rsp ++ * c3 retq ++ * ++ * and: ++ * ++ * 4c 8b 55 e8 mov -0x18(%rbp),%r10 ++ * 48 8b 5d e0 mov -0x20(%rbp),%rbx ++ * 4c 8b 65 f0 mov -0x10(%rbp),%r12 ++ * 4c 8b 6d f8 mov -0x8(%rbp),%r13 ++ * c9 leaveq ++ * 49 8d 62 f8 lea -0x8(%r10),%rsp ++ * c3 retq ++ * ++ * Sometimes r13 is used as the DRAP register, in which case it's saved and ++ * restored beforehand: ++ * ++ * 41 55 push %r13 ++ * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13 ++ * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp ++ * ... ++ * 49 8d 65 f0 lea -0x10(%r13),%rsp ++ * 41 5d pop %r13 ++ * c3 retq + */ +-static int validate_branch(struct objtool_file *file, +- struct instruction *first, unsigned char first_state) ++static int update_insn_state(struct instruction *insn, struct insn_state *state) + { +- struct alternative *alt; +- struct instruction *insn; +- struct section *sec; +- struct symbol *func = NULL; +- unsigned char state; +- int ret; ++ struct stack_op *op = &insn->stack_op; ++ struct cfi_reg *cfa = &state->cfa; ++ struct cfi_reg *regs = state->regs; ++ ++ /* stack operations don't make sense with an undefined CFA */ ++ if (cfa->base == CFI_UNDEFINED) { ++ if (insn->func) { ++ WARN_FUNC("undefined stack state", insn->sec, insn->offset); ++ return -1; ++ } ++ return 0; ++ } + +- insn = first; +- sec = insn->sec; +- state = first_state; ++ if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET) ++ return update_insn_state_regs(insn, state); + +- if (insn->alt_group && list_empty(&insn->alts)) { +- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", +- sec, insn->offset); +- return 1; +- } ++ switch (op->dest.type) { + +- while (1) { +- if (file->c_file && insn->func) { +- if (func && func != insn->func) { +- WARN("%s() falls through to next function %s()", +- func->name, insn->func->name); +- return 1; +- } ++ case OP_DEST_REG: ++ switch (op->src.type) { + +- func = insn->func; +- } ++ case OP_SRC_REG: ++ if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && ++ cfa->base == CFI_SP && ++ regs[CFI_BP].base == CFI_CFA && ++ regs[CFI_BP].offset == -cfa->offset) { + +- if (insn->visited) { +- if (frame_state(insn->state) != frame_state(state)) { +- WARN_FUNC("frame pointer state mismatch", +- sec, insn->offset); +- return 1; ++ /* mov %rsp, %rbp */ ++ cfa->base = op->dest.reg; ++ state->bp_scratch = false; + } + +- return 0; ++ else if (op->src.reg == CFI_SP && ++ op->dest.reg == CFI_BP && state->drap) { ++ ++ /* drap: mov %rsp, %rbp */ ++ regs[CFI_BP].base = CFI_BP; ++ regs[CFI_BP].offset = -state->stack_size; ++ state->bp_scratch = false; ++ } ++ ++ else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { ++ ++ /* ++ * mov %rsp, %reg ++ * ++ * This is needed for the rare case where GCC ++ * does: ++ * ++ * mov %rsp, %rax ++ * ... 
++ * mov %rax, %rsp ++ */ ++ state->vals[op->dest.reg].base = CFI_CFA; ++ state->vals[op->dest.reg].offset = -state->stack_size; ++ } ++ ++ else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && ++ cfa->base == CFI_BP) { ++ ++ /* ++ * mov %rbp, %rsp ++ * ++ * Restore the original stack pointer (Clang). ++ */ ++ state->stack_size = -state->regs[CFI_BP].offset; ++ } ++ ++ else if (op->dest.reg == cfa->base) { ++ ++ /* mov %reg, %rsp */ ++ if (cfa->base == CFI_SP && ++ state->vals[op->src.reg].base == CFI_CFA) { ++ ++ /* ++ * This is needed for the rare case ++ * where GCC does something dumb like: ++ * ++ * lea 0x8(%rsp), %rcx ++ * ... ++ * mov %rcx, %rsp ++ */ ++ cfa->offset = -state->vals[op->src.reg].offset; ++ state->stack_size = cfa->offset; ++ ++ } else { ++ cfa->base = CFI_UNDEFINED; ++ cfa->offset = 0; ++ } ++ } ++ ++ break; ++ ++ case OP_SRC_ADD: ++ if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { ++ ++ /* add imm, %rsp */ ++ state->stack_size -= op->src.offset; ++ if (cfa->base == CFI_SP) ++ cfa->offset -= op->src.offset; ++ break; ++ } ++ ++ if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { ++ ++ /* lea disp(%rbp), %rsp */ ++ state->stack_size = -(op->src.offset + regs[CFI_BP].offset); ++ break; ++ } ++ ++ if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { ++ ++ /* drap: lea disp(%rsp), %drap */ ++ state->drap_reg = op->dest.reg; ++ ++ /* ++ * lea disp(%rsp), %reg ++ * ++ * This is needed for the rare case where GCC ++ * does something dumb like: ++ * ++ * lea 0x8(%rsp), %rcx ++ * ... ++ * mov %rcx, %rsp ++ */ ++ state->vals[op->dest.reg].base = CFI_CFA; ++ state->vals[op->dest.reg].offset = \ ++ -state->stack_size + op->src.offset; ++ ++ break; ++ } ++ ++ if (state->drap && op->dest.reg == CFI_SP && ++ op->src.reg == state->drap_reg) { ++ ++ /* drap: lea disp(%drap), %rsp */ ++ cfa->base = CFI_SP; ++ cfa->offset = state->stack_size = -op->src.offset; ++ state->drap_reg = CFI_UNDEFINED; ++ state->drap = false; ++ break; ++ } ++ ++ if (op->dest.reg == state->cfa.base) { ++ WARN_FUNC("unsupported stack register modification", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ++ break; ++ ++ case OP_SRC_AND: ++ if (op->dest.reg != CFI_SP || ++ (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || ++ (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { ++ WARN_FUNC("unsupported stack pointer realignment", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ++ if (state->drap_reg != CFI_UNDEFINED) { ++ /* drap: and imm, %rsp */ ++ cfa->base = state->drap_reg; ++ cfa->offset = state->stack_size = 0; ++ state->drap = true; ++ } ++ ++ /* ++ * Older versions of GCC (4.8ish) realign the stack ++ * without DRAP, with a frame pointer. 
++ */ ++ ++ break; ++ ++ case OP_SRC_POP: ++ if (!state->drap && op->dest.type == OP_DEST_REG && ++ op->dest.reg == cfa->base) { ++ ++ /* pop %rbp */ ++ cfa->base = CFI_SP; ++ } ++ ++ if (state->drap && cfa->base == CFI_BP_INDIRECT && ++ op->dest.type == OP_DEST_REG && ++ op->dest.reg == state->drap_reg && ++ state->drap_offset == -state->stack_size) { ++ ++ /* drap: pop %drap */ ++ cfa->base = state->drap_reg; ++ cfa->offset = 0; ++ state->drap_offset = -1; ++ ++ } else if (regs[op->dest.reg].offset == -state->stack_size) { ++ ++ /* pop %reg */ ++ restore_reg(state, op->dest.reg); ++ } ++ ++ state->stack_size -= 8; ++ if (cfa->base == CFI_SP) ++ cfa->offset -= 8; ++ ++ break; ++ ++ case OP_SRC_REG_INDIRECT: ++ if (state->drap && op->src.reg == CFI_BP && ++ op->src.offset == state->drap_offset) { ++ ++ /* drap: mov disp(%rbp), %drap */ ++ cfa->base = state->drap_reg; ++ cfa->offset = 0; ++ state->drap_offset = -1; ++ } ++ ++ if (state->drap && op->src.reg == CFI_BP && ++ op->src.offset == regs[op->dest.reg].offset) { ++ ++ /* drap: mov disp(%rbp), %reg */ ++ restore_reg(state, op->dest.reg); ++ ++ } else if (op->src.reg == cfa->base && ++ op->src.offset == regs[op->dest.reg].offset + cfa->offset) { ++ ++ /* mov disp(%rbp), %reg */ ++ /* mov disp(%rsp), %reg */ ++ restore_reg(state, op->dest.reg); ++ } ++ ++ break; ++ ++ default: ++ WARN_FUNC("unknown stack-related instruction", ++ insn->sec, insn->offset); ++ return -1; + } + +- insn->visited = true; +- insn->state = state; ++ break; + +- list_for_each_entry(alt, &insn->alts, list) { +- ret = validate_branch(file, alt->insn, state); +- if (ret) ++ case OP_DEST_PUSH: ++ state->stack_size += 8; ++ if (cfa->base == CFI_SP) ++ cfa->offset += 8; ++ ++ if (op->src.type != OP_SRC_REG) ++ break; ++ ++ if (state->drap) { ++ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { ++ ++ /* drap: push %drap */ ++ cfa->base = CFI_BP_INDIRECT; ++ cfa->offset = -state->stack_size; ++ ++ /* save drap so we know when to restore it */ ++ state->drap_offset = -state->stack_size; ++ ++ } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) { ++ ++ /* drap: push %rbp */ ++ state->stack_size = 0; ++ ++ } else if (regs[op->src.reg].base == CFI_UNDEFINED) { ++ ++ /* drap: push %reg */ ++ save_reg(state, op->src.reg, CFI_BP, -state->stack_size); ++ } ++ ++ } else { ++ ++ /* push %reg */ ++ save_reg(state, op->src.reg, CFI_CFA, -state->stack_size); ++ } ++ ++ /* detect when asm code uses rbp as a scratch register */ ++ if (!no_fp && insn->func && op->src.reg == CFI_BP && ++ cfa->base != CFI_BP) ++ state->bp_scratch = true; ++ break; ++ ++ case OP_DEST_REG_INDIRECT: ++ ++ if (state->drap) { ++ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { ++ ++ /* drap: mov %drap, disp(%rbp) */ ++ cfa->base = CFI_BP_INDIRECT; ++ cfa->offset = op->dest.offset; ++ ++ /* save drap offset so we know when to restore it */ ++ state->drap_offset = op->dest.offset; ++ } ++ ++ else if (regs[op->src.reg].base == CFI_UNDEFINED) { ++ ++ /* drap: mov reg, disp(%rbp) */ ++ save_reg(state, op->src.reg, CFI_BP, op->dest.offset); ++ } ++ ++ } else if (op->dest.reg == cfa->base) { ++ ++ /* mov reg, disp(%rbp) */ ++ /* mov reg, disp(%rsp) */ ++ save_reg(state, op->src.reg, CFI_CFA, ++ op->dest.offset - state->cfa.offset); ++ } ++ ++ break; ++ ++ case OP_DEST_LEAVE: ++ if ((!state->drap && cfa->base != CFI_BP) || ++ (state->drap && cfa->base != state->drap_reg)) { ++ WARN_FUNC("leave instruction with modified stack frame", ++ insn->sec, insn->offset); ++ return 
-1; ++ } ++ ++ /* leave (mov %rbp, %rsp; pop %rbp) */ ++ ++ state->stack_size = -state->regs[CFI_BP].offset - 8; ++ restore_reg(state, CFI_BP); ++ ++ if (!state->drap) { ++ cfa->base = CFI_SP; ++ cfa->offset -= 8; ++ } ++ ++ break; ++ ++ case OP_DEST_MEM: ++ if (op->src.type != OP_SRC_POP) { ++ WARN_FUNC("unknown stack-related memory operation", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ++ /* pop mem */ ++ state->stack_size -= 8; ++ if (cfa->base == CFI_SP) ++ cfa->offset -= 8; ++ ++ break; ++ ++ default: ++ WARN_FUNC("unknown stack-related instruction", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static bool insn_state_match(struct instruction *insn, struct insn_state *state) ++{ ++ struct insn_state *state1 = &insn->state, *state2 = state; ++ int i; ++ ++ if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) { ++ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", ++ insn->sec, insn->offset, ++ state1->cfa.base, state1->cfa.offset, ++ state2->cfa.base, state2->cfa.offset); ++ ++ } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) { ++ for (i = 0; i < CFI_NUM_REGS; i++) { ++ if (!memcmp(&state1->regs[i], &state2->regs[i], ++ sizeof(struct cfi_reg))) ++ continue; ++ ++ WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d", ++ insn->sec, insn->offset, ++ i, state1->regs[i].base, state1->regs[i].offset, ++ i, state2->regs[i].base, state2->regs[i].offset); ++ break; ++ } ++ ++ } else if (state1->type != state2->type) { ++ WARN_FUNC("stack state mismatch: type1=%d type2=%d", ++ insn->sec, insn->offset, state1->type, state2->type); ++ ++ } else if (state1->drap != state2->drap || ++ (state1->drap && state1->drap_reg != state2->drap_reg) || ++ (state1->drap && state1->drap_offset != state2->drap_offset)) { ++ WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)", ++ insn->sec, insn->offset, ++ state1->drap, state1->drap_reg, state1->drap_offset, ++ state2->drap, state2->drap_reg, state2->drap_offset); ++ ++ } else ++ return true; ++ ++ return false; ++} ++ ++/* ++ * Follow the branch starting at the given instruction, and recursively follow ++ * any other branches (jumps). Meanwhile, track the frame pointer state at ++ * each instruction and validate all the rules described in ++ * tools/objtool/Documentation/stack-validation.txt. 
++ */ ++static int validate_branch(struct objtool_file *file, struct instruction *first, ++ struct insn_state state) ++{ ++ struct alternative *alt; ++ struct instruction *insn, *next_insn; ++ struct section *sec; ++ struct symbol *func = NULL; ++ int ret; ++ ++ insn = first; ++ sec = insn->sec; ++ ++ if (insn->alt_group && list_empty(&insn->alts)) { ++ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ while (1) { ++ next_insn = next_insn_same_sec(file, insn); ++ ++ ++ if (file->c_file && func && insn->func && func != insn->func) { ++ WARN("%s() falls through to next function %s()", ++ func->name, insn->func->name); ++ return 1; ++ } ++ ++ if (insn->func) ++ func = insn->func; ++ ++ if (func && insn->ignore) { ++ WARN_FUNC("BUG: why am I validating an ignored function?", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ if (insn->visited) { ++ if (!insn->hint && !insn_state_match(insn, &state)) + return 1; ++ ++ return 0; + } + +- switch (insn->type) { ++ if (insn->hint) { ++ if (insn->restore) { ++ struct instruction *save_insn, *i; ++ ++ i = insn; ++ save_insn = NULL; ++ func_for_each_insn_continue_reverse(file, func, i) { ++ if (i->save) { ++ save_insn = i; ++ break; ++ } ++ } + +- case INSN_FP_SAVE: +- if (!nofp) { +- if (state & STATE_FP_SAVED) { +- WARN_FUNC("duplicate frame pointer save", ++ if (!save_insn) { ++ WARN_FUNC("no corresponding CFI save for CFI restore", + sec, insn->offset); + return 1; + } +- state |= STATE_FP_SAVED; +- } +- break; + +- case INSN_FP_SETUP: +- if (!nofp) { +- if (state & STATE_FP_SETUP) { +- WARN_FUNC("duplicate frame pointer setup", ++ if (!save_insn->visited) { ++ /* ++ * Oops, no state to copy yet. ++ * Hopefully we can reach this ++ * instruction from another branch ++ * after the save insn has been ++ * visited. 
++ */ ++ if (insn == first) ++ return 0; ++ ++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", + sec, insn->offset); + return 1; + } +- state |= STATE_FP_SETUP; ++ ++ insn->state = save_insn->state; + } +- break; + +- case INSN_FP_RESTORE: +- if (!nofp) { +- if (has_valid_stack_frame(insn)) +- state &= ~STATE_FP_SETUP; ++ state = insn->state; ++ ++ } else ++ insn->state = state; ++ ++ insn->visited = true; + +- state &= ~STATE_FP_SAVED; ++ if (!insn->ignore_alts) { ++ list_for_each_entry(alt, &insn->alts, list) { ++ ret = validate_branch(file, alt->insn, state); ++ if (ret) ++ return 1; + } +- break; ++ } ++ ++ switch (insn->type) { + + case INSN_RETURN: +- if (!nofp && has_modified_stack_frame(insn)) { +- WARN_FUNC("return without frame pointer restore", ++ if (func && has_modified_stack_frame(&state)) { ++ WARN_FUNC("return with modified stack frame", + sec, insn->offset); + return 1; + } ++ ++ if (state.bp_scratch) { ++ WARN("%s uses BP as a scratch register", ++ insn->func->name); ++ return 1; ++ } ++ + return 0; + + case INSN_CALL: +- if (is_fentry_call(insn)) { +- state |= STATE_FENTRY; ++ if (is_fentry_call(insn)) + break; +- } + + ret = dead_end_function(file, insn->call_dest); + if (ret == 1) +@@ -1085,7 +1853,7 @@ static int validate_branch(struct objtool_file *file, + + /* fallthrough */ + case INSN_CALL_DYNAMIC: +- if (!nofp && !has_valid_stack_frame(insn)) { ++ if (!no_fp && func && !has_valid_stack_frame(&state)) { + WARN_FUNC("call without frame pointer save/setup", + sec, insn->offset); + return 1; +@@ -1094,16 +1862,19 @@ static int validate_branch(struct objtool_file *file, + + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: +- if (insn->jump_dest) { ++ if (insn->jump_dest && ++ (!func || !insn->jump_dest->func || ++ func == insn->jump_dest->func)) { + ret = validate_branch(file, insn->jump_dest, + state); + if (ret) + return 1; +- } else if (has_modified_stack_frame(insn)) { +- WARN_FUNC("sibling call from callable instruction with changed frame pointer", ++ ++ } else if (func && has_modified_stack_frame(&state)) { ++ WARN_FUNC("sibling call from callable instruction with modified stack frame", + sec, insn->offset); + return 1; +- } /* else it's a sibling call */ ++ } + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; +@@ -1111,15 +1882,29 @@ static int validate_branch(struct objtool_file *file, + break; + + case INSN_JUMP_DYNAMIC: +- if (list_empty(&insn->alts) && +- has_modified_stack_frame(insn)) { +- WARN_FUNC("sibling call from callable instruction with changed frame pointer", ++ if (func && list_empty(&insn->alts) && ++ has_modified_stack_frame(&state)) { ++ WARN_FUNC("sibling call from callable instruction with modified stack frame", + sec, insn->offset); + return 1; + } + + return 0; + ++ case INSN_CONTEXT_SWITCH: ++ if (func && (!next_insn || !next_insn->hint)) { ++ WARN_FUNC("unsupported instruction in callable function", ++ sec, insn->offset); ++ return 1; ++ } ++ return 0; ++ ++ case INSN_STACK: ++ if (update_insn_state(insn, &state)) ++ return 1; ++ ++ break; ++ + default: + break; + } +@@ -1127,16 +1912,72 @@ static int validate_branch(struct objtool_file *file, + if (insn->dead_end) + return 0; + +- insn = next_insn_same_sec(file, insn); +- if (!insn) { ++ if (!next_insn) { ++ if (state.cfa.base == CFI_UNDEFINED) ++ return 0; + WARN("%s: unexpected end of section", sec->name); + return 1; + } ++ ++ insn = next_insn; + } + + return 0; + } + ++static int validate_unwind_hints(struct objtool_file *file) ++{ ++ 
struct instruction *insn; ++ int ret, warnings = 0; ++ struct insn_state state; ++ ++ if (!file->hints) ++ return 0; ++ ++ clear_insn_state(&state); ++ ++ for_each_insn(file, insn) { ++ if (insn->hint && !insn->visited) { ++ ret = validate_branch(file, insn, state); ++ warnings += ret; ++ } ++ } ++ ++ return warnings; ++} ++ ++static int validate_retpoline(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (insn->type != INSN_JUMP_DYNAMIC && ++ insn->type != INSN_CALL_DYNAMIC) ++ continue; ++ ++ if (insn->retpoline_safe) ++ continue; ++ ++ /* ++ * .init.text code is ran before userspace and thus doesn't ++ * strictly need retpolines, except for modules which are ++ * loaded late, they very much do need retpoline in their ++ * .init.text ++ */ ++ if (!strcmp(insn->sec->name, ".init.text") && !module) ++ continue; ++ ++ WARN_FUNC("indirect %s found in RETPOLINE build", ++ insn->sec, insn->offset, ++ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ ++ warnings++; ++ } ++ ++ return warnings; ++} ++ + static bool is_kasan_insn(struct instruction *insn) + { + return (insn->type == INSN_CALL && +@@ -1150,12 +1991,23 @@ static bool is_ubsan_insn(struct instruction *insn) + "__ubsan_handle_builtin_unreachable")); + } + +-static bool ignore_unreachable_insn(struct symbol *func, +- struct instruction *insn) ++static bool ignore_unreachable_insn(struct instruction *insn) + { + int i; + +- if (insn->type == INSN_NOP) ++ if (insn->ignore || insn->type == INSN_NOP) ++ return true; ++ ++ /* ++ * Ignore any unused exceptions. This can happen when a whitelisted ++ * function has an exception table entry. ++ * ++ * Also ignore alternative replacement instructions. This can happen ++ * when a whitelisted function uses one of the ALTERNATIVE macros. ++ */ ++ if (!strcmp(insn->sec->name, ".fixup") || ++ !strcmp(insn->sec->name, ".altinstr_replacement") || ++ !strcmp(insn->sec->name, ".altinstr_aux")) + return true; + + /* +@@ -1164,18 +2016,26 @@ static bool ignore_unreachable_insn(struct symbol *func, + * + * End the search at 5 instructions to avoid going into the weeds. 
+ */ ++ if (!insn->func) ++ return false; + for (i = 0; i < 5; i++) { + + if (is_kasan_insn(insn) || is_ubsan_insn(insn)) + return true; + +- if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { +- insn = insn->jump_dest; +- continue; ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) { ++ if (insn->jump_dest && ++ insn->jump_dest->func == insn->func) { ++ insn = insn->jump_dest; ++ continue; ++ } ++ ++ break; + } + +- if (insn->offset + insn->len >= func->offset + func->len) ++ if (insn->offset + insn->len >= insn->func->offset + insn->func->len) + break; ++ + insn = list_next_entry(insn, list); + } + +@@ -1187,81 +2047,49 @@ static int validate_functions(struct objtool_file *file) + struct section *sec; + struct symbol *func; + struct instruction *insn; ++ struct insn_state state; + int ret, warnings = 0; + +- list_for_each_entry(sec, &file->elf->sections, list) { ++ clear_insn_state(&state); ++ ++ state.cfa = initial_func_cfi.cfa; ++ memcpy(&state.regs, &initial_func_cfi.regs, ++ CFI_NUM_REGS * sizeof(struct cfi_reg)); ++ state.stack_size = initial_func_cfi.cfa.offset; ++ ++ for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + insn = find_insn(file, sec, func->offset); +- if (!insn) ++ if (!insn || insn->ignore) + continue; + +- ret = validate_branch(file, insn, 0); ++ ret = validate_branch(file, insn, state); + warnings += ret; + } + } + +- list_for_each_entry(sec, &file->elf->sections, list) { +- list_for_each_entry(func, &sec->symbol_list, list) { +- if (func->type != STT_FUNC) +- continue; +- +- func_for_each_insn(file, func, insn) { +- if (insn->visited) +- continue; +- +- insn->visited = true; +- +- if (file->ignore_unreachables || warnings || +- ignore_unreachable_insn(func, insn)) +- continue; +- +- /* +- * gcov produces a lot of unreachable +- * instructions. If we get an unreachable +- * warning and the file has gcov enabled, just +- * ignore it, and all other such warnings for +- * the file. +- */ +- if (!file->ignore_unreachables && +- gcov_enabled(file)) { +- file->ignore_unreachables = true; +- continue; +- } +- +- WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); +- warnings++; +- } +- } +- } +- + return warnings; + } + +-static int validate_uncallable_instructions(struct objtool_file *file) ++static int validate_reachable_instructions(struct objtool_file *file) + { + struct instruction *insn; +- int warnings = 0; + +- for_each_insn(file, insn) { +- if (!insn->visited && insn->type == INSN_RETURN) { ++ if (file->ignore_unreachables) ++ return 0; + +- /* +- * Don't warn about call instructions in unvisited +- * retpoline alternatives. 
+- */ +- if (!strcmp(insn->sec->name, ".altinstr_replacement")) +- continue; ++ for_each_insn(file, insn) { ++ if (insn->visited || ignore_unreachable_insn(insn)) ++ continue; + +- WARN_FUNC("return instruction outside of a callable function", +- insn->sec, insn->offset); +- warnings++; +- } ++ WARN_FUNC("unreachable instruction", insn->sec, insn->offset); ++ return 1; + } + +- return warnings; ++ return 0; + } + + static void cleanup(struct objtool_file *file) +@@ -1281,42 +2109,73 @@ static void cleanup(struct objtool_file *file) + elf_close(file->elf); + } + +-int check(const char *_objname, bool _nofp) ++int check(const char *_objname, bool orc) + { + struct objtool_file file; + int ret, warnings = 0; + + objname = _objname; +- nofp = _nofp; + +- file.elf = elf_open(objname); +- if (!file.elf) { +- fprintf(stderr, "error reading elf file %s\n", objname); ++ file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); ++ if (!file.elf) + return 1; +- } + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); + file.rodata = find_section_by_name(file.elf, ".rodata"); +- file.ignore_unreachables = false; + file.c_file = find_section_by_name(file.elf, ".comment"); ++ file.ignore_unreachables = no_unreachable; ++ file.hints = false; ++ ++ arch_initial_func_cfi_state(&initial_func_cfi); + + ret = decode_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + ++ if (list_empty(&file.insn_list)) ++ goto out; ++ ++ if (retpoline) { ++ ret = validate_retpoline(&file); ++ if (ret < 0) ++ return ret; ++ warnings += ret; ++ } ++ + ret = validate_functions(&file); + if (ret < 0) + goto out; + warnings += ret; + +- ret = validate_uncallable_instructions(&file); ++ ret = validate_unwind_hints(&file); + if (ret < 0) + goto out; + warnings += ret; + ++ if (!warnings) { ++ ret = validate_reachable_instructions(&file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ } ++ ++ if (orc) { ++ ret = create_orc(&file); ++ if (ret < 0) ++ goto out; ++ ++ ret = create_orc_sections(&file); ++ if (ret < 0) ++ goto out; ++ ++ ret = elf_write(file.elf); ++ if (ret < 0) ++ goto out; ++ } ++ + out: + cleanup(&file); + +diff --git a/tools/objtool/check.h b/tools/objtool/check.h +index aca248a..c6b68fc 100644 +--- a/tools/objtool/check.h ++++ b/tools/objtool/check.h +@@ -20,22 +20,40 @@ + + #include <stdbool.h> + #include "elf.h" ++#include "cfi.h" + #include "arch.h" ++#include "orc.h" + #include <linux/hashtable.h> + ++struct insn_state { ++ struct cfi_reg cfa; ++ struct cfi_reg regs[CFI_NUM_REGS]; ++ int stack_size; ++ unsigned char type; ++ bool bp_scratch; ++ bool drap; ++ int drap_reg, drap_offset; ++ struct cfi_reg vals[CFI_NUM_REGS]; ++}; ++ + struct instruction { + struct list_head list; + struct hlist_node hash; + struct section *sec; + unsigned long offset; +- unsigned int len, state; ++ unsigned int len; + unsigned char type; + unsigned long immediate; +- bool alt_group, visited, dead_end, ignore_alts; ++ bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; ++ bool retpoline_safe; + struct symbol *call_dest; + struct instruction *jump_dest; ++ struct instruction *first_jump_src; + struct list_head alts; + struct symbol *func; ++ struct stack_op stack_op; ++ struct insn_state state; ++ struct orc_entry orc; + }; + + struct objtool_file { +@@ -43,9 +61,22 @@ struct objtool_file { + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 16); + struct section *rodata, *whitelist; 
+- bool ignore_unreachables, c_file; ++ bool ignore_unreachables, c_file, hints; + }; + +-int check(const char *objname, bool nofp); ++int check(const char *objname, bool orc); ++ ++struct instruction *find_insn(struct objtool_file *file, ++ struct section *sec, unsigned long offset); ++ ++#define for_each_insn(file, insn) \ ++ list_for_each_entry(insn, &file->insn_list, list) ++ ++#define sec_for_each_insn(file, sec, insn) \ ++ for (insn = find_insn(file, sec, 0); \ ++ insn && &insn->list != &file->insn_list && \ ++ insn->sec == sec; \ ++ insn = list_next_entry(insn, list)) ++ + + #endif /* _CHECK_H */ +diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c +index 14a74d4..b31b7a6 100644 +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -31,13 +31,6 @@ + #include "elf.h" + #include "warn.h" + +-/* +- * Fallback for systems without this "read, mmaping if possible" cmd. +- */ +-#ifndef ELF_C_READ_MMAP +-#define ELF_C_READ_MMAP ELF_C_READ +-#endif +- + struct section *find_section_by_name(struct elf *elf, const char *name) + { + struct section *sec; +@@ -128,12 +121,12 @@ static int read_sections(struct elf *elf) + int i; + + if (elf_getshdrnum(elf->elf, §ions_nr)) { +- perror("elf_getshdrnum"); ++ WARN_ELF("elf_getshdrnum"); + return -1; + } + + if (elf_getshdrstrndx(elf->elf, &shstrndx)) { +- perror("elf_getshdrstrndx"); ++ WARN_ELF("elf_getshdrstrndx"); + return -1; + } + +@@ -154,37 +147,37 @@ static int read_sections(struct elf *elf) + + s = elf_getscn(elf->elf, i); + if (!s) { +- perror("elf_getscn"); ++ WARN_ELF("elf_getscn"); + return -1; + } + + sec->idx = elf_ndxscn(s); + + if (!gelf_getshdr(s, &sec->sh)) { +- perror("gelf_getshdr"); ++ WARN_ELF("gelf_getshdr"); + return -1; + } + + sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); + if (!sec->name) { +- perror("elf_strptr"); +- return -1; +- } +- +- sec->elf_data = elf_getdata(s, NULL); +- if (!sec->elf_data) { +- perror("elf_getdata"); ++ WARN_ELF("elf_strptr"); + return -1; + } + +- if (sec->elf_data->d_off != 0 || +- sec->elf_data->d_size != sec->sh.sh_size) { +- WARN("unexpected data attributes for %s", sec->name); +- return -1; ++ if (sec->sh.sh_size != 0) { ++ sec->data = elf_getdata(s, NULL); ++ if (!sec->data) { ++ WARN_ELF("elf_getdata"); ++ return -1; ++ } ++ if (sec->data->d_off != 0 || ++ sec->data->d_size != sec->sh.sh_size) { ++ WARN("unexpected data attributes for %s", ++ sec->name); ++ return -1; ++ } + } +- +- sec->data = (unsigned long)sec->elf_data->d_buf; +- sec->len = sec->elf_data->d_size; ++ sec->len = sec->sh.sh_size; + } + + /* sanity check, one more call to elf_nextscn() should return NULL */ +@@ -221,15 +214,15 @@ static int read_symbols(struct elf *elf) + + sym->idx = i; + +- if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) { +- perror("gelf_getsym"); ++ if (!gelf_getsym(symtab->data, i, &sym->sym)) { ++ WARN_ELF("gelf_getsym"); + goto err; + } + + sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, + sym->sym.st_name); + if (!sym->name) { +- perror("elf_strptr"); ++ WARN_ELF("elf_strptr"); + goto err; + } + +@@ -311,8 +304,8 @@ static int read_relas(struct elf *elf) + } + memset(rela, 0, sizeof(*rela)); + +- if (!gelf_getrela(sec->elf_data, i, &rela->rela)) { +- perror("gelf_getrela"); ++ if (!gelf_getrela(sec->data, i, &rela->rela)) { ++ WARN_ELF("gelf_getrela"); + return -1; + } + +@@ -336,9 +329,10 @@ static int read_relas(struct elf *elf) + return 0; + } + +-struct elf *elf_open(const char *name) ++struct elf *elf_open(const char *name, int flags) + { + struct elf *elf; ++ 
Elf_Cmd cmd; + + elf_version(EV_CURRENT); + +@@ -351,27 +345,28 @@ struct elf *elf_open(const char *name) + + INIT_LIST_HEAD(&elf->sections); + +- elf->name = strdup(name); +- if (!elf->name) { +- perror("strdup"); +- goto err; +- } +- +- elf->fd = open(name, O_RDONLY); ++ elf->fd = open(name, flags); + if (elf->fd == -1) { + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); + goto err; + } + +- elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); ++ if ((flags & O_ACCMODE) == O_RDONLY) ++ cmd = ELF_C_READ_MMAP; ++ else if ((flags & O_ACCMODE) == O_RDWR) ++ cmd = ELF_C_RDWR; ++ else /* O_WRONLY */ ++ cmd = ELF_C_WRITE; ++ ++ elf->elf = elf_begin(elf->fd, cmd, NULL); + if (!elf->elf) { +- perror("elf_begin"); ++ WARN_ELF("elf_begin"); + goto err; + } + + if (!gelf_getehdr(elf->elf, &elf->ehdr)) { +- perror("gelf_getehdr"); ++ WARN_ELF("gelf_getehdr"); + goto err; + } + +@@ -391,12 +386,212 @@ struct elf *elf_open(const char *name) + return NULL; + } + ++struct section *elf_create_section(struct elf *elf, const char *name, ++ size_t entsize, int nr) ++{ ++ struct section *sec, *shstrtab; ++ size_t size = entsize * nr; ++ struct Elf_Scn *s; ++ Elf_Data *data; ++ ++ sec = malloc(sizeof(*sec)); ++ if (!sec) { ++ perror("malloc"); ++ return NULL; ++ } ++ memset(sec, 0, sizeof(*sec)); ++ ++ INIT_LIST_HEAD(&sec->symbol_list); ++ INIT_LIST_HEAD(&sec->rela_list); ++ hash_init(sec->rela_hash); ++ hash_init(sec->symbol_hash); ++ ++ list_add_tail(&sec->list, &elf->sections); ++ ++ s = elf_newscn(elf->elf); ++ if (!s) { ++ WARN_ELF("elf_newscn"); ++ return NULL; ++ } ++ ++ sec->name = strdup(name); ++ if (!sec->name) { ++ perror("strdup"); ++ return NULL; ++ } ++ ++ sec->idx = elf_ndxscn(s); ++ sec->len = size; ++ sec->changed = true; ++ ++ sec->data = elf_newdata(s); ++ if (!sec->data) { ++ WARN_ELF("elf_newdata"); ++ return NULL; ++ } ++ ++ sec->data->d_size = size; ++ sec->data->d_align = 1; ++ ++ if (size) { ++ sec->data->d_buf = malloc(size); ++ if (!sec->data->d_buf) { ++ perror("malloc"); ++ return NULL; ++ } ++ memset(sec->data->d_buf, 0, size); ++ } ++ ++ if (!gelf_getshdr(s, &sec->sh)) { ++ WARN_ELF("gelf_getshdr"); ++ return NULL; ++ } ++ ++ sec->sh.sh_size = size; ++ sec->sh.sh_entsize = entsize; ++ sec->sh.sh_type = SHT_PROGBITS; ++ sec->sh.sh_addralign = 1; ++ sec->sh.sh_flags = SHF_ALLOC; ++ ++ ++ /* Add section name to .shstrtab */ ++ shstrtab = find_section_by_name(elf, ".shstrtab"); ++ if (!shstrtab) { ++ WARN("can't find .shstrtab section"); ++ return NULL; ++ } ++ ++ s = elf_getscn(elf->elf, shstrtab->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return NULL; ++ } ++ ++ data = elf_newdata(s); ++ if (!data) { ++ WARN_ELF("elf_newdata"); ++ return NULL; ++ } ++ ++ data->d_buf = sec->name; ++ data->d_size = strlen(name) + 1; ++ data->d_align = 1; ++ ++ sec->sh.sh_name = shstrtab->len; ++ ++ shstrtab->len += strlen(name) + 1; ++ shstrtab->changed = true; ++ ++ return sec; ++} ++ ++struct section *elf_create_rela_section(struct elf *elf, struct section *base) ++{ ++ char *relaname; ++ struct section *sec; ++ ++ relaname = malloc(strlen(base->name) + strlen(".rela") + 1); ++ if (!relaname) { ++ perror("malloc"); ++ return NULL; ++ } ++ strcpy(relaname, ".rela"); ++ strcat(relaname, base->name); ++ ++ sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0); ++ free(relaname); ++ if (!sec) ++ return NULL; ++ ++ base->rela = sec; ++ sec->base = base; ++ ++ sec->sh.sh_type = SHT_RELA; ++ sec->sh.sh_addralign = 8; ++ sec->sh.sh_link = 
find_section_by_name(elf, ".symtab")->idx; ++ sec->sh.sh_info = base->idx; ++ sec->sh.sh_flags = SHF_INFO_LINK; ++ ++ return sec; ++} ++ ++int elf_rebuild_rela_section(struct section *sec) ++{ ++ struct rela *rela; ++ int nr, idx = 0, size; ++ GElf_Rela *relas; ++ ++ nr = 0; ++ list_for_each_entry(rela, &sec->rela_list, list) ++ nr++; ++ ++ size = nr * sizeof(*relas); ++ relas = malloc(size); ++ if (!relas) { ++ perror("malloc"); ++ return -1; ++ } ++ ++ sec->data->d_buf = relas; ++ sec->data->d_size = size; ++ ++ sec->sh.sh_size = size; ++ ++ idx = 0; ++ list_for_each_entry(rela, &sec->rela_list, list) { ++ relas[idx].r_offset = rela->offset; ++ relas[idx].r_addend = rela->addend; ++ relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type); ++ idx++; ++ } ++ ++ return 0; ++} ++ ++int elf_write(struct elf *elf) ++{ ++ struct section *sec; ++ Elf_Scn *s; ++ ++ /* Update section headers for changed sections: */ ++ list_for_each_entry(sec, &elf->sections, list) { ++ if (sec->changed) { ++ s = elf_getscn(elf->elf, sec->idx); ++ if (!s) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ if (!gelf_update_shdr(s, &sec->sh)) { ++ WARN_ELF("gelf_update_shdr"); ++ return -1; ++ } ++ } ++ } ++ ++ /* Make sure the new section header entries get updated properly. */ ++ elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY); ++ ++ /* Write all changes to the file. */ ++ if (elf_update(elf->elf, ELF_C_WRITE) < 0) { ++ WARN_ELF("elf_update"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + void elf_close(struct elf *elf) + { + struct section *sec, *tmpsec; + struct symbol *sym, *tmpsym; + struct rela *rela, *tmprela; + ++ if (elf->elf) ++ elf_end(elf->elf); ++ ++ if (elf->fd > 0) ++ close(elf->fd); ++ + list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) { + list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) { + list_del(&sym->list); +@@ -411,11 +606,6 @@ void elf_close(struct elf *elf) + list_del(&sec->list); + free(sec); + } +- if (elf->name) +- free(elf->name); +- if (elf->fd > 0) +- close(elf->fd); +- if (elf->elf) +- elf_end(elf->elf); ++ + free(elf); + } +diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h +index aa1ff65..440b83b 100644 +--- a/tools/objtool/elf.h ++++ b/tools/objtool/elf.h +@@ -28,6 +28,13 @@ + # define elf_getshdrstrndx elf_getshstrndx + #endif + ++/* ++ * Fallback for systems without this "read, mmaping if possible" cmd. 
++ */ ++#ifndef ELF_C_READ_MMAP ++#define ELF_C_READ_MMAP ELF_C_READ ++#endif ++ + struct section { + struct list_head list; + GElf_Shdr sh; +@@ -37,11 +44,11 @@ struct section { + DECLARE_HASHTABLE(rela_hash, 16); + struct section *base, *rela; + struct symbol *sym; +- Elf_Data *elf_data; ++ Elf_Data *data; + char *name; + int idx; +- unsigned long data; + unsigned int len; ++ bool changed, text; + }; + + struct symbol { +@@ -76,15 +83,21 @@ struct elf { + }; + + +-struct elf *elf_open(const char *name); ++struct elf *elf_open(const char *name, int flags); + struct section *find_section_by_name(struct elf *elf, const char *name); + struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); + struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, + unsigned int len); + struct symbol *find_containing_func(struct section *sec, unsigned long offset); ++struct section *elf_create_section(struct elf *elf, const char *name, size_t ++ entsize, int nr); ++struct section *elf_create_rela_section(struct elf *elf, struct section *base); ++int elf_rebuild_rela_section(struct section *sec); ++int elf_write(struct elf *elf); + void elf_close(struct elf *elf); + +- ++#define for_each_sec(file, sec) \ ++ list_for_each_entry(sec, &file->elf->sections, list) + + #endif /* _OBJTOOL_ELF_H */ +diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c +index 46c326d..07f3299 100644 +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -31,11 +31,10 @@ + #include <stdlib.h> + #include <subcmd/exec-cmd.h> + #include <subcmd/pager.h> ++#include <linux/kernel.h> + + #include "builtin.h" + +-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +- + struct cmd_struct { + const char *name; + int (*fn)(int, const char **); +@@ -43,10 +42,11 @@ struct cmd_struct { + }; + + static const char objtool_usage_string[] = +- "objtool [OPTIONS] COMMAND [ARGS]"; ++ "objtool COMMAND [ARGS]"; + + static struct cmd_struct objtool_cmds[] = { + {"check", cmd_check, "Perform stack metadata validation on an object file" }, ++ {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" }, + }; + + bool help; +@@ -70,7 +70,7 @@ static void cmd_usage(void) + + printf("\n"); + +- exit(1); ++ exit(129); + } + + static void handle_options(int *argc, const char ***argv) +@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv) + break; + } else { + fprintf(stderr, "Unknown option: %s\n", cmd); +- fprintf(stderr, "\n Usage: %s\n", +- objtool_usage_string); +- exit(1); ++ cmd_usage(); + } + + (*argv)++; +diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h +new file mode 100644 +index 0000000..b0e92a6 +--- /dev/null ++++ b/tools/objtool/orc.h +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++#ifndef _ORC_H ++#define _ORC_H ++ ++#include <asm/orc_types.h> ++ ++struct objtool_file; ++ ++int create_orc(struct objtool_file *file); ++int create_orc_sections(struct objtool_file *file); ++ ++int orc_dump(const char *objname); ++ ++#endif /* _ORC_H */ +diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c +new file mode 100644 +index 0000000..c334382 +--- /dev/null ++++ b/tools/objtool/orc_dump.c +@@ -0,0 +1,213 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. ++ */ ++ ++#include <unistd.h> ++#include "orc.h" ++#include "warn.h" ++ ++static const char *reg_name(unsigned int reg) ++{ ++ switch (reg) { ++ case ORC_REG_PREV_SP: ++ return "prevsp"; ++ case ORC_REG_DX: ++ return "dx"; ++ case ORC_REG_DI: ++ return "di"; ++ case ORC_REG_BP: ++ return "bp"; ++ case ORC_REG_SP: ++ return "sp"; ++ case ORC_REG_R10: ++ return "r10"; ++ case ORC_REG_R13: ++ return "r13"; ++ case ORC_REG_BP_INDIRECT: ++ return "bp(ind)"; ++ case ORC_REG_SP_INDIRECT: ++ return "sp(ind)"; ++ default: ++ return "?"; ++ } ++} ++ ++static const char *orc_type_name(unsigned int type) ++{ ++ switch (type) { ++ case ORC_TYPE_CALL: ++ return "call"; ++ case ORC_TYPE_REGS: ++ return "regs"; ++ case ORC_TYPE_REGS_IRET: ++ return "iret"; ++ default: ++ return "?"; ++ } ++} ++ ++static void print_reg(unsigned int reg, int offset) ++{ ++ if (reg == ORC_REG_BP_INDIRECT) ++ printf("(bp%+d)", offset); ++ else if (reg == ORC_REG_SP_INDIRECT) ++ printf("(sp%+d)", offset); ++ else if (reg == ORC_REG_UNDEFINED) ++ printf("(und)"); ++ else ++ printf("%s%+d", reg_name(reg), offset); ++} ++ ++int orc_dump(const char *_objname) ++{ ++ int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0; ++ struct orc_entry *orc = NULL; ++ char *name; ++ size_t nr_sections; ++ Elf64_Addr orc_ip_addr = 0; ++ size_t shstrtab_idx; ++ Elf *elf; ++ Elf_Scn *scn; ++ GElf_Shdr sh; ++ GElf_Rela rela; ++ GElf_Sym sym; ++ Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL; ++ ++ ++ objname = _objname; ++ ++ elf_version(EV_CURRENT); ++ ++ fd = open(objname, O_RDONLY); ++ if (fd == -1) { ++ perror("open"); ++ return -1; ++ } ++ ++ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); ++ if (!elf) { ++ WARN_ELF("elf_begin"); ++ return -1; ++ } ++ ++ if (elf_getshdrnum(elf, &nr_sections)) { ++ WARN_ELF("elf_getshdrnum"); ++ return -1; ++ } ++ ++ if (elf_getshdrstrndx(elf, &shstrtab_idx)) { ++ WARN_ELF("elf_getshdrstrndx"); ++ return -1; ++ } ++ ++ for (i = 0; i < nr_sections; i++) { ++ scn = elf_getscn(elf, i); ++ if (!scn) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ ++ if (!gelf_getshdr(scn, &sh)) { ++ WARN_ELF("gelf_getshdr"); ++ return -1; ++ } ++ ++ name = elf_strptr(elf, shstrtab_idx, sh.sh_name); ++ if (!name) { ++ 
WARN_ELF("elf_strptr"); ++ return -1; ++ } ++ ++ data = elf_getdata(scn, NULL); ++ if (!data) { ++ WARN_ELF("elf_getdata"); ++ return -1; ++ } ++ ++ if (!strcmp(name, ".symtab")) { ++ symtab = data; ++ } else if (!strcmp(name, ".orc_unwind")) { ++ orc = data->d_buf; ++ orc_size = sh.sh_size; ++ } else if (!strcmp(name, ".orc_unwind_ip")) { ++ orc_ip = data->d_buf; ++ orc_ip_addr = sh.sh_addr; ++ } else if (!strcmp(name, ".rela.orc_unwind_ip")) { ++ rela_orc_ip = data; ++ } ++ } ++ ++ if (!symtab || !orc || !orc_ip) ++ return 0; ++ ++ if (orc_size % sizeof(*orc) != 0) { ++ WARN("bad .orc_unwind section size"); ++ return -1; ++ } ++ ++ nr_entries = orc_size / sizeof(*orc); ++ for (i = 0; i < nr_entries; i++) { ++ if (rela_orc_ip) { ++ if (!gelf_getrela(rela_orc_ip, i, &rela)) { ++ WARN_ELF("gelf_getrela"); ++ return -1; ++ } ++ ++ if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) { ++ WARN_ELF("gelf_getsym"); ++ return -1; ++ } ++ ++ scn = elf_getscn(elf, sym.st_shndx); ++ if (!scn) { ++ WARN_ELF("elf_getscn"); ++ return -1; ++ } ++ ++ if (!gelf_getshdr(scn, &sh)) { ++ WARN_ELF("gelf_getshdr"); ++ return -1; ++ } ++ ++ name = elf_strptr(elf, shstrtab_idx, sh.sh_name); ++ if (!name || !*name) { ++ WARN_ELF("elf_strptr"); ++ return -1; ++ } ++ ++ printf("%s+%llx:", name, (unsigned long long)rela.r_addend); ++ ++ } else { ++ printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i])); ++ } ++ ++ ++ printf(" sp:"); ++ ++ print_reg(orc[i].sp_reg, orc[i].sp_offset); ++ ++ printf(" bp:"); ++ ++ print_reg(orc[i].bp_reg, orc[i].bp_offset); ++ ++ printf(" type:%s\n", orc_type_name(orc[i].type)); ++ } ++ ++ elf_end(elf); ++ close(fd); ++ ++ return 0; ++} +diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c +new file mode 100644 +index 0000000..18384d9 +--- /dev/null ++++ b/tools/objtool/orc_gen.c +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#include <stdlib.h> ++#include <string.h> ++ ++#include "orc.h" ++#include "check.h" ++#include "warn.h" ++ ++int create_orc(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ ++ for_each_insn(file, insn) { ++ struct orc_entry *orc = &insn->orc; ++ struct cfi_reg *cfa = &insn->state.cfa; ++ struct cfi_reg *bp = &insn->state.regs[CFI_BP]; ++ ++ if (cfa->base == CFI_UNDEFINED) { ++ orc->sp_reg = ORC_REG_UNDEFINED; ++ continue; ++ } ++ ++ switch (cfa->base) { ++ case CFI_SP: ++ orc->sp_reg = ORC_REG_SP; ++ break; ++ case CFI_SP_INDIRECT: ++ orc->sp_reg = ORC_REG_SP_INDIRECT; ++ break; ++ case CFI_BP: ++ orc->sp_reg = ORC_REG_BP; ++ break; ++ case CFI_BP_INDIRECT: ++ orc->sp_reg = ORC_REG_BP_INDIRECT; ++ break; ++ case CFI_R10: ++ orc->sp_reg = ORC_REG_R10; ++ break; ++ case CFI_R13: ++ orc->sp_reg = ORC_REG_R13; ++ break; ++ case CFI_DI: ++ orc->sp_reg = ORC_REG_DI; ++ break; ++ case CFI_DX: ++ orc->sp_reg = ORC_REG_DX; ++ break; ++ default: ++ WARN_FUNC("unknown CFA base reg %d", ++ insn->sec, insn->offset, cfa->base); ++ return -1; ++ } ++ ++ switch(bp->base) { ++ case CFI_UNDEFINED: ++ orc->bp_reg = ORC_REG_UNDEFINED; ++ break; ++ case CFI_CFA: ++ orc->bp_reg = ORC_REG_PREV_SP; ++ break; ++ case CFI_BP: ++ orc->bp_reg = ORC_REG_BP; ++ break; ++ default: ++ WARN_FUNC("unknown BP base reg %d", ++ insn->sec, insn->offset, bp->base); ++ return -1; ++ } ++ ++ orc->sp_offset = cfa->offset; ++ orc->bp_offset = bp->offset; ++ orc->type = insn->state.type; ++ } ++ ++ return 0; ++} ++ ++static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, ++ unsigned int idx, struct section *insn_sec, ++ unsigned long insn_off, struct orc_entry *o) ++{ ++ struct orc_entry *orc; ++ struct rela *rela; ++ ++ if (!insn_sec->sym) { ++ WARN("missing symbol for section %s", insn_sec->name); ++ return -1; ++ } ++ ++ /* populate ORC data */ ++ orc = (struct orc_entry *)u_sec->data->d_buf + idx; ++ memcpy(orc, o, sizeof(*orc)); ++ ++ /* populate rela for ip */ ++ rela = malloc(sizeof(*rela)); ++ if (!rela) { ++ perror("malloc"); ++ return -1; ++ } ++ memset(rela, 0, sizeof(*rela)); ++ ++ rela->sym = insn_sec->sym; ++ rela->addend = insn_off; ++ rela->type = R_X86_64_PC32; ++ rela->offset = idx * sizeof(int); ++ ++ list_add_tail(&rela->list, &ip_relasec->rela_list); ++ hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset); ++ ++ return 0; ++} ++ ++int create_orc_sections(struct objtool_file *file) ++{ ++ struct instruction *insn, *prev_insn; ++ struct section *sec, *u_sec, *ip_relasec; ++ unsigned int idx; ++ ++ struct orc_entry empty = { ++ .sp_reg = ORC_REG_UNDEFINED, ++ .bp_reg = ORC_REG_UNDEFINED, ++ .type = ORC_TYPE_CALL, ++ }; ++ ++ sec = find_section_by_name(file->elf, ".orc_unwind"); ++ if (sec) { ++ WARN("file already has .orc_unwind section, skipping"); ++ return -1; ++ } ++ ++ /* count the number of needed orcs */ ++ idx = 0; ++ for_each_sec(file, sec) { ++ if (!sec->text) ++ continue; ++ ++ prev_insn = NULL; ++ sec_for_each_insn(file, sec, insn) { ++ if (!prev_insn || ++ memcmp(&insn->orc, &prev_insn->orc, ++ sizeof(struct orc_entry))) { ++ idx++; ++ } ++ prev_insn = insn; ++ } ++ ++ /* section terminator */ ++ if (prev_insn) ++ idx++; ++ } ++ if (!idx) ++ return -1; ++ ++ ++ /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ ++ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); ++ if (!sec) ++ return -1; ++ ++ ip_relasec = elf_create_rela_section(file->elf, sec); ++ if (!ip_relasec) ++ return -1; ++ ++ /* create .orc_unwind 
section */ ++ u_sec = elf_create_section(file->elf, ".orc_unwind", ++ sizeof(struct orc_entry), idx); ++ ++ /* populate sections */ ++ idx = 0; ++ for_each_sec(file, sec) { ++ if (!sec->text) ++ continue; ++ ++ prev_insn = NULL; ++ sec_for_each_insn(file, sec, insn) { ++ if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, ++ sizeof(struct orc_entry))) { ++ ++ if (create_orc_entry(u_sec, ip_relasec, idx, ++ insn->sec, insn->offset, ++ &insn->orc)) ++ return -1; ++ ++ idx++; ++ } ++ prev_insn = insn; ++ } ++ ++ /* section terminator */ ++ if (prev_insn) { ++ if (create_orc_entry(u_sec, ip_relasec, idx, ++ prev_insn->sec, ++ prev_insn->offset + prev_insn->len, ++ &empty)) ++ return -1; ++ ++ idx++; ++ } ++ } ++ ++ if (elf_rebuild_rela_section(ip_relasec)) ++ return -1; ++ ++ return 0; ++} +diff --git a/tools/objtool/special.c b/tools/objtool/special.c +index bff8abb..84f001d 100644 +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, + alt->jump_or_nop = entry->jump_or_nop; + + if (alt->group) { +- alt->orig_len = *(unsigned char *)(sec->data + offset + ++ alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset + + entry->orig_len); +- alt->new_len = *(unsigned char *)(sec->data + offset + ++ alt->new_len = *(unsigned char *)(sec->data->d_buf + offset + + entry->new_len); + } + + if (entry->feature) { + unsigned short feature; + +- feature = *(unsigned short *)(sec->data + offset + ++ feature = *(unsigned short *)(sec->data->d_buf + offset + + entry->feature); + + /* +diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh +new file mode 100755 +index 0000000..1470e74 +--- /dev/null ++++ b/tools/objtool/sync-check.sh +@@ -0,0 +1,29 @@ ++#!/bin/sh ++# SPDX-License-Identifier: GPL-2.0 ++ ++FILES=' ++arch/x86/lib/insn.c ++arch/x86/lib/inat.c ++arch/x86/lib/x86-opcode-map.txt ++arch/x86/tools/gen-insn-attr-x86.awk ++arch/x86/include/asm/insn.h ++arch/x86/include/asm/inat.h ++arch/x86/include/asm/inat_types.h ++arch/x86/include/asm/orc_types.h ++' ++ ++check() ++{ ++ local file=$1 ++ ++ diff $file ../../$file > /dev/null || ++ echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'" ++} ++ ++if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then ++ exit 0 ++fi ++ ++for i in $FILES; do ++ check $i ++done +diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h +index ac7e075..afd9f7a 100644 +--- a/tools/objtool/warn.h ++++ b/tools/objtool/warn.h +@@ -18,6 +18,13 @@ + #ifndef _WARN_H + #define _WARN_H + ++#include <stdlib.h> ++#include <string.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include "elf.h" ++ + extern const char *objname; + + static inline char *offstr(struct section *sec, unsigned long offset) +@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) + free(_str); \ + }) + ++#define WARN_ELF(format, ...) 
\ ++ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) ++ + #endif /* _WARN_H */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch new file mode 100644 index 00000000..6bdeb9f8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch @@ -0,0 +1,316 @@ +From 1a39c1b7d192d62e6d4203ea7acfc35eb3317c48 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf <jpoimboe@redhat.com> +Date: Wed, 28 Jun 2017 10:11:06 -0500 +Subject: [PATCH 89/93] objtool, x86: Add several functions and files to the + objtool whitelist + +commit c207aee48037abca71c669cbec407b9891965c34 upstream. + +In preparation for an objtool rewrite which will have broader checks, +whitelist functions and files which cause problems because they do +unusual things with the stack. + +These whitelists serve as a TODO list for which functions and files +don't yet have undwarf unwinder coverage. Eventually most of the +whitelists can be removed in favor of manual CFI hint annotations or +objtool improvements. + +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Jiri Slaby <jslaby@suse.cz> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: live-patching@vger.kernel.org +Link: http://lkml.kernel.org/r/7f934a5d707a574bda33ea282e9478e627fb1829.1498659915.git.jpoimboe@redhat.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/crypto/Makefile | 2 ++ + arch/x86/crypto/sha1-mb/Makefile | 2 ++ + arch/x86/crypto/sha256-mb/Makefile | 2 ++ + arch/x86/kernel/Makefile | 1 + + arch/x86/kernel/acpi/Makefile | 2 ++ + arch/x86/kernel/kprobes/opt.c | 9 ++++++++- + arch/x86/kernel/reboot.c | 2 ++ + arch/x86/kvm/svm.c | 2 ++ + arch/x86/kvm/vmx.c | 3 +++ + arch/x86/lib/msr-reg.S | 8 ++++---- + arch/x86/net/Makefile | 2 ++ + arch/x86/platform/efi/Makefile | 1 + + arch/x86/power/Makefile | 2 ++ + arch/x86/xen/Makefile | 3 +++ + kernel/kexec_core.c | 4 +++- + 15 files changed, 39 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile +index 34b3fa2..9e32d40 100644 +--- a/arch/x86/crypto/Makefile ++++ b/arch/x86/crypto/Makefile +@@ -2,6 +2,8 @@ + # Arch-specific CryptoAPI modules. + # + ++OBJECT_FILES_NON_STANDARD := y ++ + avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile +index 2f87563..2e14acc 100644 +--- a/arch/x86/crypto/sha1-mb/Makefile ++++ b/arch/x86/crypto/sha1-mb/Makefile +@@ -2,6 +2,8 @@ + # Arch-specific CryptoAPI modules. + # + ++OBJECT_FILES_NON_STANDARD := y ++ + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) + ifeq ($(avx2_supported),yes) +diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile +index 41089e7..45b4fca 100644 +--- a/arch/x86/crypto/sha256-mb/Makefile ++++ b/arch/x86/crypto/sha256-mb/Makefile +@@ -2,6 +2,8 @@ + # Arch-specific CryptoAPI modules. 
+ # + ++OBJECT_FILES_NON_STANDARD := y ++ + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) + ifeq ($(avx2_supported),yes) +diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile +index 79076d7..4c9c615 100644 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -29,6 +29,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y + OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y + OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y + OBJECT_FILES_NON_STANDARD_test_nx.o := y ++OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y + + # If instrumentation of this dir is enabled, boot hangs during first second. + # Probably could be more selective here, but note that files related to irqs, +diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile +index 26b78d8..85a9e17 100644 +--- a/arch/x86/kernel/acpi/Makefile ++++ b/arch/x86/kernel/acpi/Makefile +@@ -1,3 +1,5 @@ ++OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y ++ + obj-$(CONFIG_ACPI) += boot.o + obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o + obj-$(CONFIG_ACPI_APEI) += apei.o +diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c +index 90f8cd6..6a7b428 100644 +--- a/arch/x86/kernel/kprobes/opt.c ++++ b/arch/x86/kernel/kprobes/opt.c +@@ -28,6 +28,7 @@ + #include <linux/kdebug.h> + #include <linux/kallsyms.h> + #include <linux/ftrace.h> ++#include <linux/frame.h> + + #include <asm/text-patching.h> + #include <asm/cacheflush.h> +@@ -91,6 +92,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) + } + + asm ( ++ "optprobe_template_func:\n" + ".global optprobe_template_entry\n" + "optprobe_template_entry:\n" + #ifdef CONFIG_X86_64 +@@ -128,7 +130,12 @@ asm ( + " popf\n" + #endif + ".global optprobe_template_end\n" +- "optprobe_template_end:\n"); ++ "optprobe_template_end:\n" ++ ".type optprobe_template_func, @function\n" ++ ".size optprobe_template_func, .-optprobe_template_func\n"); ++ ++void optprobe_template_func(void); ++STACK_FRAME_NON_STANDARD(optprobe_template_func); + + #define TMPL_MOVE_IDX \ + ((long)&optprobe_template_val - (long)&optprobe_template_entry) +diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c +index e244c19..acca20d 100644 +--- a/arch/x86/kernel/reboot.c ++++ b/arch/x86/kernel/reboot.c +@@ -9,6 +9,7 @@ + #include <linux/sched.h> + #include <linux/tboot.h> + #include <linux/delay.h> ++#include <linux/frame.h> + #include <acpi/reboot.h> + #include <asm/io.h> + #include <asm/apic.h> +@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type) + #ifdef CONFIG_APM_MODULE + EXPORT_SYMBOL(machine_real_restart); + #endif ++STACK_FRAME_NON_STANDARD(machine_real_restart); + + /* + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index c60d8fc..2672102 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -36,6 +36,7 @@ + #include <linux/slab.h> + #include <linux/amd-iommu.h> + #include <linux/hashtable.h> ++#include <linux/frame.h> + + #include <asm/apic.h> + #include <asm/perf_event.h> +@@ -5099,6 +5100,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) + + mark_all_clean(svm->vmcb); + } ++STACK_FRAME_NON_STANDARD(svm_vcpu_run); + + static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) + { +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 9307c0d..d39062c 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -33,6 
+33,7 @@ + #include <linux/slab.h> + #include <linux/tboot.h> + #include <linux/hrtimer.h> ++#include <linux/frame.h> + #include <linux/nospec.h> + #include "kvm_cache_regs.h" + #include "x86.h" +@@ -8680,6 +8681,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) + ); + } + } ++STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); + + static bool vmx_has_emulated_msr(int index) + { +@@ -9120,6 +9122,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) + vmx_recover_nmi_blocking(vmx); + vmx_complete_interrupts(vmx); + } ++STACK_FRAME_NON_STANDARD(vmx_vcpu_run); + + static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) + { +diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S +index c815564..10ffa7e 100644 +--- a/arch/x86/lib/msr-reg.S ++++ b/arch/x86/lib/msr-reg.S +@@ -13,14 +13,14 @@ + .macro op_safe_regs op + ENTRY(\op\()_safe_regs) + pushq %rbx +- pushq %rbp ++ pushq %r12 + movq %rdi, %r10 /* Save pointer */ + xorl %r11d, %r11d /* Return value */ + movl (%rdi), %eax + movl 4(%rdi), %ecx + movl 8(%rdi), %edx + movl 12(%rdi), %ebx +- movl 20(%rdi), %ebp ++ movl 20(%rdi), %r12d + movl 24(%rdi), %esi + movl 28(%rdi), %edi + 1: \op +@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs) + movl %ecx, 4(%r10) + movl %edx, 8(%r10) + movl %ebx, 12(%r10) +- movl %ebp, 20(%r10) ++ movl %r12d, 20(%r10) + movl %esi, 24(%r10) + movl %edi, 28(%r10) +- popq %rbp ++ popq %r12 + popq %rbx + ret + 3: +diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile +index 90568c3..fefb4b6 100644 +--- a/arch/x86/net/Makefile ++++ b/arch/x86/net/Makefile +@@ -1,4 +1,6 @@ + # + # Arch-specific network modules + # ++OBJECT_FILES_NON_STANDARD_bpf_jit.o += y ++ + obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o +diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile +index 066619b..7a25502 100644 +--- a/arch/x86/platform/efi/Makefile ++++ b/arch/x86/platform/efi/Makefile +@@ -1,4 +1,5 @@ + OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y ++OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y + + obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o + obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o +diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile +index a6a198c..0504187 100644 +--- a/arch/x86/power/Makefile ++++ b/arch/x86/power/Makefile +@@ -1,3 +1,5 @@ ++OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y ++ + # __restore_processor_state() restores %gs after S3 resume and so should not + # itself be stack-protected + nostackp := $(call cc-option, -fno-stack-protector) +diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile +index e47e527..4a54059 100644 +--- a/arch/x86/xen/Makefile ++++ b/arch/x86/xen/Makefile +@@ -1,3 +1,6 @@ ++OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y ++OBJECT_FILES_NON_STANDARD_xen-pvh.o := y ++ + ifdef CONFIG_FUNCTION_TRACER + # Do not profile debug and lowlevel utilities + CFLAGS_REMOVE_spinlock.o = -pg +diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c +index 5616755..f5ab72e 100644 +--- a/kernel/kexec_core.c ++++ b/kernel/kexec_core.c +@@ -38,6 +38,7 @@ + #include <linux/syscore_ops.h> + #include <linux/compiler.h> + #include <linux/hugetlb.h> ++#include <linux/frame.h> + + #include <asm/page.h> + #include <asm/sections.h> +@@ -878,7 +879,7 @@ int kexec_load_disabled; + * only when panic_cpu holds the current CPU number; this is the only CPU + * which processes crash_kexec routines. 
+ */ +-void __crash_kexec(struct pt_regs *regs) ++void __noclone __crash_kexec(struct pt_regs *regs) + { + /* Take the kexec_mutex here to prevent sys_kexec_load + * running on one cpu from replacing the crash kernel +@@ -900,6 +901,7 @@ void __crash_kexec(struct pt_regs *regs) + mutex_unlock(&kexec_mutex); + } + } ++STACK_FRAME_NON_STANDARD(__crash_kexec); + + void crash_kexec(struct pt_regs *regs) + { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch new file mode 100644 index 00000000..3fdd5b1b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch @@ -0,0 +1,47 @@ +From 666ab0b0ab5ec0645aca319b17218c976d98b950 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Date: Sun, 3 Jun 2018 13:37:03 +0200 +Subject: [PATCH 90/93] x86/xen: Add unwind hint annotations to xen_setup_gdt + +Not needed in mainline as this function got rewritten in 4.12 + +This enables objtool to grok the iret in the middle of a C function. + +This matches commit 76846bf3cb09 ("x86/asm: Add unwind hint annotations +to sync_core()") + +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/xen/enlighten.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 8b97c87..af79764 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -75,6 +75,7 @@ + #include <asm/mwait.h> + #include <asm/pci_x86.h> + #include <asm/cpu.h> ++#include <asm/unwind_hints.h> + + #ifdef CONFIG_ACPI + #include <linux/acpi.h> +@@ -1450,10 +1451,12 @@ static void __ref xen_setup_gdt(int cpu) + * GDT. The new GDT has __KERNEL_CS with CS.L = 1 + * and we are jumping to reload it. + */ +- asm volatile ("pushq %0\n" ++ asm volatile (UNWIND_HINT_SAVE ++ "pushq %0\n" + "leaq 1f(%%rip),%0\n" + "pushq %0\n" + "lretq\n" ++ UNWIND_HINT_RESTORE + "1:\n" + : "=&r" (dummy) : "0" (__KERNEL_CS)); + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch new file mode 100644 index 00000000..3b05904d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch @@ -0,0 +1,51 @@ +From 0ccf96d6eee2affd66ebde69247397542a03185b Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Wed, 30 May 2018 13:09:56 +0200 +Subject: [PATCH 91/93] x86/amd: revert commit 944e0fc51a89c9827b9 + +Revert commit 944e0fc51a89c9827b98813d65dc083274777c7f ("x86/amd: don't +set X86_BUG_SYSRET_SS_ATTRS when running under Xen") as it is lacking +a prerequisite patch and is making things worse. 
+ +Signed-off-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/amd.c | 5 ++--- + arch/x86/xen/enlighten.c | 4 +++- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 4c2be99..cd0abf8 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -857,9 +857,8 @@ static void init_amd(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + +- /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ +- if (!cpu_has(c, X86_FEATURE_XENPV)) +- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ /* AMD CPUs don't reset SS attributes on SYSRET */ ++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index af79764..c926db0 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1971,8 +1971,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); + + static void xen_set_cpu_features(struct cpuinfo_x86 *c) + { +- if (xen_pv_domain()) ++ if (xen_pv_domain()) { ++ clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + set_cpu_cap(c, X86_FEATURE_XENPV); ++ } + } + + static void xen_pin_vcpu(int cpu) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch new file mode 100644 index 00000000..7860a717 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch @@ -0,0 +1,72 @@ +From 4f306d0db3c6e46b730cba2b9b59a93b46a0a315 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Wed, 30 May 2018 13:09:57 +0200 +Subject: [PATCH 92/93] xen: set cpu capabilities from xen_start_kernel() + +Upstream commit: 0808e80cb760de2733c0527d2090ed2205a1eef8 ("xen: set +cpu capabilities from xen_start_kernel()") + +There is no need to set the same capabilities for each cpu +individually. This can easily be done for all cpus when starting the +kernel. 
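For illustration only, a minimal user-space C sketch (not kernel code) of the pattern this patch applies: a system-wide property is decided once from the boot path instead of being re-derived by a per-CPU callback. The identifiers running_as_pv and feature_xenpv are hypothetical stand-ins for the real PV detection and X86_FEATURE_XENPV handling, not kernel symbols.

    /* Hedged sketch: hoist an idempotent, global capability decision out
     * of the per-CPU bring-up path and make it exactly once at startup.
     * running_as_pv and feature_xenpv are hypothetical stand-ins. */
    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 4

    static bool running_as_pv = true; /* assumption: detected once at boot */
    static bool feature_xenpv;        /* stand-in for the XENPV capability */

    /* After the patch: called once from the boot path, not per CPU. */
    static void init_capabilities_once(void)
    {
            if (running_as_pv)
                    feature_xenpv = true;
    }

    int main(void)
    {
            int cpu;

            init_capabilities_once();
            /* Per-CPU bring-up no longer touches the global capability. */
            for (cpu = 0; cpu < NR_CPUS; cpu++)
                    printf("cpu%d sees XENPV=%d\n", cpu, (int)feature_xenpv);
            return 0;
    }

Every CPU observes the same value because the decision is global and idempotent, which is why repeating it per CPU was unnecessary.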
+ +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/xen/enlighten.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index c926db0..01120da 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -470,6 +470,14 @@ static void __init xen_init_cpuid_mask(void) + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); + } + ++static void __init xen_init_capabilities(void) ++{ ++ if (xen_pv_domain()) { ++ setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); ++ setup_force_cpu_cap(X86_FEATURE_XENPV); ++ } ++} ++ + static void xen_set_debugreg(int reg, unsigned long val) + { + HYPERVISOR_set_debugreg(reg, val); +@@ -1629,6 +1637,7 @@ asmlinkage __visible void __init xen_start_kernel(void) + + xen_init_irq_ops(); + xen_init_cpuid_mask(); ++ xen_init_capabilities(); + + #ifdef CONFIG_X86_LOCAL_APIC + /* +@@ -1969,14 +1978,6 @@ bool xen_hvm_need_lapic(void) + } + EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); + +-static void xen_set_cpu_features(struct cpuinfo_x86 *c) +-{ +- if (xen_pv_domain()) { +- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +- set_cpu_cap(c, X86_FEATURE_XENPV); +- } +-} +- + static void xen_pin_vcpu(int cpu) + { + static bool disable_pinning; +@@ -2023,7 +2024,6 @@ const struct hypervisor_x86 x86_hyper_xen = { + .init_platform = xen_hvm_guest_init, + #endif + .x2apic_available = xen_x2apic_para_available, +- .set_cpu_features = xen_set_cpu_features, + .pin_vcpu = xen_pin_vcpu, + }; + EXPORT_SYMBOL(x86_hyper_xen); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch new file mode 100644 index 00000000..e2416627 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch @@ -0,0 +1,65 @@ +From 7a5ded1f7c88ae162c525486f4be8030fa54fa26 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Wed, 30 May 2018 13:09:58 +0200 +Subject: [PATCH 93/93] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running + under Xen + +Upstream commit: def9331a12977770cc6132d79f8e6565871e8e38 ("x86/amd: +don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen") + +When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set +on AMD cpus. + +This bug/feature bit is kind of special as it will be used very early +when switching threads. Setting the bit and clearing it a little bit +later leaves a critical window where things can go wrong. This time +window has enlarged a little bit by using setup_clear_cpu_cap() instead +of the hypervisor's set_cpu_features callback. It seems this larger +window now makes it rather easy to hit the problem. + +The proper solution is to never set the bit in case of Xen. 
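As a rough sketch of the fixed decision logic (plain C with hypothetical variables, not the kernel's cpu_has()/set_cpu_bug() API): the workaround bit is set conditionally up front, so there is no longer a window in which the bit is set and only later cleared.

    /* Hedged sketch: conditional set instead of set-then-clear, removing
     * the window in which the stale bit could be observed during an early
     * context switch. has_xenpv and bug_sysret_ss_attrs are hypothetical
     * stand-ins, not kernel symbols. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool has_xenpv = true; /* assumption: known before bug setup */
    static bool bug_sysret_ss_attrs;

    static void init_amd_bugs_fixed(void)
    {
            /* AMD CPUs don't reset SS attributes on SYSRET; Xen PV does,
             * so the workaround must never be enabled there. */
            if (!has_xenpv)
                    bug_sysret_ss_attrs = true;
    }

    int main(void)
    {
            init_amd_bugs_fixed();
            printf("SYSRET_SS_ATTRS workaround: %s\n",
                   bug_sysret_ss_attrs ? "enabled" : "not needed (Xen PV)");
            return 0;
    }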
+ +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/cpu/amd.c | 5 +++-- + arch/x86/xen/enlighten.c | 4 +--- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index cd0abf8..4c2be99 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -857,8 +857,9 @@ static void init_amd(struct cpuinfo_x86 *c) + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + +- /* AMD CPUs don't reset SS attributes on SYSRET */ +- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ ++ if (!cpu_has(c, X86_FEATURE_XENPV)) ++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 01120da..fc63c84 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -472,10 +472,8 @@ static void __init xen_init_cpuid_mask(void) + + static void __init xen_init_capabilities(void) + { +- if (xen_pv_domain()) { +- setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); ++ if (xen_pv_domain()) + setup_force_cpu_cap(X86_FEATURE_XENPV); +- } + } + + static void xen_set_debugreg(int reg, unsigned long val) +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc index 06d6de30..e7fb7846 100644 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc @@ -167,3 +167,139 @@ patch 0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch patch 0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch patch 0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch patch 0014-x86-speculation-Move-firmware_restrict_branch_specul.patch +patch 0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch +patch 0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch +patch 0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch +patch 0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch +patch 0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch +patch 0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch +patch 0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch +patch 0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch +patch 0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch +patch 0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch +patch 0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch +patch 0012-KVM-VMX-Make-indirect-call-speculation-safe.patch +patch 0013-x86-kvm-Update-spectre-v1-mitigation.patch +patch 0014-KVM-nVMX-kmap-can-t-fail.patch +patch 0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch +patch 0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch +patch 0017-KVM-nVMX-Eliminate-vmcs02-pool.patch +patch 0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch +patch 0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch +patch 0020-KVM-x86-Add-IBPB-support.patch +patch 0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch +patch 0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch +patch 0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch 
+patch 0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch +patch 0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch +patch 0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch +patch 0027-x86-add-MULTIUSER-dependency-for-KVM.patch +patch 0028-KVM-add-X86_LOCAL_APIC-dependency.patch +patch 0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch +patch 0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch +patch 0031-KVM-nVMX-invvpid-handling-improvements.patch +patch 0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch +patch 0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch +patch 0001-x86-paravirt-objtool-Annotate-indirect-calls.patch +patch 0002-x86-module-Detect-and-skip-invalid-relocations.patch +patch 0003-kvm-svm-Setup-MCG_CAP-on-AMD-properly.patch +patch 0004-kvm-nVMX-Disallow-userspace-injected-exceptions-in-g.patch +patch 0005-x86-cpufeatures-Add-Intel-PCONFIG-cpufeature.patch +patch 0006-x86-speculation-objtool-Annotate-indirect-calls-jump.patch +patch 0007-x86-speculation-Remove-Skylake-C2-from-Speculation-C.patch +patch 0008-x86-reboot-Turn-off-KVM-when-halting-a-CPU.patch +patch 0009-x86-KASLR-Fix-kexec-kernel-boot-crash-when-KASLR-ran.patch +patch 0010-kvm-x86-fix-icebp-instruction-handling.patch +patch 0011-bpf-x64-increase-number-of-passes.patch +patch 0012-x86-mm-kaslr-Use-the-_ASM_MUL-macro-for-multiplicati.patch +patch 0013-KVM-X86-Fix-preempt-the-preemption-timer-cancel.patch +patch 0014-KVM-nVMX-Fix-handling-of-lmsw-instruction.patch +patch 0015-KVM-SVM-do-not-zero-out-segment-attributes-if-segmen.patch +patch 0016-KVM-nVMX-Update-vmcs12-guest_linear_address-on-neste.patch +patch 0017-perf-x86-Fix-possible-Spectre-v1-indexing-for-hw_per.patch +patch 0018-perf-x86-cstate-Fix-possible-Spectre-v1-indexing-for.patch +patch 0019-perf-x86-msr-Fix-possible-Spectre-v1-indexing-in-the.patch +patch 0020-perf-x86-Fix-possible-Spectre-v1-indexing-for-x86_pm.patch +patch 0021-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch +patch 0022-x86-nospec-Simplify-alternative_msr_write.patch +patch 0023-x86-bugs-Concentrate-bug-detection-into-a-separate-f.patch +patch 0024-x86-bugs-Concentrate-bug-reporting-into-a-separate-f.patch +patch 0025-x86-bugs-Read-SPEC_CTRL-MSR-during-boot-and-re-use-r.patch +patch 0026-x86-bugs-KVM-Support-the-combination-of-guest-and-ho.patch +patch 0027-x86-bugs-Expose-sys-.-spec_store_bypass.patch +patch 0028-x86-cpufeatures-Add-X86_FEATURE_RDS.patch +patch 0029-x86-bugs-Provide-boot-parameters-for-the-spec_store_.patch +patch 0030-x86-bugs-intel-Set-proper-CPU-features-and-setup-RDS.patch +patch 0031-x86-bugs-Whitelist-allowed-SPEC_CTRL-MSR-values.patch +patch 0032-x86-bugs-AMD-Add-support-to-disable-RDS-on-Fam-15-16.patch +patch 0033-x86-KVM-VMX-Expose-SPEC_CTRL-Bit-2-to-the-guest.patch +patch 0034-x86-speculation-Create-spec-ctrl.h-to-avoid-include-.patch +patch 0035-x86-process-Optimize-TIF-checks-in-__switch_to_xtra.patch +patch 0036-x86-process-Correct-and-optimize-TIF_BLOCKSTEP-switc.patch +patch 0037-x86-process-Optimize-TIF_NOTSC-switch.patch +patch 0038-x86-process-Allow-runtime-control-of-Speculative-Sto.patch +patch 0039-x86-speculation-Add-prctl-for-Speculative-Store-Bypa.patch +patch 0040-nospec-Move-array_index_nospec-parameter-checking-in.patch +patch 0041-nospec-Allow-index-argument-to-have-const-qualified-.patch +patch 0042-nospec-Kill-array_index_nospec_mask_check.patch +patch 0043-nospec-Include-asm-barrier.h-dependency.patch +patch 
0044-prctl-Add-speculation-control-prctls.patch +patch 0045-nospec-Allow-getting-setting-on-non-current-task.patch +patch 0046-x86-bugs-Make-boot-modes-__ro_after_init.patch +patch 0047-fs-proc-Report-eip-esp-in-prod-PID-stat-for-coredump.patch +patch 0048-proc-fix-coredump-vs-read-proc-stat-race.patch +patch 0049-proc-Provide-details-on-speculation-flaw-mitigations.patch +patch 0050-prctl-Add-force-disable-speculation.patch +patch 0051-seccomp-fix-the-usage-of-get-put_seccomp_filter-in-s.patch +patch 0052-seccomp-Enable-speculation-flaw-mitigations.patch +patch 0053-seccomp-Use-PR_SPEC_FORCE_DISABLE.patch +patch 0054-seccomp-Add-filter-flag-to-opt-out-of-SSB-mitigation.patch +patch 0055-seccomp-Move-speculation-migitation-control-to-arch-.patch +patch 0056-x86-speculation-Make-seccomp-the-default-mode-for-Sp.patch +patch 0057-x86-bugs-Rename-_RDS-to-_SSBD.patch +patch 0058-x86-bugs-Fix-__ssb_select_mitigation-return-type.patch +patch 0059-x86-bugs-Make-cpu_show_common-static.patch +patch 0060-x86-bugs-Fix-the-parameters-alignment-and-missing-vo.patch +patch 0061-x86-cpu-Make-alternative_msr_write-work-for-32-bit-c.patch +patch 0062-KVM-SVM-Move-spec-control-call-after-restore-of-GS.patch +patch 0063-x86-speculation-Use-synthetic-bits-for-IBRS-IBPB-STI.patch +patch 0064-x86-cpufeatures-Disentangle-MSR_SPEC_CTRL-enumeratio.patch +patch 0065-x86-cpufeatures-Disentangle-SSBD-enumeration.patch +patch 0066-x86-cpu-AMD-Fix-erratum-1076-CPB-bit.patch +patch 0067-x86-cpufeatures-Add-FEATURE_ZEN.patch +patch 0068-x86-speculation-Handle-HT-correctly-on-AMD.patch +patch 0069-x86-bugs-KVM-Extend-speculation-control-for-VIRT_SPE.patch +patch 0070-x86-speculation-Add-virtualized-speculative-store-by.patch +patch 0071-x86-speculation-Rework-speculative_store_bypass_upda.patch +patch 0072-x86-bugs-Unify-x86_spec_ctrl_-set_guest-restore_host.patch +patch 0073-x86-bugs-Expose-x86_spec_ctrl_base-directly.patch +patch 0074-x86-bugs-Remove-x86_spec_ctrl_set.patch +patch 0075-x86-bugs-Rework-spec_ctrl-base-and-mask-logic.patch +patch 0076-x86-speculation-KVM-Implement-support-for-VIRT_SPEC_.patch +patch 0077-KVM-SVM-Implement-VIRT_SPEC_CTRL-support-for-SSBD.patch +patch 0078-x86-bugs-Rename-SSBD_NO-to-SSB_NO.patch +patch 0079-x86-kexec-Avoid-double-free_page-upon-do_kexec_load-.patch +patch 0080-KVM-VMX-Expose-SSBD-properly-to-guests.patch +patch 0081-KVM-x86-Update-cpuid-properly-when-CR4.OSXAVE-or-CR4.patch +patch 0082-kvm-x86-IA32_ARCH_CAPABILITIES-is-always-supported.patch +patch 0083-kvm-x86-fix-KVM_XEN_HVM_CONFIG-ioctl.patch +patch 0084-KVM-VMX-raise-internal-error-for-exception-during-in.patch +patch 0085-KVM-lapic-stop-advertising-DIRECTED_EOI-when-in-kern.patch +patch 0086-objtool-Improve-detection-of-BUG-and-other-dead-ends.patch +patch 0087-objtool-Move-checking-code-to-check.c.patch +patch 0088-objtool-sync-up-with-the-4.14.47-version-of-objtool.patch +patch 0089-objtool-x86-Add-several-functions-and-files-to-the-o.patch +patch 0090-x86-xen-Add-unwind-hint-annotations-to-xen_setup_gdt.patch +patch 0091-x86-amd-revert-commit-944e0fc51a89c9827b9.patch +patch 0092-xen-set-cpu-capabilities-from-xen_start_kernel.patch +patch 0093-x86-amd-don-t-set-X86_BUG_SYSRET_SS_ATTRS-when-runni.patch +patch 0001-KVM-VMX-Expose-SSBD-properly-to-guests-4.9-supplemen.patch +patch 0002-complete-e390f9a-port-for-v4.9.106.patch +patch 0003-KVM-x86-introduce-linear_-read-write-_system.patch +patch 0004-KVM-x86-pass-kvm_vcpu-to-kvm_read_guest_virt-and-kvm.patch +patch 
0005-kvm-x86-use-correct-privilege-level-for-sgdt-sidt-fx.patch
+patch 0006-x86-spectre_v1-Disable-compiler-optimizations-over-a.patch
+patch 0007-x86-mce-Improve-error-message-when-kernel-cannot-rec.patch
+patch 0008-x86-mce-Check-for-alternate-indication-of-machine-ch.patch
+patch 0009-x86-mce-Fix-incorrect-Machine-check-from-unknown-sou.patch
+patch 0010-x86-mce-Do-not-overwrite-MCi_STATUS-in-mce_no_way_ou.patch
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0165-License-cleanup-add-SPDX-GPL-2.0-license-identifier-.patch b/meta-snowyowl/recipes-kernel/linux/files/0165-License-cleanup-add-SPDX-GPL-2.0-license-identifier-.patch
new file mode 100644
index 00000000..2c2e70e7
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0165-License-cleanup-add-SPDX-GPL-2.0-license-identifier-.patch
@@ -0,0 +1,179 @@
+From 574c9f1f7a89d3f9179d3c4500223d0fee962153 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 1 Nov 2017 15:07:57 +0100
+Subject: [PATCH 2/9] License cleanup: add SPDX GPL-2.0 license identifier to
+ files with no license
+
+Many source files in the tree are missing licensing information, which
+makes it harder for compliance tools to determine the correct license.
+
+By default all files without license information are under the default
+license of the kernel, which is GPL version 2.
+
+Update the files which contain no license information with the 'GPL-2.0'
+SPDX license identifier. The SPDX identifier is a legally binding
+shorthand, which can be used instead of the full boiler plate text.
+
+This patch is based on work done by Thomas Gleixner and Kate Stewart and
+Philippe Ombredanne.
+
+How this work was done:
+
+Patches were generated and checked against linux-4.14-rc6 for a subset of
+the use cases:
+ - file had no licensing information in it.
+ - file was a */uapi/* one with no licensing information in it,
+ - file was a */uapi/* one with existing licensing information,
+
+Further patches will be generated in subsequent months to fix up cases
+where non-standard license headers were used, and references to license
+had to be inferred by heuristics based on keywords.
+
+The analysis to determine which SPDX License Identifier to be applied to
+a file was done in a spreadsheet of side by side results from the
+output of two independent scanners (ScanCode & Windriver) producing SPDX
+tag:value files created by Philippe Ombredanne. Philippe prepared the
+base worksheet, and did an initial spot review of a few 1000 files.
+
+The 4.13 kernel was the starting point of the analysis with 60,537 files
+assessed. Kate Stewart did a file by file comparison of the scanner
+results in the spreadsheet to determine which SPDX license identifier(s)
+to be applied to the file. She confirmed any determination that was not
+immediately clear with lawyers working with the Linux Foundation.
+
+Criteria used to select files for SPDX license identifier tagging were:
+ - Files considered eligible had to be source code files.
+ - Make and config files were included as candidates if they contained >5
+   lines of source
+ - File already had some variant of a license header in it (even if <5
+   lines).
+
+All documentation files were explicitly excluded.
+
+The following heuristics were used to determine which SPDX license
+identifiers to apply.
+
+ - when both scanners couldn't find any license traces, file was
+   considered to have no license information in it, and the top level
+   COPYING file license applied.
+
+   For non */uapi/* files that summary was:
+
+   SPDX license identifier                            # files
+   ---------------------------------------------------|-------
+   GPL-2.0                                              11139
+
+   and resulted in the first patch in this series.
+
+   If that file was a */uapi/* path one, it was "GPL-2.0 WITH
+   Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was:
+
+   SPDX license identifier                            # files
+   ---------------------------------------------------|-------
+   GPL-2.0 WITH Linux-syscall-note                        930
+
+   and resulted in the second patch in this series.
+
+ - if a file had some form of licensing information in it, and was one
+   of the */uapi/* ones, it was denoted with the Linux-syscall-note if
+   any GPL family license was found in the file or had no licensing in
+   it (per prior point). Results summary:
+
+   SPDX license identifier                            # files
+   ---------------------------------------------------|------
+   GPL-2.0 WITH Linux-syscall-note                        270
+   GPL-2.0+ WITH Linux-syscall-note                       169
+   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)     21
+   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)     17
+   LGPL-2.1+ WITH Linux-syscall-note                       15
+   GPL-1.0+ WITH Linux-syscall-note                        14
+   ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)     5
+   LGPL-2.0+ WITH Linux-syscall-note                        4
+   LGPL-2.1 WITH Linux-syscall-note                         3
+   ((GPL-2.0 WITH Linux-syscall-note) OR MIT)               3
+   ((GPL-2.0 WITH Linux-syscall-note) AND MIT)              1
+
+   and that resulted in the third patch in this series.
+
+ - when the two scanners agreed on the detected license(s), that became
+   the concluded license(s).
+
+ - when there was disagreement between the two scanners (one detected a
+   license but the other didn't, or they both detected different
+   licenses) a manual inspection of the file occurred.
+
+ - In most cases a manual inspection of the information in the file
+   resulted in a clear resolution of the license that should apply (and
+   which scanner probably needed to revisit its heuristics).
+
+ - When it was not immediately clear, the license identifier was
+   confirmed with lawyers working with the Linux Foundation.
+
+ - If there was any question as to the appropriate license identifier,
+   the file was flagged for further research and to be revisited later
+   in time.
+
+In total, over 70 hours of logged manual review was done on the
+spreadsheet to determine the SPDX license identifiers to apply to the
+source files by Kate, Philippe, Thomas and, in some cases, confirmation
+by lawyers working with the Linux Foundation.
+
+Kate also obtained a third independent scan of the 4.13 code base from
+FOSSology, and compared selected files where the other two scanners
+disagreed against that SPDX file, to see if there were new insights. The
+Windriver scanner is based on an older version of FOSSology in part, so
+they are related.
+
+Thomas did random spot checks in about 500 files from the spreadsheets
+for the uapi headers and agreed with SPDX license identifier in the
+files he inspected. For the non-uapi files Thomas did random spot checks
+in about 15000 files.
+
+In the initial set of patches against 4.14-rc6, 3 files were found to have
+copy/paste license identifier errors, and have been fixed to reflect the
+correct identifier.
+
+Additionally Philippe spent 10 hours this week doing a detailed manual
+inspection and review of the 12,461 patched files from the initial patch
+version early this week with:
+ - a full scancode scan run, collecting the matched texts, detected
+   license ids and scores
+ - reviewing anything where there was a license detected (about 500+
+   files) to ensure that the applied SPDX license was correct
+ - reviewing anything where there was no detection but the patch license
+   was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
+   SPDX license was correct
+
+This produced a worksheet with 20 files needing minor correction. This
+worksheet was then exported into 3 different .csv files for the
+different types of files to be modified.
+
+These .csv files were then reviewed by Greg. Thomas wrote a script to
+parse the csv files and add the proper SPDX tag to the file, in the
+format that the file expected. This script was further refined by Greg
+based on the output to detect more types of files automatically and to
+distinguish between header and source .c files (which need different
+comment types.) Finally Greg ran the script using the .csv files to
+generate the patches.
+
+Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
+Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ drivers/net/ethernet/amd/xgbe/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile
+index 0dea8f5..620785f 100755
+--- a/drivers/net/ethernet/amd/xgbe/Makefile
++++ b/drivers/net/ethernet/amd/xgbe/Makefile
+@@ -1,3 +1,4 @@
++# SPDX-License-Identifier: GPL-2.0
+ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o
+
+ amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \
+--
+2.7.4
+
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0166-mm-remove-__GFP_COLD.patch b/meta-snowyowl/recipes-kernel/linux/files/0166-mm-remove-__GFP_COLD.patch
new file mode 100644
index 00000000..9a4c674a
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0166-mm-remove-__GFP_COLD.patch
@@ -0,0 +1,58 @@
+From bfe5cf0c74a52b490e713d81fb60d3467a8dea30 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@techsingularity.net>
+Date: Wed, 15 Nov 2017 17:38:03 -0800
+Subject: [PATCH 3/9] mm: remove __GFP_COLD
+
+As the page free path makes no distinction between cache hot and cold
+pages, there is no real useful ordering of pages in the free list that
+allocation requests can take advantage of. Judging from the users of
+__GFP_COLD, it is likely that a number of them are the result of copying
+other sites instead of actually measuring the impact. Remove the
+__GFP_COLD parameter which simplifies a number of paths in the page
+allocator.
+
+This is potentially controversial but bear in mind that the size of the
+per-cpu pagelists versus modern cache sizes means that the whole per-cpu
+list can often fit in the L3 cache. Hence, there is only a potential
+benefit for microbenchmarks that alloc/free pages in a tight loop. It's
+even worse when THP is taken into account which has little or no chance
+of getting a cache-hot page as the per-cpu list is bypassed and the
+zeroing of multiple pages will thrash the cache anyway.
+
+The truncate microbenchmarks are not shown as this patch affects the
+allocation path and not the free path. A page fault microbenchmark was
+tested but it showed no significant difference which is not surprising
+given that the __GFP_COLD branches are a minuscule percentage of the
+fault path.
+
+Link: http://lkml.kernel.org/r/20171018075952.10627-9-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+index 45d9230..cc1e4f8 100755
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+@@ -295,7 +295,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
+ 	order = alloc_order;
+
+ 	/* Try to obtain pages, decreasing order if necessary */
+-	gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
++	gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
+ 	while (order >= 0) {
+ 		pages = alloc_pages_node(node, gfp, order);
+ 		if (pages)
+--
+2.7.4
+
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0167-net-amd-xgbe-Get-rid-of-custom-hex_dump_to_buffer.patch b/meta-snowyowl/recipes-kernel/linux/files/0167-net-amd-xgbe-Get-rid-of-custom-hex_dump_to_buffer.patch
new file mode 100644
index 00000000..35ba333f
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0167-net-amd-xgbe-Get-rid-of-custom-hex_dump_to_buffer.patch
@@ -0,0 +1,67 @@
+From 52147ee16906c134c01a75cb72ab134c1bd98f89 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Date: Tue, 19 Dec 2017 23:22:15 +0200
+Subject: [PATCH 4/9] net: amd-xgbe: Get rid of custom hex_dump_to_buffer()
+
+Get rid of yet another custom hex_dump_to_buffer().
+
+The output is slightly changed, i.e. each byte followed by white space.
+
+Note, we don't use print_hex_dump() here since the original code uses
+netdev_dbg().
+
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index 1cb532b..e6984ac 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -2930,9 +2930,8 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *pdata, struct xgbe_ring *ring, + void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx) + { + struct ethhdr *eth = (struct ethhdr *)skb->data; +- unsigned char *buf = skb->data; + unsigned char buffer[128]; +- unsigned int i, j; ++ unsigned int i; + + netdev_dbg(netdev, "\n************** SKB dump ****************\n"); + +@@ -2943,22 +2942,13 @@ void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx) + netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source); + netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto)); + +- for (i = 0, j = 0; i < skb->len;) { +- j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx", +- buf[i++]); +- +- if ((i % 32) == 0) { +- netdev_dbg(netdev, " %#06x: %s\n", i - 32, buffer); +- j = 0; +- } else if ((i % 16) == 0) { +- buffer[j++] = ' '; +- buffer[j++] = ' '; +- } else if ((i % 4) == 0) { +- buffer[j++] = ' '; +- } ++ for (i = 0; i < skb->len; i += 32) { ++ unsigned int len = min(skb->len - i, 32U); ++ ++ hex_dump_to_buffer(&skb->data[i], len, 32, 1, ++ buffer, sizeof(buffer), false); ++ netdev_dbg(netdev, " %#06x: %s\n", i, buffer); + } +- if (i % 32) +- netdev_dbg(netdev, " %#06x: %s\n", i - (i % 32), buffer); + + netdev_dbg(netdev, "\n************** SKB dump ****************\n"); + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0168-net-amd-xgbe-fix-comparison-to-bitshift-when-dealing.patch b/meta-snowyowl/recipes-kernel/linux/files/0168-net-amd-xgbe-fix-comparison-to-bitshift-when-dealing.patch new file mode 100644 index 00000000..6aba56fd --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0168-net-amd-xgbe-fix-comparison-to-bitshift-when-dealing.patch @@ -0,0 +1,33 @@ +From 88ff6467a9863d680484ab0afcf400c45b47ba78 Mon Sep 17 00:00:00 2001 +From: Wolfram Sang <wsa+renesas@sang-engineering.com> +Date: Mon, 5 Feb 2018 21:10:01 +0100 +Subject: [PATCH 5/9] net: amd-xgbe: fix comparison to bitshift when dealing + with a mask + +Due to a typo, the mask was destroyed by a comparison instead of a bit +shift. + +Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com> +Acked-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index e6984ac..5d47b69 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -594,7 +594,7 @@ static void xgbe_isr_task(unsigned long data) + + reissue_mask = 1 << 0; + if (!pdata->per_channel_irq) +- reissue_mask |= 0xffff < 4; ++ reissue_mask |= 0xffff << 4; + + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, reissue_mask); + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0169-amd-xgbe-Restore-PCI-interrupt-enablement-setting-on.patch b/meta-snowyowl/recipes-kernel/linux/files/0169-amd-xgbe-Restore-PCI-interrupt-enablement-setting-on.patch new file mode 100644 index 00000000..c4e099c4 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0169-amd-xgbe-Restore-PCI-interrupt-enablement-setting-on.patch @@ -0,0 +1,38 @@ +From 6be6051f2873b517d8eeb6c302930d44ebe295f1 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Tue, 20 Feb 2018 15:22:05 -0600 +Subject: [PATCH 6/9] amd-xgbe: Restore PCI interrupt enablement setting on + resume + +After resuming from suspend, the PCI device support must re-enable the +interrupt setting so that interrupts are actually delivered. + +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +index 464824b..82d1f41 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +@@ -426,12 +426,11 @@ static int xgbe_pci_resume(struct pci_dev *pdev) + struct net_device *netdev = pdata->netdev; + int ret = 0; + ++ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); ++ + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + +- /* Enable all interrupts in the hardware */ +- XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); +- + if (netif_running(netdev)) { + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0170-ethernet-Use-octal-not-symbolic-permissions.patch b/meta-snowyowl/recipes-kernel/linux/files/0170-ethernet-Use-octal-not-symbolic-permissions.patch new file mode 100644 index 00000000..98031fc3 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0170-ethernet-Use-octal-not-symbolic-permissions.patch @@ -0,0 +1,69 @@ +From 46fa149f6cd1172c5a6c8299cc47ef949e085788 Mon Sep 17 00:00:00 2001 +From: Joe Perches <joe@perches.com> +Date: Fri, 23 Mar 2018 16:34:44 -0700 +Subject: [PATCH 7/9] ethernet: Use octal not symbolic permissions + +Prefer the direct use of octal for permissions. + +Done with checkpatch -f --types=SYMBOLIC_PERMS --fix-inplace +and some typing. + +Miscellanea: + +o Whitespace neatening around these conversions. + +Signed-off-by: Joe Perches <joe@perches.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 +++++----- + drivers/net/ethernet/amd/xgbe/xgbe-main.c | 2 +- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index 5d47b69..6bd8f38 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -136,21 +136,21 @@ static unsigned int ecc_ded_period = 600; + + #ifdef CONFIG_AMD_XGBE_HAVE_ECC + /* Only expose the ECC parameters if supported */ +-module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO); ++module_param(ecc_sec_info_threshold, uint, 0644); + MODULE_PARM_DESC(ecc_sec_info_threshold, + " ECC corrected error informational threshold setting"); + +-module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO); ++module_param(ecc_sec_warn_threshold, uint, 0644); + MODULE_PARM_DESC(ecc_sec_warn_threshold, + " ECC corrected error warning threshold setting"); + +-module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO); ++module_param(ecc_sec_period, uint, 0644); + MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)"); + +-module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO); ++module_param(ecc_ded_threshold, uint, 0644); + MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting"); + +-module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO); ++module_param(ecc_ded_period, uint, 0644); + MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)"); + #endif + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c +index d91fa59..795e556 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c +@@ -131,7 +131,7 @@ MODULE_VERSION(XGBE_DRV_VERSION); + MODULE_DESCRIPTION(XGBE_DRV_DESC); + + static int debug = -1; +-module_param(debug, int, S_IWUSR | S_IRUGO); ++module_param(debug, int, 0644); + MODULE_PARM_DESC(debug, " Network interface message level setting"); + + static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN | +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0171-amd-xgbe-Only-use-the-SFP-supported-transceiver-sign.patch b/meta-snowyowl/recipes-kernel/linux/files/0171-amd-xgbe-Only-use-the-SFP-supported-transceiver-sign.patch new file mode 100644 index 00000000..f4761c30 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0171-amd-xgbe-Only-use-the-SFP-supported-transceiver-sign.patch @@ -0,0 +1,137 @@ +From 884f0679eda94d4cc39f4b85cfc2697e66d4773f Mon Sep 17 00:00:00 2001 +From: Tom Lendacky <thomas.lendacky@amd.com> +Date: Mon, 23 Apr 2018 11:43:34 -0500 +Subject: [PATCH 8/9] amd-xgbe: Only use the SFP supported transceiver signals + +The SFP eeprom indicates the transceiver signals (Rx LOS, Tx Fault, etc.) +that it supports. Update the driver to include checking the eeprom data +when deciding whether to use a transceiver signal. + +Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 71 ++++++++++++++++++++++------- + 1 file changed, 54 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +index f68e920..23139cf 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +@@ -250,6 +250,10 @@ enum xgbe_sfp_speed { + #define XGBE_SFP_BASE_VENDOR_SN 4 + #define XGBE_SFP_BASE_VENDOR_SN_LEN 16 + ++#define XGBE_SFP_EXTD_OPT1 1 ++#define XGBE_SFP_EXTD_OPT1_RX_LOS BIT(1) ++#define XGBE_SFP_EXTD_OPT1_TX_FAULT BIT(3) ++ + #define XGBE_SFP_EXTD_DIAG 28 + #define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) + +@@ -329,6 +333,7 @@ struct xgbe_phy_data { + + unsigned int sfp_gpio_address; + unsigned int sfp_gpio_mask; ++ unsigned int sfp_gpio_inputs; + unsigned int sfp_gpio_rx_los; + unsigned int sfp_gpio_tx_fault; + unsigned int sfp_gpio_mod_absent; +@@ -983,6 +988,49 @@ static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) + phy_data->sfp_phy_avail = 1; + } + ++static bool xgbe_phy_check_sfp_rx_los(struct xgbe_phy_data *phy_data) ++{ ++ u8 *sfp_extd = phy_data->sfp_eeprom.extd; ++ ++ if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_RX_LOS)) ++ return false; ++ ++ if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) ++ return false; ++ ++ if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_rx_los)) ++ return true; ++ ++ return false; ++} ++ ++static bool xgbe_phy_check_sfp_tx_fault(struct xgbe_phy_data *phy_data) ++{ ++ u8 *sfp_extd = phy_data->sfp_eeprom.extd; ++ ++ if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_TX_FAULT)) ++ return false; ++ ++ if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) ++ return false; ++ ++ if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_tx_fault)) ++ return true; ++ ++ return false; ++} ++ ++static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data) ++{ ++ if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) ++ return false; ++ ++ if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_mod_absent)) ++ return true; ++ ++ return false; ++} ++ + static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata) + { + struct xgbe_phy_data *phy_data = pdata->phy_data; +@@ -1028,6 +1076,10 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) + if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) + return; + ++ /* Update transceiver signals (eeprom extd/options) */ ++ phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data); ++ phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data); ++ + if (xgbe_phy_sfp_parse_quirks(pdata)) + return; + +@@ -1193,7 +1245,6 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) + static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) + { + struct xgbe_phy_data *phy_data = pdata->phy_data; +- unsigned int gpio_input; + u8 gpio_reg, gpio_ports[2]; + int ret; + +@@ -1208,23 +1259,9 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) + return; + } + +- gpio_input = (gpio_ports[1] << 8) | gpio_ports[0]; +- +- if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) { +- /* No GPIO, just assume the module is present for now */ +- phy_data->sfp_mod_absent = 0; +- } else { +- if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent))) +- phy_data->sfp_mod_absent = 0; +- } +- +- if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) && +- (gpio_input 
& (1 << phy_data->sfp_gpio_rx_los))) +- phy_data->sfp_rx_los = 1; ++ phy_data->sfp_gpio_inputs = (gpio_ports[1] << 8) | gpio_ports[0]; + +- if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) && +- (gpio_input & (1 << phy_data->sfp_gpio_tx_fault))) +- phy_data->sfp_tx_fault = 1; ++ phy_data->sfp_mod_absent = xgbe_phy_check_sfp_mod_absent(phy_data); + } + + static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0172-Modification-to-previous-commit-305f3ad05fec3a5f0d7b.patch b/meta-snowyowl/recipes-kernel/linux/files/0172-Modification-to-previous-commit-305f3ad05fec3a5f0d7b.patch new file mode 100644 index 00000000..70a98481 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0172-Modification-to-previous-commit-305f3ad05fec3a5f0d7b.patch @@ -0,0 +1,184 @@ +From 9ce3ea9ed41bd4ecab41d7c23c8a924deb047616 Mon Sep 17 00:00:00 2001 +From: Sudheesh Mavila <sudheesh.mavila@amd.com> +Date: Thu, 3 May 2018 11:52:34 +0530 +Subject: [PATCH 9/9] Modification to previous commit + 305f3ad05fec3a5f0d7b51857b4bc99f527db7a4 and commit + 41fb5f9d75199370d9b3adc05bf642b0b13e29d4 + + Based on upstream commit 4d945663a6a0acf3cbe45940503f2eb9584bfee7 and 96f4d430c507ed4856048c2dc9c1a2ea5b5e74e4 + amd-xgbe: Improve KR auto-negotiation and training + amd-xgbe: Add pre/post auto-negotiation phy hooks + +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 20 +++++++++++++++----- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 1 + + drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 1 + + drivers/net/ethernet/amd/xgbe/xgbe-main.c | 1 + + drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 1 + + drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 15 ++++++++++----- + drivers/net/ethernet/amd/xgbe/xgbe.h | 3 ++- + 7 files changed, 31 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +index 79387b4..b911439 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +@@ -519,11 +519,21 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) + "debugfs_create_file failed\n"); + } + +- pfile = debugfs_create_bool("cdr_track_early", 0600, +- pdata->xgbe_debugfs, +- &pdata->debugfs_cdr_track_early); +- if (!pfile) +- netdev_err(pdata->netdev, "debugfs_create_bool failed\n"); ++ if (pdata->vdata->an_cdr_workaround) { ++ pfile = debugfs_create_bool("an_cdr_workaround", 0600, ++ pdata->xgbe_debugfs, ++ &pdata->debugfs_an_cdr_workaround); ++ if (!pfile) ++ netdev_err(pdata->netdev, ++ "debugfs_create_bool failed\n"); ++ ++ pfile = debugfs_create_bool("an_cdr_track_early", 0600, ++ pdata->xgbe_debugfs, ++ &pdata->debugfs_an_cdr_track_early); ++ if (!pfile) ++ netdev_err(pdata->netdev, ++ "debugfs_create_bool failed\n"); ++ } + + kfree(buf); + } +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index 6bd8f38..b70832e 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -118,6 +118,7 @@ + #include <linux/spinlock.h> + #include <linux/tcp.h> + #include <linux/if_vlan.h> ++#include <linux/interrupt.h> + #include <net/busy_poll.h> + #include <linux/clk.h> + #include <linux/if_ether.h> +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +index dc74341..4d9062d 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c ++++ 
b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +@@ -115,6 +115,7 @@ + */ + + #include <linux/module.h> ++#include <linux/interrupt.h> + #include <linux/kmod.h> + #include <linux/delay.h> + #include <linux/completion.h> +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c +index 795e556..441d0973 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c +@@ -349,6 +349,7 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata) + XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); + + /* Call MDIO/PHY initialization routine */ ++ pdata->debugfs_an_cdr_workaround = pdata->vdata->an_cdr_workaround; + ret = pdata->phy_if.phy_init(pdata); + if (ret) + return ret; +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +index a511e61..1b45cd7 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +@@ -114,6 +114,7 @@ + * THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include <linux/interrupt.h> + #include <linux/module.h> + #include <linux/kmod.h> + #include <linux/mdio.h> +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +index 23139cf..aac8843 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +@@ -152,6 +152,9 @@ + #define XGBE_CDR_DELAY_INC 10000 + #define XGBE_CDR_DELAY_MAX 100000 + ++/* RRC frequency during link status check */ ++#define XGBE_RRC_FREQUENCY 10 ++ + enum xgbe_port_mode { + XGBE_PORT_MODE_RSVD = 0, + XGBE_PORT_MODE_BACKPLANE, +@@ -2407,7 +2410,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) + return 1; + + /* No link, attempt a receiver reset cycle */ +- if (phy_data->rrc_count++) { ++ if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { + phy_data->rrc_count = 0; + xgbe_phy_rrc(pdata); + } +@@ -2719,7 +2722,7 @@ static void xgbe_phy_cdr_track(struct xgbe_prv_data *pdata) + { + struct xgbe_phy_data *phy_data = pdata->phy_data; + +- if (!pdata->vdata->an_cdr_workaround) ++ if (!pdata->debugfs_an_cdr_workaround) + return; + + if (!phy_data->phy_cdr_notrack) +@@ -2739,7 +2742,7 @@ static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata) + { + struct xgbe_phy_data *phy_data = pdata->phy_data; + +- if (!pdata->vdata->an_cdr_workaround) ++ if (!pdata->debugfs_an_cdr_workaround) + return; + + if (phy_data->phy_cdr_notrack) +@@ -2756,13 +2759,13 @@ static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata) + + static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) + { +- if (!pdata->debugfs_cdr_track_early) ++ if (!pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); + } + + static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) + { +- if (pdata->debugfs_cdr_track_early) ++ if (pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); + } + +@@ -2785,6 +2788,8 @@ static void xgbe_phy_an_post(struct xgbe_prv_data *pdata) + default: + if (phy_data->phy_cdr_delay < XGBE_CDR_DELAY_MAX) + phy_data->phy_cdr_delay += XGBE_CDR_DELAY_INC; ++ else ++ phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; + break; + } + break; +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h +index a9c197c..95d4b56 100755 +--- a/drivers/net/ethernet/amd/xgbe/xgbe.h ++++ b/drivers/net/ethernet/amd/xgbe/xgbe.h +@@ -1264,7 +1264,8 @@ struct xgbe_prv_data { + + unsigned int debugfs_xi2c_reg; + +- bool 
debugfs_cdr_track_early; ++ bool debugfs_an_cdr_workaround; ++ bool debugfs_an_cdr_track_early; + }; + + /* Function prototypes*/ +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0173-crypto-gcm-wait-for-crypto-op-not-signal-safe.patch b/meta-snowyowl/recipes-kernel/linux/files/0173-crypto-gcm-wait-for-crypto-op-not-signal-safe.patch new file mode 100755 index 00000000..b784e59e --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0173-crypto-gcm-wait-for-crypto-op-not-signal-safe.patch @@ -0,0 +1,43 @@ +From 49050074c6839e918b47394c39ccdd8d53959543 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef <gilad@benyossef.com> +Date: Thu, 18 May 2017 16:29:25 +0300 +Subject: [PATCH 019/331] crypto: gcm - wait for crypto op not signal safe + +commit f3ad587070d6bd961ab942b3fd7a85d00dfc934b upstream. + +crypto_gcm_setkey() was using wait_for_completion_interruptible() to +wait for completion of async crypto op but if a signal occurs it +may return before DMA ops of HW crypto provider finish, thus +corrupting the data buffer that is kfree'ed in this case. + +Resolve this by using wait_for_completion() instead. + +Reported-by: Eric Biggers <ebiggers3@gmail.com> +Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + crypto/gcm.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/crypto/gcm.c b/crypto/gcm.c +index f624ac9..dd33fbd 100644 +--- a/crypto/gcm.c ++++ b/crypto/gcm.c +@@ -152,10 +152,8 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key, + + err = crypto_skcipher_encrypt(&data->req); + if (err == -EINPROGRESS || err == -EBUSY) { +- err = wait_for_completion_interruptible( +- &data->result.completion); +- if (!err) +- err = data->result.err; ++ wait_for_completion(&data->result.completion); ++ err = data->result.err; + } + + if (err) +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0174-crypto-drbg-wait-for-crypto-op-not-signal-safe.patch b/meta-snowyowl/recipes-kernel/linux/files/0174-crypto-drbg-wait-for-crypto-op-not-signal-safe.patch new file mode 100755 index 00000000..232d5dd8 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0174-crypto-drbg-wait-for-crypto-op-not-signal-safe.patch @@ -0,0 +1,42 @@ +From 1b7a262b9e41217cd8fe0c4b0bf7532e4ed5d4bb Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef <gilad@benyossef.com> +Date: Thu, 18 May 2017 16:29:24 +0300 +Subject: [PATCH 020/331] crypto: drbg - wait for crypto op not signal safe + +commit a5dfefb1c3f3db81662556393fd9283511e08430 upstream. + +drbg_kcapi_sym_ctr() was using wait_for_completion_interruptible() to +wait for completion of async crypto op but if a signal occurs it +may return before DMA ops of HW crypto provider finish, thus +corrupting the output buffer. + +Resolve this by using wait_for_completion() instead. 
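Both this fix and the gcm one above rely on the same async-crypto completion idiom: the request's callback is the only reliable signal that the hardware (and any DMA into the caller's buffers) is really finished, so the submitter has to block uninterruptibly until the callback has run. A minimal sketch of the pattern in the v4.9 API (my_result and my_op_done are illustrative names, not symbols from these patches):

    struct my_result {
            struct completion completion;
            int err;
    };

    static void my_op_done(struct crypto_async_request *req, int err)
    {
            struct my_result *res = req->data;

            if (err == -EINPROGRESS)
                    return;         /* left the backlog; final callback still pending */

            res->err = err;
            complete(&res->completion);     /* buffers may be released only now */
    }

    /* submit side */
    err = crypto_skcipher_encrypt(req);
    if (err == -EINPROGRESS || err == -EBUSY) {
            wait_for_completion(&res.completion);   /* not the _interruptible variant */
            err = res.err;
    }

Later kernels factor this pattern into crypto_wait_req()/DECLARE_CRYPTO_WAIT; on 4.9 it is open-coded as in the diffs here.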
+
+Reported-by: Eric Biggers <ebiggers3@gmail.com>
+Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ crypto/drbg.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/crypto/drbg.c b/crypto/drbg.c
+index 053035b..123d211 100644
+--- a/crypto/drbg.c
++++ b/crypto/drbg.c
+@@ -1768,9 +1768,8 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
+ 		break;
+ 	case -EINPROGRESS:
+ 	case -EBUSY:
+-		ret = wait_for_completion_interruptible(
+-			&drbg->ctr_completion);
+-		if (!ret && !drbg->ctr_async_err) {
++		wait_for_completion(&drbg->ctr_completion);
++		if (!drbg->ctr_async_err) {
+ 			reinit_completion(&drbg->ctr_completion);
+ 			break;
+ 		}
+--
+2.7.4
+
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0175-crypto-asymmetric_keys-handle-EBUSY-due-to-backlog-c.patch b/meta-snowyowl/recipes-kernel/linux/files/0175-crypto-asymmetric_keys-handle-EBUSY-due-to-backlog-c.patch
new file mode 100755
index 00000000..52df2713
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0175-crypto-asymmetric_keys-handle-EBUSY-due-to-backlog-c.patch
@@ -0,0 +1,40 @@
+From ae89eb389dd82c3ff6ec07f28019f7f474605827 Mon Sep 17 00:00:00 2001
+From: Gilad Ben-Yossef <gilad@benyossef.com>
+Date: Thu, 18 May 2017 16:29:23 +0300
+Subject: [PATCH 021/331] crypto: asymmetric_keys - handle EBUSY due to backlog
+ correctly
+
+commit e68368aed56324e2e38d4f6b044bb8cf82077fc2 upstream.
+
+public_key_verify_signature() was passing the CRYPTO_TFM_REQ_MAY_BACKLOG
+flag to akcipher_request_set_callback() but was not handling correctly
+the case where a -EBUSY error could be returned from the call to
+crypto_akcipher_verify() if backlog was used, possibly causing
+data corruption due to use-after-free of buffers.
+
+Resolve this by handling -EBUSY correctly.
+
+Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ crypto/asymmetric_keys/public_key.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
+index fd76b5f..4955eb6 100644
+--- a/crypto/asymmetric_keys/public_key.c
++++ b/crypto/asymmetric_keys/public_key.c
+@@ -140,7 +140,7 @@ int public_key_verify_signature(const struct public_key *pkey,
+ 	 * signature and returns that to us.
+ 	 */
+ 	ret = crypto_akcipher_verify(req);
+-	if (ret == -EINPROGRESS) {
++	if ((ret == -EINPROGRESS) || (ret == -EBUSY)) {
+ 		wait_for_completion(&compl.completion);
+ 		ret = compl.err;
+ 	}
+--
+2.7.4
+
diff --git a/meta-snowyowl/recipes-kernel/linux/files/0176-crypto-Work-around-deallocated-stack-frame-reference.patch b/meta-snowyowl/recipes-kernel/linux/files/0176-crypto-Work-around-deallocated-stack-frame-reference.patch
new file mode 100755
index 00000000..c1370fee
--- /dev/null
+++ b/meta-snowyowl/recipes-kernel/linux/files/0176-crypto-Work-around-deallocated-stack-frame-reference.patch
@@ -0,0 +1,119 @@
+From ab03c8d27dd8c8f17ea773d34cc3e69af89c911f Mon Sep 17 00:00:00 2001
+From: David Miller <davem@davemloft.net>
+Date: Fri, 2 Jun 2017 11:28:54 -0400
+Subject: [PATCH 028/331] crypto: Work around deallocated stack frame reference
+ gcc bug on sparc.
+
+commit d41519a69b35b10af7fda867fb9100df24fdf403 upstream. 
+
+On sparc, if we have an alloca() like situation, as is the case with
+SHASH_DESC_ON_STACK(), we can end up referencing deallocated stack
+memory. The result can be that the value is clobbered if a trap
+or interrupt arrives at just the right instruction.
+
+It only occurs if the function ends returning a value from that
+alloca() area and that value can be placed into the return value
+register using a single instruction.
+
+For example, in lib/libcrc32c.c:crc32c() we end up with a return
+sequence like:
+
+	return	%i7+8
+	 lduw	[%o5+16], %o0	! MEM[(u32 *)__shash_desc.1_10 + 16B],
+
+%o5 holds the base of the on-stack area allocated for the shash
+descriptor. But the return released the stack frame and the
+register window.
+
+So if an interrupt arrives between 'return' and 'lduw', then
+the value read at %o5+16 can be corrupted.
+
+Add a data compiler barrier to work around this problem. This is
+exactly what the gcc fix will end up doing as well, and it absolutely
+should not change the code generated for other cpus (unless gcc
+on them has the same bug :-)
+
+With crucial insight from Eric Sandeen.
+
+Reported-by: Anatoly Pugachev <matorola@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+---
+ fs/btrfs/hash.c | 5 ++++-
+ fs/f2fs/f2fs.h  | 5 ++++-
+ lib/libcrc32c.c | 6 ++++--
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
+index a97fdc1..baacc18 100644
+--- a/fs/btrfs/hash.c
++++ b/fs/btrfs/hash.c
+@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
+ {
+ 	SHASH_DESC_ON_STACK(shash, tfm);
+ 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
++	u32 retval;
+ 	int err;
+
+ 	shash->tfm = tfm;
+@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
+ 	err = crypto_shash_update(shash, address, length);
+ 	BUG_ON(err);
+
+-	return *ctx;
++	retval = *ctx;
++	barrier_data(ctx);
++	return retval;
+ }
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 3a1640b..4051c24 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -948,6 +948,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
+ {
+ 	SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
+ 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
++	u32 retval;
+ 	int err;
+
+ 	shash->tfm = sbi->s_chksum_driver;
+@@ -957,7 +958,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
+ 	err = crypto_shash_update(shash, address, length);
+ 	BUG_ON(err);
+
+-	return *ctx;
++	retval = *ctx;
++	barrier_data(ctx);
++	return retval;
+ }
+
+ static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
+diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
+index 74a54b7..9f79547 100644
+--- a/lib/libcrc32c.c
++++ b/lib/libcrc32c.c
+@@ -43,7 +43,7 @@ static struct crypto_shash *tfm;
+ u32 crc32c(u32 crc, const void *address, unsigned int length)
+ {
+ 	SHASH_DESC_ON_STACK(shash, tfm);
+-	u32 *ctx = (u32 *)shash_desc_ctx(shash);
++	u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
+ 	int err;
+
+ 	shash->tfm = tfm;
+@@ -53,7 +53,9 @@ u32 crc32c(u32 crc, const void *address, unsigned int length)
+ 	err = crypto_shash_update(shash, address, length);
+ 	BUG_ON(err);
+
+-	return *ctx;
++	ret = *ctx;
++	barrier_data(ctx);
++	return ret;
+ }
+
+ EXPORT_SYMBOL(crc32c);
+--
+2.7.4
+
diff --git 
a/meta-snowyowl/recipes-kernel/linux/files/0177-crypto-drbg-Fixes-panic-in-wait_for_completion-call.patch b/meta-snowyowl/recipes-kernel/linux/files/0177-crypto-drbg-Fixes-panic-in-wait_for_completion-call.patch new file mode 100755 index 00000000..8b299a48 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0177-crypto-drbg-Fixes-panic-in-wait_for_completion-call.patch @@ -0,0 +1,35 @@ +From de190dfddee7280ff3405b9d5f4e04b8fd02d934 Mon Sep 17 00:00:00 2001 +From: Stephan Mueller <smueller@chronox.de> +Date: Fri, 26 May 2017 12:11:31 +0200 +Subject: [PATCH 048/331] crypto: drbg - Fixes panic in wait_for_completion + call + +commit b61929c654f2e725644935737c4c1ea9c741e2f8 upstream. + +Initialise ctr_completion variable before use. + +Cc: <stable@vger.kernel.org> +Signed-off-by: Harsh Jain <harshjain.prof@gmail.com> +Signed-off-by: Stephan Mueller <smueller@chronox.de> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + crypto/drbg.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/crypto/drbg.c b/crypto/drbg.c +index 123d211..8cac3d3 100644 +--- a/crypto/drbg.c ++++ b/crypto/drbg.c +@@ -1691,6 +1691,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg) + return PTR_ERR(sk_tfm); + } + drbg->ctr_handle = sk_tfm; ++ init_completion(&drbg->ctr_completion); + + req = skcipher_request_alloc(sk_tfm, GFP_KERNEL); + if (!req) { +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0178-crypto-ccp-remove-unused-variable-qim.patch b/meta-snowyowl/recipes-kernel/linux/files/0178-crypto-ccp-remove-unused-variable-qim.patch new file mode 100755 index 00000000..eb02fe33 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0178-crypto-ccp-remove-unused-variable-qim.patch @@ -0,0 +1,41 @@ +From 42d4ce02a3b03ad632e5333c2671c2a571a72ac7 Mon Sep 17 00:00:00 2001 +From: Colin Ian King <colin.king@canonical.com> +Date: Thu, 12 Oct 2017 17:55:41 +0100 +Subject: [PATCH 323/331] crypto: ccp - remove unused variable qim + +Variable qim is assigned but never read, it is redundant and can +be removed. 
+ +Cleans up clang warning: Value stored to 'qim' is never read + +Fixes: 4b394a232df7 ("crypto: ccp - Let a v5 CCP provide the same function as v3") +Signed-off-by: Colin Ian King <colin.king@canonical.com> +Acked-by: Gary R Hook <gary.hook@amd.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/crypto/ccp/ccp-dev-v5.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c +index 65604fc..44a4d27 100644 +--- a/drivers/crypto/ccp/ccp-dev-v5.c ++++ b/drivers/crypto/ccp/ccp-dev-v5.c +@@ -788,13 +788,12 @@ static int ccp5_init(struct ccp_device *ccp) + struct ccp_cmd_queue *cmd_q; + struct dma_pool *dma_pool; + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; +- unsigned int qmr, qim, i; ++ unsigned int qmr, i; + u64 status; + u32 status_lo, status_hi; + int ret; + + /* Find available queues */ +- qim = 0; + qmr = ioread32(ccp->io_regs + Q_MASK_REG); + for (i = 0; i < MAX_HW_QUEUES; i++) { + +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0179-crypto-ccp-use-ENOSPC-for-transient-busy-indication.patch b/meta-snowyowl/recipes-kernel/linux/files/0179-crypto-ccp-use-ENOSPC-for-transient-busy-indication.patch new file mode 100755 index 00000000..3d3358cd --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0179-crypto-ccp-use-ENOSPC-for-transient-busy-indication.patch @@ -0,0 +1,67 @@ +From 6328660d4d0a5d1233544a052be7ed21a0a4ad8b Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef <gilad@benyossef.com> +Date: Wed, 18 Oct 2017 08:00:34 +0100 +Subject: [PATCH 324/331] crypto: ccp - use -ENOSPC for transient busy + indication + +Replace -EBUSY with -ENOSPC when reporting transient busy +indication in the absence of backlog. + +Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com> +Reviewed-by: Gary R Hook <gary.hook@amd.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/crypto/ccp/ccp-crypto-main.c | 8 +++----- + drivers/crypto/ccp/ccp-dev.c | 7 +++++-- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c +index 35a9de7..b95d199 100644 +--- a/drivers/crypto/ccp/ccp-crypto-main.c ++++ b/drivers/crypto/ccp/ccp-crypto-main.c +@@ -222,9 +222,10 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) + + /* Check if the cmd can/should be queued */ + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { +- ret = -EBUSY; +- if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) ++ if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) { ++ ret = -ENOSPC; + goto e_lock; ++ } + } + + /* Look for an entry with the same tfm. 
If there is a cmd +@@ -243,9 +244,6 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) + ret = ccp_enqueue_cmd(crypto_cmd->cmd); + if (!ccp_crypto_success(ret)) + goto e_lock; /* Error, don't queue it */ +- if ((ret == -EBUSY) && +- !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) +- goto e_lock; /* Not backlogging, don't queue it */ + } + + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { +diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c +index 4e029b1..1b5035d 100644 +--- a/drivers/crypto/ccp/ccp-dev.c ++++ b/drivers/crypto/ccp/ccp-dev.c +@@ -292,9 +292,12 @@ int ccp_enqueue_cmd(struct ccp_cmd *cmd) + i = ccp->cmd_q_count; + + if (ccp->cmd_count >= MAX_CMD_QLEN) { +- ret = -EBUSY; +- if (cmd->flags & CCP_CMD_MAY_BACKLOG) ++ if (cmd->flags & CCP_CMD_MAY_BACKLOG) { ++ ret = -EBUSY; + list_add_tail(&cmd->entry, &ccp->backlog); ++ } else { ++ ret = -ENOSPC; ++ } + } else { + ret = -EINPROGRESS; + ccp->cmd_count++; +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0180-crypto-ccp-Build-the-AMD-secure-processor-driver-onl.patch b/meta-snowyowl/recipes-kernel/linux/files/0180-crypto-ccp-Build-the-AMD-secure-processor-driver-onl.patch new file mode 100755 index 00000000..53282c22 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0180-crypto-ccp-Build-the-AMD-secure-processor-driver-onl.patch @@ -0,0 +1,37 @@ +From 67e00a07d618698c2cca6704dabaa6276c6831b0 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Mon, 4 Dec 2017 10:57:26 -0600 +Subject: [PATCH 325/331] crypto: ccp: Build the AMD secure processor driver + only with AMD CPU support + +This is AMD-specific hardware so present it in Kconfig only when AMD +CPU support is enabled or on ARM64 where it is also used. + +Signed-off-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> +Reviewed-by: Gary R Hook <gary.hook@amd.com> +Cc: Brijesh Singh <brijesh.singh@amd.com> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: Gary Hook <gary.hook@amd.com> +Cc: Herbert Xu <herbert@gondor.apana.org.au> +Cc: "David S. Miller" <davem@davemloft.net> +Cc: linux-crypto@vger.kernel.org +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/crypto/ccp/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig +index 6d62660..9c84f98 100644 +--- a/drivers/crypto/ccp/Kconfig ++++ b/drivers/crypto/ccp/Kconfig +@@ -1,5 +1,6 @@ + config CRYPTO_DEV_CCP_DD + tristate "Secure Processor device driver" ++ depends on CPU_SUP_AMD || ARM64 + default m + help + Provides AMD Secure Processor device driver. +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0181-crypto-ccp-Add-Platform-Security-Processor-PSP-devic.patch b/meta-snowyowl/recipes-kernel/linux/files/0181-crypto-ccp-Add-Platform-Security-Processor-PSP-devic.patch new file mode 100755 index 00000000..f40f4711 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0181-crypto-ccp-Add-Platform-Security-Processor-PSP-devic.patch @@ -0,0 +1,462 @@ +From 899458d8cedd0af7cf3e5fdbd1dbe50547b68db3 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh <brijesh.singh@amd.com> +Date: Mon, 4 Dec 2017 10:57:28 -0600 +Subject: [PATCH 326/331] crypto: ccp: Add Platform Security Processor (PSP) + device support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The Platform Security Processor (PSP) is part of the AMD Secure +Processor (AMD-SP) functionality. 
The PSP is a dedicated processor +that provides support for key management commands in Secure Encrypted +Virtualization (SEV) mode, along with software-based Trusted Execution +Environment (TEE) to enable third-party trusted applications. + +Note that the key management functionality provided by the SEV firmware +can be used outside of the kvm-amd driver hence it doesn't need to +depend on CONFIG_KVM_AMD. + +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: "Radim Krčmář" <rkrcmar@redhat.com> +Cc: Borislav Petkov <bp@suse.de> +Cc: Herbert Xu <herbert@gondor.apana.org.au> +Cc: Gary Hook <gary.hook@amd.com> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: linux-crypto@vger.kernel.org +Cc: kvm@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Improvements-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> +Reviewed-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/crypto/ccp/Kconfig | 11 +++++ + drivers/crypto/ccp/Makefile | 1 + + drivers/crypto/ccp/psp-dev.c | 105 +++++++++++++++++++++++++++++++++++++++++++ + drivers/crypto/ccp/psp-dev.h | 59 ++++++++++++++++++++++++ + drivers/crypto/ccp/sp-dev.c | 26 +++++++++++ + drivers/crypto/ccp/sp-dev.h | 24 +++++++++- + drivers/crypto/ccp/sp-pci.c | 52 +++++++++++++++++++++ + 7 files changed, 277 insertions(+), 1 deletion(-) + create mode 100644 drivers/crypto/ccp/psp-dev.c + create mode 100644 drivers/crypto/ccp/psp-dev.h + +diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig +index 9c84f98..b9dfae4 100644 +--- a/drivers/crypto/ccp/Kconfig ++++ b/drivers/crypto/ccp/Kconfig +@@ -33,3 +33,14 @@ config CRYPTO_DEV_CCP_CRYPTO + Support for using the cryptographic API with the AMD Cryptographic + Coprocessor. This module supports offload of SHA and AES algorithms. + If you choose 'M' here, this module will be called ccp_crypto. ++ ++config CRYPTO_DEV_SP_PSP ++ bool "Platform Security Processor (PSP) device" ++ default y ++ depends on CRYPTO_DEV_CCP_DD && X86_64 ++ help ++ Provide support for the AMD Platform Security Processor (PSP). ++ The PSP is a dedicated processor that provides support for key ++ management commands in Secure Encrypted Virtualization (SEV) mode, ++ along with software-based Trusted Execution Environment (TEE) to ++ enable third-party trusted applications. +diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile +index 57f8deb..008bae7 100644 +--- a/drivers/crypto/ccp/Makefile ++++ b/drivers/crypto/ccp/Makefile +@@ -7,6 +7,7 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \ + ccp-dmaengine.o \ + ccp-debugfs.o + ccp-$(CONFIG_PCI) += sp-pci.o ++ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o + + obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o + ccp-crypto-objs := ccp-crypto-main.o \ +diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c +new file mode 100644 +index 0000000..b5789f8 +--- /dev/null ++++ b/drivers/crypto/ccp/psp-dev.c +@@ -0,0 +1,105 @@ ++/* ++ * AMD Platform Security Processor (PSP) interface ++ * ++ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc. ++ * ++ * Author: Brijesh Singh <brijesh.singh@amd.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include <linux/module.h> ++#include <linux/kernel.h> ++#include <linux/kthread.h> ++#include <linux/sched.h> ++#include <linux/interrupt.h> ++#include <linux/spinlock.h> ++#include <linux/spinlock_types.h> ++#include <linux/types.h> ++#include <linux/mutex.h> ++#include <linux/delay.h> ++#include <linux/hw_random.h> ++#include <linux/ccp.h> ++ ++#include "sp-dev.h" ++#include "psp-dev.h" ++ ++static struct psp_device *psp_alloc_struct(struct sp_device *sp) ++{ ++ struct device *dev = sp->dev; ++ struct psp_device *psp; ++ ++ psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL); ++ if (!psp) ++ return NULL; ++ ++ psp->dev = dev; ++ psp->sp = sp; ++ ++ snprintf(psp->name, sizeof(psp->name), "psp-%u", sp->ord); ++ ++ return psp; ++} ++ ++static irqreturn_t psp_irq_handler(int irq, void *data) ++{ ++ return IRQ_HANDLED; ++} ++ ++int psp_dev_init(struct sp_device *sp) ++{ ++ struct device *dev = sp->dev; ++ struct psp_device *psp; ++ int ret; ++ ++ ret = -ENOMEM; ++ psp = psp_alloc_struct(sp); ++ if (!psp) ++ goto e_err; ++ ++ sp->psp_data = psp; ++ ++ psp->vdata = (struct psp_vdata *)sp->dev_vdata->psp_vdata; ++ if (!psp->vdata) { ++ ret = -ENODEV; ++ dev_err(dev, "missing driver data\n"); ++ goto e_err; ++ } ++ ++ psp->io_regs = sp->io_map + psp->vdata->offset; ++ ++ /* Disable and clear interrupts until ready */ ++ iowrite32(0, psp->io_regs + PSP_P2CMSG_INTEN); ++ iowrite32(-1, psp->io_regs + PSP_P2CMSG_INTSTS); ++ ++ /* Request an irq */ ++ ret = sp_request_psp_irq(psp->sp, psp_irq_handler, psp->name, psp); ++ if (ret) { ++ dev_err(dev, "psp: unable to allocate an IRQ\n"); ++ goto e_err; ++ } ++ ++ if (sp->set_psp_master_device) ++ sp->set_psp_master_device(sp); ++ ++ /* Enable interrupt */ ++ iowrite32(-1, psp->io_regs + PSP_P2CMSG_INTEN); ++ ++ return 0; ++ ++e_err: ++ sp->psp_data = NULL; ++ ++ dev_notice(dev, "psp initialization failed\n"); ++ ++ return ret; ++} ++ ++void psp_dev_destroy(struct sp_device *sp) ++{ ++ struct psp_device *psp = sp->psp_data; ++ ++ sp_free_psp_irq(sp, psp); ++} +diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h +new file mode 100644 +index 0000000..55b7808 +--- /dev/null ++++ b/drivers/crypto/ccp/psp-dev.h +@@ -0,0 +1,59 @@ ++/* ++ * AMD Platform Security Processor (PSP) interface driver ++ * ++ * Copyright (C) 2017 Advanced Micro Devices, Inc. ++ * ++ * Author: Brijesh Singh <brijesh.singh@amd.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#ifndef __PSP_DEV_H__ ++#define __PSP_DEV_H__ ++ ++#include <linux/device.h> ++#include <linux/pci.h> ++#include <linux/spinlock.h> ++#include <linux/mutex.h> ++#include <linux/list.h> ++#include <linux/wait.h> ++#include <linux/dmapool.h> ++#include <linux/hw_random.h> ++#include <linux/bitops.h> ++#include <linux/interrupt.h> ++#include <linux/irqreturn.h> ++#include <linux/dmaengine.h> ++ ++#include "sp-dev.h" ++ ++#define PSP_P2CMSG_INTEN 0x0110 ++#define PSP_P2CMSG_INTSTS 0x0114 ++ ++#define PSP_C2PMSG_ATTR_0 0x0118 ++#define PSP_C2PMSG_ATTR_1 0x011c ++#define PSP_C2PMSG_ATTR_2 0x0120 ++#define PSP_C2PMSG_ATTR_3 0x0124 ++#define PSP_P2CMSG_ATTR_0 0x0128 ++ ++#define PSP_CMDRESP_CMD_SHIFT 16 ++#define PSP_CMDRESP_IOC BIT(0) ++#define PSP_CMDRESP_RESP BIT(31) ++#define PSP_CMDRESP_ERR_MASK 0xffff ++ ++#define MAX_PSP_NAME_LEN 16 ++ ++struct psp_device { ++ struct list_head entry; ++ ++ struct psp_vdata *vdata; ++ char name[MAX_PSP_NAME_LEN]; ++ ++ struct device *dev; ++ struct sp_device *sp; ++ ++ void __iomem *io_regs; ++}; ++ ++#endif /* __PSP_DEV_H */ +diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c +index bef387c8..cf101c0 100644 +--- a/drivers/crypto/ccp/sp-dev.c ++++ b/drivers/crypto/ccp/sp-dev.c +@@ -198,6 +198,8 @@ int sp_init(struct sp_device *sp) + if (sp->dev_vdata->ccp_vdata) + ccp_dev_init(sp); + ++ if (sp->dev_vdata->psp_vdata) ++ psp_dev_init(sp); + return 0; + } + +@@ -206,6 +208,9 @@ void sp_destroy(struct sp_device *sp) + if (sp->dev_vdata->ccp_vdata) + ccp_dev_destroy(sp); + ++ if (sp->dev_vdata->psp_vdata) ++ psp_dev_destroy(sp); ++ + sp_del_device(sp); + } + +@@ -237,6 +242,27 @@ int sp_resume(struct sp_device *sp) + } + #endif + ++struct sp_device *sp_get_psp_master_device(void) ++{ ++ struct sp_device *i, *ret = NULL; ++ unsigned long flags; ++ ++ write_lock_irqsave(&sp_unit_lock, flags); ++ if (list_empty(&sp_units)) ++ goto unlock; ++ ++ list_for_each_entry(i, &sp_units, entry) { ++ if (i->psp_data) ++ break; ++ } ++ ++ if (i->get_psp_master_device) ++ ret = i->get_psp_master_device(); ++unlock: ++ write_unlock_irqrestore(&sp_unit_lock, flags); ++ return ret; ++} ++ + static int __init sp_mod_init(void) + { + #ifdef CONFIG_X86 +diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h +index 5ab486a..909cf3e 100644 +--- a/drivers/crypto/ccp/sp-dev.h ++++ b/drivers/crypto/ccp/sp-dev.h +@@ -42,12 +42,17 @@ struct ccp_vdata { + const unsigned int offset; + const unsigned int rsamax; + }; ++ ++struct psp_vdata { ++ const unsigned int offset; ++}; ++ + /* Structure to hold SP device data */ + struct sp_dev_vdata { + const unsigned int bar; + + const struct ccp_vdata *ccp_vdata; +- void *psp_vdata; ++ const struct psp_vdata *psp_vdata; + }; + + struct sp_device { +@@ -68,6 +73,10 @@ struct sp_device { + /* DMA caching attribute support */ + unsigned int axcache; + ++ /* get and set master device */ ++ struct sp_device*(*get_psp_master_device)(void); ++ void (*set_psp_master_device)(struct sp_device *); ++ + bool irq_registered; + bool use_tasklet; + +@@ -103,6 +112,7 @@ void sp_free_ccp_irq(struct sp_device *sp, void *data); + int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data); + void sp_free_psp_irq(struct sp_device *sp, void *data); ++struct sp_device *sp_get_psp_master_device(void); + + #ifdef CONFIG_CRYPTO_DEV_SP_CCP + +@@ -130,4 +140,16 @@ static inline int ccp_dev_resume(struct sp_device *sp) + } + #endif /* CONFIG_CRYPTO_DEV_SP_CCP */ + ++#ifdef 
CONFIG_CRYPTO_DEV_SP_PSP ++ ++int psp_dev_init(struct sp_device *sp); ++void psp_dev_destroy(struct sp_device *sp); ++ ++#else /* !CONFIG_CRYPTO_DEV_SP_PSP */ ++ ++static inline int psp_dev_init(struct sp_device *sp) { return 0; } ++static inline void psp_dev_destroy(struct sp_device *sp) { } ++ ++#endif /* CONFIG_CRYPTO_DEV_SP_PSP */ ++ + #endif +diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c +index 9859aa6..f5f43c5 100644 +--- a/drivers/crypto/ccp/sp-pci.c ++++ b/drivers/crypto/ccp/sp-pci.c +@@ -25,6 +25,7 @@ + #include <linux/ccp.h> + + #include "ccp-dev.h" ++#include "psp-dev.h" + + #define MSIX_VECTORS 2 + +@@ -32,6 +33,7 @@ struct sp_pci { + int msix_count; + struct msix_entry msix_entry[MSIX_VECTORS]; + }; ++static struct sp_device *sp_dev_master; + + static int sp_get_msix_irqs(struct sp_device *sp) + { +@@ -108,6 +110,45 @@ static void sp_free_irqs(struct sp_device *sp) + sp->psp_irq = 0; + } + ++static bool sp_pci_is_master(struct sp_device *sp) ++{ ++ struct device *dev_cur, *dev_new; ++ struct pci_dev *pdev_cur, *pdev_new; ++ ++ dev_new = sp->dev; ++ dev_cur = sp_dev_master->dev; ++ ++ pdev_new = to_pci_dev(dev_new); ++ pdev_cur = to_pci_dev(dev_cur); ++ ++ if (pdev_new->bus->number < pdev_cur->bus->number) ++ return true; ++ ++ if (PCI_SLOT(pdev_new->devfn) < PCI_SLOT(pdev_cur->devfn)) ++ return true; ++ ++ if (PCI_FUNC(pdev_new->devfn) < PCI_FUNC(pdev_cur->devfn)) ++ return true; ++ ++ return false; ++} ++ ++static void psp_set_master(struct sp_device *sp) ++{ ++ if (!sp_dev_master) { ++ sp_dev_master = sp; ++ return; ++ } ++ ++ if (sp_pci_is_master(sp)) ++ sp_dev_master = sp; ++} ++ ++static struct sp_device *psp_get_master(void) ++{ ++ return sp_dev_master; ++} ++ + static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { + struct sp_device *sp; +@@ -166,6 +207,8 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) + goto e_err; + + pci_set_master(pdev); ++ sp->set_psp_master_device = psp_set_master; ++ sp->get_psp_master_device = psp_get_master; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { +@@ -225,6 +268,12 @@ static int sp_pci_resume(struct pci_dev *pdev) + } + #endif + ++#ifdef CONFIG_CRYPTO_DEV_SP_PSP ++static const struct psp_vdata psp_entry = { ++ .offset = 0x10500, ++}; ++#endif ++ + static const struct sp_dev_vdata dev_vdata[] = { + { + .bar = 2, +@@ -237,6 +286,9 @@ static const struct sp_dev_vdata dev_vdata[] = { + #ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5a, + #endif ++#ifdef CONFIG_CRYPTO_DEV_SP_PSP ++ .psp_vdata = &psp_entry ++#endif + }, + { + .bar = 2, +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0182-crypto-ccp-Define-SEV-userspace-ioctl-and-command-id.patch b/meta-snowyowl/recipes-kernel/linux/files/0182-crypto-ccp-Define-SEV-userspace-ioctl-and-command-id.patch new file mode 100755 index 00000000..ce7c959a --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0182-crypto-ccp-Define-SEV-userspace-ioctl-and-command-id.patch @@ -0,0 +1,182 @@ +From ad8faa2762dbb0a5cfe803bb9e442f911944b975 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh <brijesh.singh@amd.com> +Date: Mon, 4 Dec 2017 10:57:27 -0600 +Subject: [PATCH 327/331] crypto: ccp: Define SEV userspace ioctl and command + id +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add a include file which defines the ioctl and command id used for +issuing SEV platform management specific commands. 
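+
+As a rough sketch of how userspace would consume this interface --
+assuming the PSP driver exposes a /dev/sev character node (the node
+name is an assumption here, not something this patch defines) and that
+the uapi header below is installed -- a platform status query could
+look like:
+
+  #include <stdio.h>
+  #include <fcntl.h>
+  #include <unistd.h>
+  #include <sys/ioctl.h>
+  #include <linux/psp-sev.h>
+
+  int main(void)
+  {
+          struct sev_user_data_status status = { 0 };
+          struct sev_issue_cmd arg = {
+                  .cmd  = SEV_PLATFORM_STATUS,
+                  .data = (__u64)(unsigned long)&status,
+          };
+          int fd = open("/dev/sev", O_RDWR);   /* node name assumed */
+
+          if (fd < 0)
+                  return 1;
+          if (ioctl(fd, SEV_ISSUE_CMD, &arg) < 0)
+                  fprintf(stderr, "SEV command failed, fw error %u\n",
+                          arg.error);
+          else
+                  printf("SEV API %u.%u, state %u, guests %u\n",
+                         status.api_major, status.api_minor,
+                         status.state, status.guest_count);
+          close(fd);
+          return 0;
+  }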
+ +Cc: Paolo Bonzini <pbonzini@redhat.com> +Cc: "Radim Krčmář" <rkrcmar@redhat.com> +Cc: Borislav Petkov <bp@suse.de> +Cc: Herbert Xu <herbert@gondor.apana.org.au> +Cc: Gary Hook <gary.hook@amd.com> +Cc: Tom Lendacky <thomas.lendacky@amd.com> +Cc: linux-crypto@vger.kernel.org +Cc: kvm@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Improvements-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> +Reviewed-by: Borislav Petkov <bp@suse.de> +Acked-by: Gary R Hook <gary.hook@amd.com> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + include/uapi/linux/psp-sev.h | 142 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 142 insertions(+) + create mode 100644 include/uapi/linux/psp-sev.h + +diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h +new file mode 100644 +index 0000000..3d77fe9 +--- /dev/null ++++ b/include/uapi/linux/psp-sev.h +@@ -0,0 +1,142 @@ ++/* ++ * Userspace interface for AMD Secure Encrypted Virtualization (SEV) ++ * platform management commands. ++ * ++ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc. ++ * ++ * Author: Brijesh Singh <brijesh.singh@amd.com> ++ * ++ * SEV spec 0.14 is available at: ++ * http://support.amd.com/TechDocs/55766_SEV-KM%20API_Specification.pdf ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef __PSP_SEV_USER_H__ ++#define __PSP_SEV_USER_H__ ++ ++#include <linux/types.h> ++ ++/** ++ * SEV platform commands ++ */ ++enum { ++ SEV_FACTORY_RESET = 0, ++ SEV_PLATFORM_STATUS, ++ SEV_PEK_GEN, ++ SEV_PEK_CSR, ++ SEV_PDH_GEN, ++ SEV_PDH_CERT_EXPORT, ++ SEV_PEK_CERT_IMPORT, ++ ++ SEV_MAX, ++}; ++ ++/** ++ * SEV Firmware status code ++ */ ++typedef enum { ++ SEV_RET_SUCCESS = 0, ++ SEV_RET_INVALID_PLATFORM_STATE, ++ SEV_RET_INVALID_GUEST_STATE, ++ SEV_RET_INAVLID_CONFIG, ++ SEV_RET_INVALID_len, ++ SEV_RET_ALREADY_OWNED, ++ SEV_RET_INVALID_CERTIFICATE, ++ SEV_RET_POLICY_FAILURE, ++ SEV_RET_INACTIVE, ++ SEV_RET_INVALID_ADDRESS, ++ SEV_RET_BAD_SIGNATURE, ++ SEV_RET_BAD_MEASUREMENT, ++ SEV_RET_ASID_OWNED, ++ SEV_RET_INVALID_ASID, ++ SEV_RET_WBINVD_REQUIRED, ++ SEV_RET_DFFLUSH_REQUIRED, ++ SEV_RET_INVALID_GUEST, ++ SEV_RET_INVALID_COMMAND, ++ SEV_RET_ACTIVE, ++ SEV_RET_HWSEV_RET_PLATFORM, ++ SEV_RET_HWSEV_RET_UNSAFE, ++ SEV_RET_UNSUPPORTED, ++ SEV_RET_MAX, ++} sev_ret_code; ++ ++/** ++ * struct sev_user_data_status - PLATFORM_STATUS command parameters ++ * ++ * @major: major API version ++ * @minor: minor API version ++ * @state: platform state ++ * @flags: platform config flags ++ * @build: firmware build id for API version ++ * @guest_count: number of active guests ++ */ ++struct sev_user_data_status { ++ __u8 api_major; /* Out */ ++ __u8 api_minor; /* Out */ ++ __u8 state; /* Out */ ++ __u32 flags; /* Out */ ++ __u8 build; /* Out */ ++ __u32 guest_count; /* Out */ ++} __packed; ++ ++/** ++ * struct sev_user_data_pek_csr - PEK_CSR command parameters ++ * ++ * @address: PEK certificate chain ++ * @length: length of certificate ++ */ ++struct sev_user_data_pek_csr { ++ __u64 address; /* In */ ++ __u32 length; /* In/Out */ ++} __packed; ++ ++/** ++ * struct sev_user_data_cert_import - PEK_CERT_IMPORT command parameters ++ * ++ * @pek_address: PEK certificate chain ++ * @pek_len: length of PEK certificate ++ * @oca_address: OCA certificate chain ++ * @oca_len: length of OCA certificate ++ */ ++struct 
sev_user_data_pek_cert_import { ++ __u64 pek_cert_address; /* In */ ++ __u32 pek_cert_len; /* In */ ++ __u64 oca_cert_address; /* In */ ++ __u32 oca_cert_len; /* In */ ++} __packed; ++ ++/** ++ * struct sev_user_data_pdh_cert_export - PDH_CERT_EXPORT command parameters ++ * ++ * @pdh_address: PDH certificate address ++ * @pdh_len: length of PDH certificate ++ * @cert_chain_address: PDH certificate chain ++ * @cert_chain_len: length of PDH certificate chain ++ */ ++struct sev_user_data_pdh_cert_export { ++ __u64 pdh_cert_address; /* In */ ++ __u32 pdh_cert_len; /* In/Out */ ++ __u64 cert_chain_address; /* In */ ++ __u32 cert_chain_len; /* In/Out */ ++} __packed; ++ ++/** ++ * struct sev_issue_cmd - SEV ioctl parameters ++ * ++ * @cmd: SEV commands to execute ++ * @opaque: pointer to the command structure ++ * @error: SEV FW return code on failure ++ */ ++struct sev_issue_cmd { ++ __u32 cmd; /* In */ ++ __u64 data; /* In */ ++ __u32 error; /* Out */ ++} __packed; ++ ++#define SEV_IOC_TYPE 'S' ++#define SEV_ISSUE_CMD _IOWR(SEV_IOC_TYPE, 0x0, struct sev_issue_cmd) ++ ++#endif /* __PSP_USER_SEV_H */ +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0183-mqueue-fix-a-use-after-free-in-sys_mq_notify.patch b/meta-snowyowl/recipes-kernel/linux/files/0183-mqueue-fix-a-use-after-free-in-sys_mq_notify.patch new file mode 100755 index 00000000..8e6f87ea --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0183-mqueue-fix-a-use-after-free-in-sys_mq_notify.patch @@ -0,0 +1,52 @@ +From 9e08c00a65d1228febf7e9b221b5c923e14705f6 Mon Sep 17 00:00:00 2001 +From: Cong Wang <xiyou.wangcong@gmail.com> +Date: Sun, 9 Jul 2017 13:19:55 -0700 +Subject: [PATCH 049/331] mqueue: fix a use-after-free in sys_mq_notify() + +commit f991af3daabaecff34684fd51fac80319d1baad1 upstream. + +The retry logic for netlink_attachskb() inside sys_mq_notify() +is nasty and vulnerable: + +1) The sock refcnt is already released when retry is needed +2) The fd is controllable by user-space because we already + release the file refcnt + +so we when retry but the fd has been just closed by user-space +during this small window, we end up calling netlink_detachskb() +on the error path which releases the sock again, later when +the user-space closes this socket a use-after-free could be +triggered. + +Setting 'sock' to NULL here should be sufficient to fix it. 
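+
+A minimal stand-alone model of the pattern being enforced (toy types,
+not the mqueue code itself): once the callee has consumed the
+reference on the retry path, the caller must forget its pointer so no
+later error path can release it a second time.
+
+  #include <stdio.h>
+
+  struct sock { int refcnt; };
+
+  static struct sock *grab(struct sock *s) { s->refcnt++; return s; }
+  static void put(struct sock *s)
+  {
+          if (--s->refcnt == 0)
+                  printf("freed\n");
+  }
+
+  /* consumes the caller's reference when it asks for a retry */
+  static int attach(struct sock *s, int *attempt)
+  {
+          if ((*attempt)++ == 0) {
+                  put(s);         /* ref is gone; caller must forget s */
+                  return 1;
+          }
+          return 0;
+  }
+
+  int main(void)
+  {
+          struct sock real = { .refcnt = 1 }, *sock;
+          int attempt = 0, ret;
+
+  retry:
+          sock = grab(&real);
+          ret = attach(sock, &attempt);
+          if (ret == 1) {
+                  sock = NULL;    /* the fix: drop the stale pointer */
+                  goto retry;
+          }
+          if (sock)
+                  put(sock);      /* balance our reference */
+          return 0;
+  }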
+ +Reported-by: GeneBlue <geneblue.mail@gmail.com> +Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Manfred Spraul <manfred@colorfullife.com> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + ipc/mqueue.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index 8cbd6e6..28a142f 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -1249,8 +1249,10 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, + + timeo = MAX_SCHEDULE_TIMEOUT; + ret = netlink_attachskb(sock, nc, &timeo, NULL); +- if (ret == 1) ++ if (ret == 1) { ++ sock = NULL; + goto retry; ++ } + if (ret) { + sock = NULL; + nc = NULL; +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0184-i2c-designware-Fix-system-suspend.patch b/meta-snowyowl/recipes-kernel/linux/files/0184-i2c-designware-Fix-system-suspend.patch new file mode 100755 index 00000000..d03304cb --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0184-i2c-designware-Fix-system-suspend.patch @@ -0,0 +1,95 @@ +From c446f04b979d45674b2d0d0b560a40c81119fa05 Mon Sep 17 00:00:00 2001 +From: Ulf Hansson <ulf.hansson@linaro.org> +Date: Wed, 9 Aug 2017 15:28:22 +0200 +Subject: [PATCH 069/331] i2c: designware: Fix system suspend + +commit a23318feeff662c8d25d21623daebdd2e55ec221 upstream. + +The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming +during system suspend"), may suggest to the PM core to try out the so +called direct_complete path for system sleep. In this path, the PM core +treats a runtime suspended device as it's already in a proper low power +state for system sleep, which makes it skip calling the system sleep +callbacks for the device, except for the ->prepare() and the ->complete() +callbacks. + +However, the PM core may unset the direct_complete flag for a parent +device, in case its child device are being system suspended before. In this +scenario, the PM core invokes the system sleep callbacks, no matter if the +device is runtime suspended or not. + +Particularly in cases of an existing i2c slave device, the above path is +triggered, which breaks the assumption that the i2c device is always +runtime resumed whenever the dw_i2c_plat_suspend() is being called. + +More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and +clk_core_unprepare(), for an already disabled/unprepared clock, leading to +a splat in the log about clocks calls being wrongly balanced and breaking +system sleep. + +To still allow the direct_complete path in cases when it's possible, but +also to keep the fix simple, let's runtime resume the i2c device in the +->suspend() callback, before continuing to put the device into low power +state. + +Note, in cases when the i2c device is attached to the ACPI PM domain, this +problem doesn't occur, because ACPI's ->suspend() callback, assigned to +acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. + +It should also be noted that this change does not fix commit 8503ff166504 +("i2c: designware: Avoid unnecessary resuming during system suspend"). +Because for the non-ACPI case, the system sleep support was already broken +prior that point. + +Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org> +Acked-by: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> +Tested-by: John Stultz <john.stultz@linaro.org> +Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com> +Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com> +Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com> +Signed-off-by: Wolfram Sang <wsa@the-dreams.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c +index 0b42a12..b42d95f 100644 +--- a/drivers/i2c/busses/i2c-designware-platdrv.c ++++ b/drivers/i2c/busses/i2c-designware-platdrv.c +@@ -319,7 +319,7 @@ static void dw_i2c_plat_complete(struct device *dev) + #endif + + #ifdef CONFIG_PM +-static int dw_i2c_plat_suspend(struct device *dev) ++static int dw_i2c_plat_runtime_suspend(struct device *dev) + { + struct platform_device *pdev = to_platform_device(dev); + struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); +@@ -343,11 +343,21 @@ static int dw_i2c_plat_resume(struct device *dev) + return 0; + } + ++#ifdef CONFIG_PM_SLEEP ++static int dw_i2c_plat_suspend(struct device *dev) ++{ ++ pm_runtime_resume(dev); ++ return dw_i2c_plat_runtime_suspend(dev); ++} ++#endif ++ + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { + .prepare = dw_i2c_plat_prepare, + .complete = dw_i2c_plat_complete, + SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) +- SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) ++ SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, ++ dw_i2c_plat_resume, ++ NULL) + }; + + #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0185-iommu-dma-Don-t-reserve-PCI-I-O-windows.patch b/meta-snowyowl/recipes-kernel/linux/files/0185-iommu-dma-Don-t-reserve-PCI-I-O-windows.patch new file mode 100755 index 00000000..eeb002de --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0185-iommu-dma-Don-t-reserve-PCI-I-O-windows.patch @@ -0,0 +1,39 @@ +From 2f2373b38365aa0e21b45cfed35b830dccca4257 Mon Sep 17 00:00:00 2001 +From: Robin Murphy <robin.murphy@arm.com> +Date: Thu, 16 Mar 2017 17:00:17 +0000 +Subject: [PATCH 038/331] iommu/dma: Don't reserve PCI I/O windows + +commit 938f1bbe35e3a7cb07e1fa7c512e2ef8bb866bdf upstream. + +Even if a host controller's CPU-side MMIO windows into PCI I/O space do +happen to leak into PCI memory space such that it might treat them as +peer addresses, trying to reserve the corresponding I/O space addresses +doesn't do anything to help solve that problem. Stop doing a silly thing. 
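+
+In effect the reservation loop now keeps only CPU-visible MMIO
+windows. A stand-alone sketch of the filtering rule (toy types stand
+in for struct resource and the IOVA domain):
+
+  #include <stdio.h>
+
+  enum res_type { RES_IO, RES_MEM };
+
+  struct window {
+          enum res_type type;
+          unsigned long start, end, offset;
+  };
+
+  static void reserve(unsigned long lo, unsigned long hi)
+  {
+          printf("reserve IOVA [%#lx, %#lx]\n", lo, hi);
+  }
+
+  int main(void)
+  {
+          struct window w[] = {
+                  { RES_IO,  0x1000,     0x1fff,     0 },  /* now skipped */
+                  { RES_MEM, 0xc0000000, 0xc7ffffff, 0 },  /* reserved */
+          };
+          unsigned int i;
+
+          for (i = 0; i < 2; i++) {
+                  if (w[i].type != RES_MEM)   /* MEM only after the fix */
+                          continue;
+                  reserve(w[i].start - w[i].offset,
+                          w[i].end   - w[i].offset);
+          }
+          return 0;
+  }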
+ +Fixes: fade1ec055dc ("iommu/dma: Avoid PCI host bridge windows") +Reviewed-by: Eric Auger <eric.auger@redhat.com> +Signed-off-by: Robin Murphy <robin.murphy@arm.com> +Signed-off-by: Joerg Roedel <jroedel@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/dma-iommu.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c +index c5ab866..1520e7f 100644 +--- a/drivers/iommu/dma-iommu.c ++++ b/drivers/iommu/dma-iommu.c +@@ -112,8 +112,7 @@ static void iova_reserve_pci_windows(struct pci_dev *dev, + unsigned long lo, hi; + + resource_list_for_each_entry(window, &bridge->windows) { +- if (resource_type(window->res) != IORESOURCE_MEM && +- resource_type(window->res) != IORESOURCE_IO) ++ if (resource_type(window->res) != IORESOURCE_MEM) + continue; + + lo = iova_pfn(iovad, window->res->start - window->offset); +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0186-iommu-amd-Fix-incorrect-error-handling-in-amd_iommu_.patch b/meta-snowyowl/recipes-kernel/linux/files/0186-iommu-amd-Fix-incorrect-error-handling-in-amd_iommu_.patch new file mode 100755 index 00000000..ae49a732 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0186-iommu-amd-Fix-incorrect-error-handling-in-amd_iommu_.patch @@ -0,0 +1,41 @@ +From fecc4e051f43d6bc2d46d6226b7a036147226d5e Mon Sep 17 00:00:00 2001 +From: Pan Bian <bianpan2016@163.com> +Date: Sun, 23 Apr 2017 18:23:21 +0800 +Subject: [PATCH 039/331] iommu/amd: Fix incorrect error handling in + amd_iommu_bind_pasid() + +commit 73dbd4a4230216b6a5540a362edceae0c9b4876b upstream. + +In function amd_iommu_bind_pasid(), the control flow jumps +to label out_free when pasid_state->mm and mm is NULL. And +mmput(mm) is called. In function mmput(mm), mm is +referenced without validation. This will result in a NULL +dereference bug. This patch fixes the bug. + +Signed-off-by: Pan Bian <bianpan2016@163.com> +Fixes: f0aac63b873b ('iommu/amd: Don't hold a reference to mm_struct') +Signed-off-by: Joerg Roedel <jroedel@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/amd_iommu_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c +index f8ed8c9..a0b4ac6 100644 +--- a/drivers/iommu/amd_iommu_v2.c ++++ b/drivers/iommu/amd_iommu_v2.c +@@ -695,9 +695,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, + + out_unregister: + mmu_notifier_unregister(&pasid_state->mn, mm); ++ mmput(mm); + + out_free: +- mmput(mm); + free_pasid_state(pasid_state); + + out: +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0187-iommu-amd-Fix-interrupt-remapping-when-disable-guest.patch b/meta-snowyowl/recipes-kernel/linux/files/0187-iommu-amd-Fix-interrupt-remapping-when-disable-guest.patch new file mode 100755 index 00000000..0afa767f --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0187-iommu-amd-Fix-interrupt-remapping-when-disable-guest.patch @@ -0,0 +1,59 @@ +From 59a13724c78977c316184ee4efbf00e82f70fe50 Mon Sep 17 00:00:00 2001 +From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> +Date: Mon, 26 Jun 2017 04:28:04 -0500 +Subject: [PATCH 040/331] iommu/amd: Fix interrupt remapping when disable + guest_mode + +commit 84a21dbdef0b96d773599c33c2afbb002198d303 upstream. 
+ +Pass-through devices to VM guest can get updated IRQ affinity +information via irq_set_affinity() when not running in guest mode. +Currently, AMD IOMMU driver in GA mode ignores the updated information +if the pass-through device is setup to use vAPIC regardless of guest_mode. +This could cause invalid interrupt remapping. + +Also, the guest_mode bit should be set and cleared only when +SVM updates posted-interrupt interrupt remapping information. + +Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> +Cc: Joerg Roedel <jroedel@suse.de> +Fixes: d98de49a53e48 ('iommu/amd: Enable vAPIC interrupt remapping mode by default') +Signed-off-by: Joerg Roedel <jroedel@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/amd_iommu.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index 11a13b5..41800b6 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3857,11 +3857,9 @@ static void irte_ga_prepare(void *entry, + u8 vector, u32 dest_apicid, int devid) + { + struct irte_ga *irte = (struct irte_ga *) entry; +- struct iommu_dev_data *dev_data = search_dev_data(devid); + + irte->lo.val = 0; + irte->hi.val = 0; +- irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0; + irte->lo.fields_remap.int_type = delivery_mode; + irte->lo.fields_remap.dm = dest_mode; + irte->hi.fields.vector = vector; +@@ -3917,10 +3915,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, + struct irte_ga *irte = (struct irte_ga *) entry; + struct iommu_dev_data *dev_data = search_dev_data(devid); + +- if (!dev_data || !dev_data->use_vapic) { ++ if (!dev_data || !dev_data->use_vapic || ++ !irte->lo.fields_remap.guest_mode) { + irte->hi.fields.vector = vector; + irte->lo.fields_remap.destination = dest_apicid; +- irte->lo.fields_remap.guest_mode = 0; + modify_irte_ga(devid, index, irte, NULL); + } + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0188-iommu-amd-Enable-ga_log_intr-when-enabling-guest_mod.patch b/meta-snowyowl/recipes-kernel/linux/files/0188-iommu-amd-Enable-ga_log_intr-when-enabling-guest_mod.patch new file mode 100755 index 00000000..f39df00d --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0188-iommu-amd-Enable-ga_log_intr-when-enabling-guest_mod.patch @@ -0,0 +1,37 @@ +From 91b5a0c0fb7ffdfd726ec048649b499d310708d9 Mon Sep 17 00:00:00 2001 +From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> +Date: Wed, 5 Jul 2017 21:29:59 -0500 +Subject: [PATCH 061/331] iommu/amd: Enable ga_log_intr when enabling + guest_mode + +commit efe6f241602cb61466895f6816b8ea6b90f04d4e upstream. + +IRTE[GALogIntr] bit should set when enabling guest_mode, which enables +IOMMU to generate entry in GALog when IRTE[IsRun] is not set, and send +an interrupt to notify IOMMU driver. 
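+
+A toy model of the IRTE update (the bitfield layout here is
+illustrative, not the hardware format): the GA-log interrupt bit is
+armed in the same update that switches the entry into guest mode.
+
+  #include <stdio.h>
+
+  struct irte_vapic {
+          unsigned int guest_mode  : 1;
+          unsigned int ga_log_intr : 1;
+          unsigned int ga_tag      : 16;
+  };
+
+  int main(void)
+  {
+          struct irte_vapic irte = { 0 };
+
+          irte.ga_log_intr = 1;   /* the fix: GALog entry raises an irq */
+          irte.guest_mode  = 1;
+          irte.ga_tag      = 42;  /* hypothetical tag */
+
+          printf("guest_mode=%u ga_log_intr=%u ga_tag=%u\n",
+                 irte.guest_mode, irte.ga_log_intr, irte.ga_tag);
+          return 0;
+  }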
+ +Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> +Cc: Joerg Roedel <jroedel@suse.de> +Fixes: d98de49a53e48 ('iommu/amd: Enable vAPIC interrupt remapping mode by default') +Signed-off-by: Joerg Roedel <jroedel@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/amd_iommu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index 41800b6..c380b7e 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -4294,6 +4294,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) + /* Setting */ + irte->hi.fields.ga_root_ptr = (pi_data->base >> 12); + irte->hi.fields.vector = vcpu_pi_info->vector; ++ irte->lo.fields_vapic.ga_log_intr = 1; + irte->lo.fields_vapic.guest_mode = 1; + irte->lo.fields_vapic.ga_tag = pi_data->ga_tag; + +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0189-iommu-amd-Finish-TLB-flush-in-amd_iommu_unmap.patch b/meta-snowyowl/recipes-kernel/linux/files/0189-iommu-amd-Finish-TLB-flush-in-amd_iommu_unmap.patch new file mode 100755 index 00000000..f9ec60c1 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0189-iommu-amd-Finish-TLB-flush-in-amd_iommu_unmap.patch @@ -0,0 +1,34 @@ +From cda810e9b68656102c4d12c4016b9ea76e45604d Mon Sep 17 00:00:00 2001 +From: Joerg Roedel <jroedel@suse.de> +Date: Fri, 13 Oct 2017 14:32:37 +0200 +Subject: [PATCH 086/331] iommu/amd: Finish TLB flush in amd_iommu_unmap() + +commit ce76353f169a6471542d999baf3d29b121dce9c0 upstream. + +The function only sends the flush command to the IOMMU(s), +but does not wait for its completion when it returns. Fix +that. + +Fixes: 601367d76bd1 ('x86/amd-iommu: Remove iommu_flush_domain function') +Signed-off-by: Joerg Roedel <jroedel@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/amd_iommu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index c380b7e..1a0b110 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3120,6 +3120,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, + mutex_unlock(&domain->api_lock); + + domain_flush_tlb_pde(domain); ++ domain_flush_complete(domain); + + return unmap_size; + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0190-iommu-amd-Limit-the-IOVA-page-range-to-the-specified.patch b/meta-snowyowl/recipes-kernel/linux/files/0190-iommu-amd-Limit-the-IOVA-page-range-to-the-specified.patch new file mode 100755 index 00000000..9cc8e243 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0190-iommu-amd-Limit-the-IOVA-page-range-to-the-specified.patch @@ -0,0 +1,38 @@ +From 4e6907cae5feafc672ab00bab42fbf05e646496a Mon Sep 17 00:00:00 2001 +From: Gary R Hook <gary.hook@amd.com> +Date: Fri, 3 Nov 2017 10:50:34 -0600 +Subject: [PATCH 098/331] iommu/amd: Limit the IOVA page range to the specified + addresses + +[ Upstream commit b92b4fb5c14257c0e7eae291ecc1f7b1962e1699 ] + +The extent of pages specified when applying a reserved region should +include up to the last page of the range, but not the page following +the range. 
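+
+The arithmetic, reduced to a stand-alone check with 4 KiB pages: a
+region at 0x1000 of length 0x2000 spans PFNs 1 and 2, so the end PFN
+must be computed from the last byte of the range, not the byte after
+it.
+
+  #include <stdio.h>
+
+  #define PAGE_SHIFT 12
+  #define IOVA_PFN(addr) ((unsigned long)(addr) >> PAGE_SHIFT)
+
+  int main(void)
+  {
+          unsigned long start = 0x1000, length = 0x2000;
+
+          printf("end PFN before fix: %lu\n",
+                 IOVA_PFN(start + length));      /* 3: one page too far */
+          printf("end PFN after fix:  %lu\n",
+                 IOVA_PFN(start + length - 1));  /* 2: last covered page */
+          return 0;
+  }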
+ +Signed-off-by: Gary R Hook <gary.hook@amd.com> +Fixes: 8d54d6c8b8f3 ('iommu/amd: Implement apply_dm_region call-back') +Signed-off-by: Alex Williamson <alex.williamson@redhat.com> +Signed-off-by: Sasha Levin <alexander.levin@verizon.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + drivers/iommu/amd_iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index 1a0b110..0c910a8 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3211,7 +3211,7 @@ static void amd_iommu_apply_dm_region(struct device *dev, + unsigned long start, end; + + start = IOVA_PFN(region->start); +- end = IOVA_PFN(region->start + region->length); ++ end = IOVA_PFN(region->start + region->length - 1); + + WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0483-x86-mce-AMD-Give-a-name-to-MCA-bank-3-when-accessed-.patch b/meta-snowyowl/recipes-kernel/linux/files/0483-x86-mce-AMD-Give-a-name-to-MCA-bank-3-when-accessed-.patch new file mode 100644 index 00000000..24803219 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0483-x86-mce-AMD-Give-a-name-to-MCA-bank-3-when-accessed-.patch @@ -0,0 +1,54 @@ +From d521c1d4299aa6940c566aae6b5101e756612c4b Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Thu, 30 Mar 2017 13:17:14 +0200 +Subject: [PATCH 03/10] x86/mce/AMD: Give a name to MCA bank 3 when accessed + with legacy MSRs + +MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. +However, MCA bank 3 is defined on Fam17h systems and can be accessed +using legacy MSRs. Without a name we get a stack trace on Fam17h systems +when trying to register sysfs files for bank 3 on kernels that don't +recognize Scalable MCA. + +Call MCA bank 3 "decode_unit" since this is what it represents on +Fam17h. This will allow kernels without SMCA support to see this bank on +Fam17h+ and prevent the stack trace. This will not affect older systems +since this bank is reserved on them, i.e. it'll be ignored. + +Tested on AMD Fam15h and Fam17h systems. + + WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal + kobject: (ffff88085bb256c0): attempted to be registered with empty name! + ... 
+ Call Trace: + kobject_add_internal + kobject_add + kobject_create_and_add + threshold_create_device + threshold_init_device + +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index 48e875d..ea553db 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -61,7 +61,7 @@ static const char * const th_names[] = { + "load_store", + "insn_fetch", + "combined_unit", +- "", ++ "decode_unit", + "northbridge", + "execution_unit", + }; +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0484-x86-mce-Convert-threshold_bank.cpus-from-atomic_t-to.patch b/meta-snowyowl/recipes-kernel/linux/files/0484-x86-mce-Convert-threshold_bank.cpus-from-atomic_t-to.patch new file mode 100644 index 00000000..e2105e53 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0484-x86-mce-Convert-threshold_bank.cpus-from-atomic_t-to.patch @@ -0,0 +1,82 @@ +From eaf03de61d40ddf14876380f644446e7921340a9 Mon Sep 17 00:00:00 2001 +From: Elena Reshetova <elena.reshetova@intel.com> +Date: Fri, 19 May 2017 11:39:13 +0200 +Subject: [PATCH 04/10] x86/mce: Convert threshold_bank.cpus from atomic_t to + refcount_t + +The refcount_t type and corresponding API should be used instead +of atomic_t when the variable is used as a reference counter. This +allows to avoid accidental refcounter overflows that might lead to +use-after-free situations. 
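+
+A kernel-style sketch of the conversion pattern (abbreviated types,
+not the mce code itself): refcount_t saturates instead of wrapping, so
+an over-increment cannot roll the counter through zero and cause a
+premature free.
+
+  #include <linux/refcount.h>
+  #include <linux/slab.h>
+
+  struct shared_bank {
+          refcount_t cpus;
+          /* ... */
+  };
+
+  static struct shared_bank *bank_get(struct shared_bank *b)
+  {
+          refcount_inc(&b->cpus);               /* was: atomic_inc() */
+          return b;
+  }
+
+  static void bank_put(struct shared_bank *b)
+  {
+          if (refcount_dec_and_test(&b->cpus))  /* was: atomic_dec_and_test() */
+                  kfree(b);
+  }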
+ +Suggested-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Elena Reshetova <elena.reshetova@intel.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com> +Reviewed-by: David Windsor <dwindsor@gmail.com> +Cc: Tony Luck <tony.luck@intel.com> +Cc: Yazen Ghannam <Yazen.Ghannam@amd.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Link: http://lkml.kernel.org/r/1492695536-5947-1-git-send-email-elena.reshetova@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/include/asm/amd_nb.h | 3 ++- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h +index 00c88a0..da181ad 100644 +--- a/arch/x86/include/asm/amd_nb.h ++++ b/arch/x86/include/asm/amd_nb.h +@@ -3,6 +3,7 @@ + + #include <linux/ioport.h> + #include <linux/pci.h> ++#include <linux/refcount.h> + + struct amd_nb_bus_dev_range { + u8 bus; +@@ -55,7 +56,7 @@ struct threshold_bank { + struct threshold_block *blocks; + + /* initialized to the number of CPUs on the node sharing this bank */ +- atomic_t cpus; ++ refcount_t cpus; + }; + + struct amd_northbridge { +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index ea553db..b5b352c 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -1203,7 +1203,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) + goto out; + + per_cpu(threshold_banks, cpu)[bank] = b; +- atomic_inc(&b->cpus); ++ refcount_inc(&b->cpus); + + err = __threshold_add_blocks(b); + +@@ -1226,7 +1226,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) + per_cpu(threshold_banks, cpu)[bank] = b; + + if (is_shared_bank(bank)) { +- atomic_set(&b->cpus, 1); ++ refcount_set(&b->cpus, 1); + + /* nb is already initialized, see above */ + if (nb) { +@@ -1290,7 +1290,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) + goto free_out; + + if (is_shared_bank(bank)) { +- if (!atomic_dec_and_test(&b->cpus)) { ++ if (!refcount_dec_and_test(&b->cpus)) { + __threshold_remove_blocks(b); + per_cpu(threshold_banks, cpu)[bank] = NULL; + return; +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0485-x86-mce-AMD-Redo-error-logging-from-APIC-LVT-interru.patch b/meta-snowyowl/recipes-kernel/linux/files/0485-x86-mce-AMD-Redo-error-logging-from-APIC-LVT-interru.patch new file mode 100644 index 00000000..377801f5 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0485-x86-mce-AMD-Redo-error-logging-from-APIC-LVT-interru.patch @@ -0,0 +1,253 @@ +From 448e70854e4578b043f3229e463076b5406141ba Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Fri, 19 May 2017 11:39:14 +0200 +Subject: [PATCH 05/10] x86/mce/AMD: Redo error logging from APIC LVT interrupt + handlers + +We have support for the new SMCA MCA_DE{STAT,ADDR} registers in Linux. +So we've used these registers in place of MCA_{STATUS,ADDR} on SMCA +systems. + +However, the guidance for current SMCA implementations of is to continue +using MCA_{STATUS,ADDR} and to use MCA_DE{STAT,ADDR} only if a Deferred +error was not found in the former registers. If we logged a Deferred +error in MCA_STATUS then we should also clear MCA_DESTAT. This also +means we shouldn't clear MCA_CONFIG[LogDeferredInMcaStat]. 
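+
+Condensed into a runnable toy (two variables stand in for the
+MCA_STATUS and MCA_DESTAT MSRs; the bit positions match
+MCI_STATUS_VAL and MCI_STATUS_DEFERRED), the intended flow is:
+
+  #include <stdio.h>
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  #define MCI_STATUS_VAL      (1ull << 63)
+  #define MCI_STATUS_DEFERRED (1ull << 44)
+
+  static uint64_t mca_status, mca_destat;   /* stand-ins for the MSRs */
+
+  static bool log_bank(uint64_t *stat)
+  {
+          uint64_t s = *stat;
+
+          if (!(s & MCI_STATUS_VAL))
+                  return false;
+          printf("logged %#llx\n", (unsigned long long)s);
+          *stat = 0;                        /* clear after logging */
+          return s & MCI_STATUS_DEFERRED;
+  }
+
+  int main(void)
+  {
+          mca_status = MCI_STATUS_VAL | MCI_STATUS_DEFERRED;
+          mca_destat = MCI_STATUS_VAL | MCI_STATUS_DEFERRED;
+
+          if (log_bank(&mca_status))   /* deferred seen in MCA_STATUS */
+                  mca_destat = 0;      /* ...so clear MCA_DESTAT too */
+          else
+                  log_bank(&mca_destat);
+          return 0;
+  }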
+ +Rework __log_error() to only log an error and add helpers for the +different error types being logged from the corresponding interrupt +handlers. + +Boris: carve out common functionality into a _log_error_bank(). Cleanup +comments, check MCi_STATUS bits before reading MSRs. Streamline flow. + +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Tony Luck <tony.luck@intel.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Link: http://lkml.kernel.org/r/1493147772-2721-1-git-send-email-Yazen.Ghannam@amd.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 147 ++++++++++++++++++----------------- + 1 file changed, 74 insertions(+), 73 deletions(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index b5b352c..58e4cea 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -473,20 +473,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + smca_high |= BIT(0); + + /* +- * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR} +- * registers with the option of additionally logging to +- * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set. +- * +- * This bit is usually set by BIOS to retain the old behavior +- * for OSes that don't use the new registers. Linux supports the +- * new registers so let's disable that additional logging here. +- * +- * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high +- * portion of the MSR). +- */ +- smca_high &= ~BIT(2); +- +- /* + * SMCA sets the Deferred Error Interrupt type per bank. + * + * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us +@@ -756,37 +742,19 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) + } + EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); + +-static void +-__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) ++static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) + { +- u32 msr_status = msr_ops.status(bank); +- u32 msr_addr = msr_ops.addr(bank); + struct mce m; +- u64 status; +- +- WARN_ON_ONCE(deferred_err && threshold_err); +- +- if (deferred_err && mce_flags.smca) { +- msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank); +- msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank); +- } +- +- rdmsrl(msr_status, status); +- +- if (!(status & MCI_STATUS_VAL)) +- return; + + mce_setup(&m); + + m.status = status; ++ m.misc = misc; + m.bank = bank; + m.tsc = rdtsc(); + +- if (threshold_err) +- m.misc = misc; +- + if (m.status & MCI_STATUS_ADDRV) { +- rdmsrl(msr_addr, m.addr); ++ m.addr = addr; + + /* + * Extract [55:<lsb>] where lsb is the least significant +@@ -807,8 +775,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) + } + + mce_log(&m); +- +- wrmsrl(msr_status, 0); + } + + static inline void __smp_deferred_error_interrupt(void) +@@ -833,45 +799,85 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void) + exiting_ack_irq(); + } + +-/* APIC interrupt handler for deferred errors */ +-static void amd_deferred_error_interrupt(void) ++/* ++ * Returns true if the logged error is deferred. False, otherwise. 
++ */ ++static inline bool ++_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) + { +- unsigned int bank; +- u32 msr_status; +- u64 status; ++ u64 status, addr = 0; + +- for (bank = 0; bank < mca_cfg.banks; ++bank) { +- msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank) +- : msr_ops.status(bank); ++ rdmsrl(msr_stat, status); ++ if (!(status & MCI_STATUS_VAL)) ++ return false; + +- rdmsrl(msr_status, status); ++ if (status & MCI_STATUS_ADDRV) ++ rdmsrl(msr_addr, addr); + +- if (!(status & MCI_STATUS_VAL) || +- !(status & MCI_STATUS_DEFERRED)) +- continue; ++ __log_error(bank, status, addr, misc); + +- __log_error(bank, true, false, 0); +- break; +- } ++ wrmsrl(status, 0); ++ ++ return status & MCI_STATUS_DEFERRED; + } + + /* +- * APIC Interrupt Handler ++ * We have three scenarios for checking for Deferred errors: ++ * ++ * 1) Non-SMCA systems check MCA_STATUS and log error if found. ++ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also ++ * clear MCA_DESTAT. ++ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and ++ * log it. + */ ++static void log_error_deferred(unsigned int bank) ++{ ++ bool defrd; ++ ++ defrd = _log_error_bank(bank, msr_ops.status(bank), ++ msr_ops.addr(bank), 0); ++ ++ if (!mce_flags.smca) ++ return; ++ ++ /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */ ++ if (defrd) { ++ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); ++ return; ++ } ++ ++ /* ++ * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check ++ * for a valid error. ++ */ ++ _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), ++ MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); ++} ++ ++/* APIC interrupt handler for deferred errors */ ++static void amd_deferred_error_interrupt(void) ++{ ++ unsigned int bank; ++ ++ for (bank = 0; bank < mca_cfg.banks; ++bank) ++ log_error_deferred(bank); ++} ++ ++static void log_error_thresholding(unsigned int bank, u64 misc) ++{ ++ _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc); ++} + + /* +- * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. +- * the interrupt goes off when error_count reaches threshold_limit. +- * the handler will simply log mcelog w/ software defined bank number. ++ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt ++ * goes off when error_count reaches threshold_limit. + */ +- + static void amd_threshold_interrupt(void) + { + u32 low = 0, high = 0, address = 0; + unsigned int bank, block, cpu = smp_processor_id(); + struct thresh_restart tr; + +- /* assume first bank caused it */ + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (!(per_cpu(bank_map, cpu) & (1 << bank))) + continue; +@@ -894,23 +900,18 @@ static void amd_threshold_interrupt(void) + (high & MASK_LOCKED_HI)) + continue; + +- /* +- * Log the machine check that caused the threshold +- * event. +- */ +- if (high & MASK_OVERFLOW_HI) +- goto log; +- } +- } +- return; ++ if (!(high & MASK_OVERFLOW_HI)) ++ continue; + +-log: +- __log_error(bank, false, true, ((u64)high << 32) | low); ++ /* Log the MCE which caused the threshold event. */ ++ log_error_thresholding(bank, ((u64)high << 32) | low); + +- /* Reset threshold block after logging error. */ +- memset(&tr, 0, sizeof(tr)); +- tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; +- threshold_restart_bank(&tr); ++ /* Reset threshold block after logging error. 
*/ ++ memset(&tr, 0, sizeof(tr)); ++ tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; ++ threshold_restart_bank(&tr); ++ } ++ } + } + + /* +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0486-x86-mce-AMD-Carve-out-SMCA-bank-configuration.patch b/meta-snowyowl/recipes-kernel/linux/files/0486-x86-mce-AMD-Carve-out-SMCA-bank-configuration.patch new file mode 100644 index 00000000..523aece8 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0486-x86-mce-AMD-Carve-out-SMCA-bank-configuration.patch @@ -0,0 +1,147 @@ +From 5514f9a6ffc52bf263afc57171037ed7ec5033fa Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Fri, 19 May 2017 11:39:15 +0200 +Subject: [PATCH 06/10] x86/mce/AMD: Carve out SMCA bank configuration + +Scalable MCA systems have a new MCA_CONFIG register that we use to +configure each bank. We currently use this when we set up thresholding. +However, this is logically separate. + +Group all SMCA-related initialization into a single function. + +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Tony Luck <tony.luck@intel.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Link: http://lkml.kernel.org/r/1493147772-2721-2-git-send-email-Yazen.Ghannam@amd.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 76 ++++++++++++++++++------------------ + 1 file changed, 38 insertions(+), 38 deletions(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index 58e4cea..82d0c1c 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -165,17 +165,48 @@ static void default_deferred_error_interrupt(void) + } + void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; + +-static void get_smca_bank_info(unsigned int bank) ++static void smca_configure(unsigned int bank, unsigned int cpu) + { +- unsigned int i, hwid_mcatype, cpu = smp_processor_id(); ++ unsigned int i, hwid_mcatype; + struct smca_hwid *s_hwid; +- u32 high, instance_id; ++ u32 high, low; ++ u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank); ++ ++ /* Set appropriate bits in MCA_CONFIG */ ++ if (!rdmsr_safe(smca_config, &low, &high)) { ++ /* ++ * OS is required to set the MCAX bit to acknowledge that it is ++ * now using the new MSR ranges and new registers under each ++ * bank. It also means that the OS will configure deferred ++ * errors in the new MCx_CONFIG register. If the bit is not set, ++ * uncorrectable errors will cause a system panic. ++ * ++ * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.) ++ */ ++ high |= BIT(0); ++ ++ /* ++ * SMCA sets the Deferred Error Interrupt type per bank. ++ * ++ * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us ++ * if the DeferredIntType bit field is available. ++ * ++ * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the ++ * high portion of the MSR). OS should set this to 0x1 to enable ++ * APIC based interrupt. First, check that no interrupt has been ++ * set. ++ */ ++ if ((low & BIT(5)) && !((high >> 5) & 0x3)) ++ high |= BIT(5); ++ ++ wrmsr(smca_config, low, high); ++ } + + /* Collect bank_info using CPU 0 for now. 
*/ + if (cpu) + return; + +- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) { ++ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + pr_warn("Failed to read MCA_IPID for bank %d\n", bank); + return; + } +@@ -192,7 +223,7 @@ static void get_smca_bank_info(unsigned int bank) + smca_get_name(s_hwid->bank_type)); + + smca_banks[bank].hwid = s_hwid; +- smca_banks[bank].id = instance_id; ++ smca_banks[bank].id = low; + smca_banks[bank].sysfs_id = s_hwid->count++; + break; + } +@@ -434,7 +465,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) + { + unsigned int cpu = smp_processor_id(); +- u32 smca_low, smca_high, smca_addr; ++ u32 smca_low, smca_high; + struct threshold_block b; + int new; + +@@ -458,37 +489,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + goto set_offset; + } + +- smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); +- +- if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { +- /* +- * OS is required to set the MCAX bit to acknowledge that it is +- * now using the new MSR ranges and new registers under each +- * bank. It also means that the OS will configure deferred +- * errors in the new MCx_CONFIG register. If the bit is not set, +- * uncorrectable errors will cause a system panic. +- * +- * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.) +- */ +- smca_high |= BIT(0); +- +- /* +- * SMCA sets the Deferred Error Interrupt type per bank. +- * +- * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us +- * if the DeferredIntType bit field is available. +- * +- * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the +- * high portion of the MSR). OS should set this to 0x1 to enable +- * APIC based interrupt. First, check that no interrupt has been +- * set. +- */ +- if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3)) +- smca_high |= BIT(5); +- +- wrmsr(smca_addr, smca_low, smca_high); +- } +- + /* Gather LVT offset for thresholding: */ + if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) + goto out; +@@ -517,7 +517,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) + + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (mce_flags.smca) +- get_smca_bank_info(bank); ++ smca_configure(bank, cpu); + + for (block = 0; block < NR_BLOCKS; ++block) { + address = get_block_address(cpu, address, low, high, bank, block); +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0487-x86-mce-AMD-Use-msr_stat-when-clearing-MCA_STATUS.patch b/meta-snowyowl/recipes-kernel/linux/files/0487-x86-mce-AMD-Use-msr_stat-when-clearing-MCA_STATUS.patch new file mode 100644 index 00000000..3bc5cb8c --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0487-x86-mce-AMD-Use-msr_stat-when-clearing-MCA_STATUS.patch @@ -0,0 +1,49 @@ +From 66004d7800e0310fac27a01e4139cb0c798e3c8d Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Tue, 13 Jun 2017 18:28:28 +0200 +Subject: [PATCH 07/10] x86/mce/AMD: Use msr_stat when clearing MCA_STATUS + +The value of MCA_STATUS is used as the MSR when clearing MCA_STATUS. + +This may cause the following warning: + + unchecked MSR access error: WRMSR to 0x11b (tried to write 0x0000000000000000) + Call Trace: + <IRQ> + smp_threshold_interrupt() + threshold_interrupt() + +Use msr_stat instead which has the MSR address. 
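+
+The bug class, as a user-space caricature (stubbed wrmsrl(), made-up
+MSR number): the register's value was passed where its address -- the
+MSR number -- belongs.
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  /* stub: the real wrmsrl() writes 'val' to MSR number 'msr' */
+  static void wrmsrl(uint32_t msr, uint64_t val)
+  {
+          printf("WRMSR %#x <- %#llx\n", msr, (unsigned long long)val);
+  }
+
+  int main(void)
+  {
+          uint32_t msr_stat = 0xc0002001;   /* MSR address (hypothetical) */
+          uint64_t status   = 0x11b;        /* value read from that MSR */
+
+          wrmsrl((uint32_t)status, 0);      /* bug: writes MSR 0x11b */
+          wrmsrl(msr_stat, 0);              /* fix: clears the right MSR */
+          return 0;
+  }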
+ +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Tony Luck <tony.luck@intel.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Fixes: 37d43acfd79f ("x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers") +Link: http://lkml.kernel.org/r/20170613162835.30750-2-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index 82d0c1c..a4e38c4 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -816,7 +816,7 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) + + __log_error(bank, status, addr, misc); + +- wrmsrl(status, 0); ++ wrmsrl(msr_stat, 0); + + return status & MCI_STATUS_DEFERRED; + } +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch b/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch new file mode 100644 index 00000000..396ea4a5 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch @@ -0,0 +1,143 @@ +From 6fbcc132ec02586165b39b6a324becedc1da052f Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Tue, 13 Jun 2017 18:28:29 +0200 +Subject: [PATCH 08/10] x86/mce/AMD: Use saved threshold block info in + interrupt handler + +In the amd_threshold_interrupt() handler, we loop through every possible +block in each bank and rediscover the block's address and if it's valid, +e.g. valid, counter present and not locked. + +However, we already have the address saved in the threshold blocks list +for each CPU and bank. The list only contains blocks that have passed +all the valid checks. + +Besides the redundancy, there's also a smp_call_function* in +get_block_address() which causes a warning when servicing the interrupt: + + WARNING: CPU: 0 PID: 0 at kernel/smp.c:281 smp_call_function_single+0xdd/0xf0 + ... + Call Trace: + <IRQ> + rdmsr_safe_on_cpu() + get_block_address.isra.2() + amd_threshold_interrupt() + smp_threshold_interrupt() + threshold_interrupt() + +because we do get called in an interrupt handler *with* interrupts +disabled, which can result in a deadlock. + +Drop the redundant valid checks and move the overflow check, logging and +block reset into a separate function. + +Check the first block then iterate over the rest. This procedure is +needed since the first block is used as the head of the list. 
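
The last paragraph is the subtle part: the per-bank blocks pointer is itself the first threshold block, and the remaining blocks hang off that block's miscj list, so a plain list walk would skip the head. A runnable user-space analogue of the check-head-then-iterate pattern the patch adopts (the struct and helper names here are illustrative, not the kernel's types):

#include <stdio.h>

/* Illustrative stand-in for struct threshold_block: the first element
 * doubles as the list head, and the rest are reached through its link
 * (the kernel uses a list_head named miscj). */
struct block {
    int bank, id;
    struct block *next;
};

static void log_and_reset_block(struct block *b)
{
    if (!b)
        return;
    printf("bank %d block %d: check overflow, log, reset\n", b->bank, b->id);
}

int main(void)
{
    struct block b2    = { .bank = 0, .id = 2, .next = NULL };
    struct block b1    = { .bank = 0, .id = 1, .next = &b2 };
    struct block first = { .bank = 0, .id = 0, .next = &b1 }; /* head AND element */

    log_and_reset_block(&first);            /* service the head first... */
    for (struct block *b = first.next; b; b = b->next)
        log_and_reset_block(b);             /* ...then the rest of the list */
    return 0;
}
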
+ +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Tony Luck <tony.luck@intel.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Link: http://lkml.kernel.org/r/20170613162835.30750-3-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 66 +++++++++++++++++++----------------- + 1 file changed, 35 insertions(+), 31 deletions(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index a4e38c4..188f95b 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -868,49 +868,53 @@ static void log_error_thresholding(unsigned int bank, u64 misc) + _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc); + } + ++static void log_and_reset_block(struct threshold_block *block) ++{ ++ struct thresh_restart tr; ++ u32 low = 0, high = 0; ++ ++ if (!block) ++ return; ++ ++ if (rdmsr_safe(block->address, &low, &high)) ++ return; ++ ++ if (!(high & MASK_OVERFLOW_HI)) ++ return; ++ ++ /* Log the MCE which caused the threshold event. */ ++ log_error_thresholding(block->bank, ((u64)high << 32) | low); ++ ++ /* Reset threshold block after logging error. */ ++ memset(&tr, 0, sizeof(tr)); ++ tr.b = block; ++ threshold_restart_bank(&tr); ++} ++ + /* + * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt + * goes off when error_count reaches threshold_limit. + */ + static void amd_threshold_interrupt(void) + { +- u32 low = 0, high = 0, address = 0; +- unsigned int bank, block, cpu = smp_processor_id(); +- struct thresh_restart tr; ++ struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; ++ unsigned int bank, cpu = smp_processor_id(); + + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (!(per_cpu(bank_map, cpu) & (1 << bank))) + continue; +- for (block = 0; block < NR_BLOCKS; ++block) { +- address = get_block_address(cpu, address, low, high, bank, block); +- if (!address) +- break; +- +- if (rdmsr_safe(address, &low, &high)) +- break; +- +- if (!(high & MASK_VALID_HI)) { +- if (block) +- continue; +- else +- break; +- } +- +- if (!(high & MASK_CNTP_HI) || +- (high & MASK_LOCKED_HI)) +- continue; +- +- if (!(high & MASK_OVERFLOW_HI)) +- continue; + +- /* Log the MCE which caused the threshold event. */ +- log_error_thresholding(bank, ((u64)high << 32) | low); ++ first_block = per_cpu(threshold_banks, cpu)[bank]->blocks; ++ if (!first_block) ++ continue; + +- /* Reset threshold block after logging error. */ +- memset(&tr, 0, sizeof(tr)); +- tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; +- threshold_restart_bank(&tr); +- } ++ /* ++ * The first block is also the head of the list. Check it first ++ * before iterating over the rest. 
++ */ ++ log_and_reset_block(first_block); ++ list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj) ++ log_and_reset_block(block); + } + } + +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0489-x86-mce-AMD-Allow-any-CPU-to-initialize-the-smca_ban.patch b/meta-snowyowl/recipes-kernel/linux/files/0489-x86-mce-AMD-Allow-any-CPU-to-initialize-the-smca_ban.patch new file mode 100644 index 00000000..86ef24f9 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0489-x86-mce-AMD-Allow-any-CPU-to-initialize-the-smca_ban.patch @@ -0,0 +1,77 @@ +From 5882e0374c595e107cd3d899ed9072f3af32f578 Mon Sep 17 00:00:00 2001 +From: Yazen Ghannam <yazen.ghannam@amd.com> +Date: Mon, 24 Jul 2017 12:12:28 +0200 +Subject: [PATCH 09/10] x86/mce/AMD: Allow any CPU to initialize the smca_banks + array + +Current SMCA implementations have the same banks on each CPU with the +non-core banks only visible to a "master thread" on each die. Practically, +this means the smca_banks array, which describes the banks, only needs to +be populated once by a single master thread. + +CPU 0 seemed like a good candidate to do the populating. However, it's +possible that CPU 0 is not enabled in which case the smca_banks array won't +be populated. + +Rather than try to figure out another master thread to do the populating, +we should just allow any CPU to populate the array. + +Drop the CPU 0 check and return early if the bank was already initialized. +Also, drop the WARNing about an already initialized bank, since this will +be a common, expected occurrence. + +The smca_banks array is only populated at boot time and CPUs are brought +online sequentially. So there's no need for locking around the array. + +If the first CPU up is a master thread, then it will populate the array +with all banks, core and non-core. Every CPU afterwards will return +early. If the first CPU up is not a master thread, then it will populate +the array with all core banks. The first CPU afterwards that is a master +thread will skip populating the core banks and continue populating the +non-core banks. + +Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +Acked-by: Jack Miller <jack@codezen.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: Tony Luck <tony.luck@intel.com> +Cc: linux-edac <linux-edac@vger.kernel.org> +Link: http://lkml.kernel.org/r/20170724101228.17326-4-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index 188f95b..e08d323 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -202,8 +202,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu) + wrmsr(smca_config, low, high); + } + +- /* Collect bank_info using CPU 0 for now. */ +- if (cpu) ++ /* Return early if this bank was already initialized. 
*/ ++ if (smca_banks[bank].hwid) + return; + + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { +@@ -217,11 +217,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { + s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { +- +- WARN(smca_banks[bank].hwid, +- "Bank %s already initialized!\n", +- smca_get_name(s_hwid->bank_type)); +- + smca_banks[bank].hwid = s_hwid; + smca_banks[bank].id = low; + smca_banks[bank].sysfs_id = s_hwid->count++; +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/0490-Dependency-added.patch b/meta-snowyowl/recipes-kernel/linux/files/0490-Dependency-added.patch new file mode 100644 index 00000000..41ff7843 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/linux/files/0490-Dependency-added.patch @@ -0,0 +1,122 @@ +From e3fab5d128f3000c67b450d55215fd9f0861cd2d Mon Sep 17 00:00:00 2001 +From: Sudheesh Mavila <sudheesh.mavila@amd.com> +Date: Fri, 25 May 2018 16:05:11 +0530 +Subject: [PATCH 10/10] Dependency added + +Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com> +--- + include/linux/refcount.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 102 insertions(+) + create mode 100755 include/linux/refcount.h + +diff --git a/include/linux/refcount.h b/include/linux/refcount.h +new file mode 100755 +index 0000000..4193c41 +--- /dev/null ++++ b/include/linux/refcount.h +@@ -0,0 +1,102 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_REFCOUNT_H ++#define _LINUX_REFCOUNT_H ++ ++#include <linux/atomic.h> ++#include <linux/mutex.h> ++#include <linux/spinlock.h> ++#include <linux/kernel.h> ++ ++/** ++ * struct refcount_t - variant of atomic_t specialized for reference counts ++ * @refs: atomic_t counter field ++ * ++ * The counter saturates at UINT_MAX and will not move once ++ * there. This avoids wrapping the counter and causing 'spurious' ++ * use-after-free bugs. 
++ */ ++typedef struct refcount_struct { ++ atomic_t refs; ++} refcount_t; ++ ++#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } ++ ++/** ++ * refcount_set - set a refcount's value ++ * @r: the refcount ++ * @n: value to which the refcount will be set ++ */ ++static inline void refcount_set(refcount_t *r, unsigned int n) ++{ ++ atomic_set(&r->refs, n); ++} ++ ++/** ++ * refcount_read - get a refcount's value ++ * @r: the refcount ++ * ++ * Return: the refcount's value ++ */ ++static inline unsigned int refcount_read(const refcount_t *r) ++{ ++ return atomic_read(&r->refs); ++} ++ ++#ifdef CONFIG_REFCOUNT_FULL ++extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); ++extern void refcount_add(unsigned int i, refcount_t *r); ++ ++extern __must_check bool refcount_inc_not_zero(refcount_t *r); ++extern void refcount_inc(refcount_t *r); ++ ++extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); ++ ++extern __must_check bool refcount_dec_and_test(refcount_t *r); ++extern void refcount_dec(refcount_t *r); ++#else ++# ifdef CONFIG_ARCH_HAS_REFCOUNT ++# include <asm/refcount.h> ++# else ++static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) ++{ ++ return atomic_add_unless(&r->refs, i, 0); ++} ++ ++static inline void refcount_add(unsigned int i, refcount_t *r) ++{ ++ atomic_add(i, &r->refs); ++} ++ ++static inline __must_check bool refcount_inc_not_zero(refcount_t *r) ++{ ++ return atomic_add_unless(&r->refs, 1, 0); ++} ++ ++static inline void refcount_inc(refcount_t *r) ++{ ++ atomic_inc(&r->refs); ++} ++ ++static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) ++{ ++ return atomic_sub_and_test(i, &r->refs); ++} ++ ++static inline __must_check bool refcount_dec_and_test(refcount_t *r) ++{ ++ return atomic_dec_and_test(&r->refs); ++} ++ ++static inline void refcount_dec(refcount_t *r) ++{ ++ atomic_dec(&r->refs); ++} ++# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ ++#endif /* CONFIG_REFCOUNT_FULL */ ++ ++extern __must_check bool refcount_dec_if_one(refcount_t *r); ++extern __must_check bool refcount_dec_not_one(refcount_t *r); ++extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); ++extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); ++ ++#endif /* _LINUX_REFCOUNT_H */ +-- +2.7.4 + diff --git a/meta-snowyowl/recipes-kernel/linux/files/snowyowl-user-patches.scc b/meta-snowyowl/recipes-kernel/linux/files/snowyowl-user-patches.scc index 9a3c1aed..75f4dd46 100755 --- a/meta-snowyowl/recipes-kernel/linux/files/snowyowl-user-patches.scc +++ b/meta-snowyowl/recipes-kernel/linux/files/snowyowl-user-patches.scc @@ -190,3 +190,37 @@ patch 0162-crypto-ccp-invoke-the-DMA-callback-in-a-standard-way.patch patch 0163-amd-xgbe-Add-pre-post-auto-negotiation-phy-hooks.patch patch 0164-amd-xgbe-Improve-KR-auto-negotiation-and-training.patch patch 0001-net-core-dev.c-fix-build-for-full-RT-kernel.patch +patch 0165-License-cleanup-add-SPDX-GPL-2.0-license-identifier-.patch +patch 0166-mm-remove-__GFP_COLD.patch +patch 0167-net-amd-xgbe-Get-rid-of-custom-hex_dump_to_buffer.patch +patch 0168-net-amd-xgbe-fix-comparison-to-bitshift-when-dealing.patch +patch 0169-amd-xgbe-Restore-PCI-interrupt-enablement-setting-on.patch +patch 0170-ethernet-Use-octal-not-symbolic-permissions.patch +patch 0171-amd-xgbe-Only-use-the-SFP-supported-transceiver-sign.patch +patch 0172-Modification-to-previous-commit-305f3ad05fec3a5f0d7b.patch +patch 
0173-crypto-gcm-wait-for-crypto-op-not-signal-safe.patch +patch 0174-crypto-drbg-wait-for-crypto-op-not-signal-safe.patch +patch 0175-crypto-asymmetric_keys-handle-EBUSY-due-to-backlog-c.patch +patch 0176-crypto-Work-around-deallocated-stack-frame-reference.patch +patch 0177-crypto-drbg-Fixes-panic-in-wait_for_completion-call.patch +patch 0178-crypto-ccp-remove-unused-variable-qim.patch +patch 0179-crypto-ccp-use-ENOSPC-for-transient-busy-indication.patch +patch 0180-crypto-ccp-Build-the-AMD-secure-processor-driver-onl.patch +patch 0181-crypto-ccp-Add-Platform-Security-Processor-PSP-devic.patch +patch 0182-crypto-ccp-Define-SEV-userspace-ioctl-and-command-id.patch +patch 0183-mqueue-fix-a-use-after-free-in-sys_mq_notify.patch +patch 0184-i2c-designware-Fix-system-suspend.patch +patch 0185-iommu-dma-Don-t-reserve-PCI-I-O-windows.patch +patch 0186-iommu-amd-Fix-incorrect-error-handling-in-amd_iommu_.patch +patch 0187-iommu-amd-Fix-interrupt-remapping-when-disable-guest.patch +patch 0188-iommu-amd-Enable-ga_log_intr-when-enabling-guest_mod.patch +patch 0189-iommu-amd-Finish-TLB-flush-in-amd_iommu_unmap.patch +patch 0190-iommu-amd-Limit-the-IOVA-page-range-to-the-specified.patch +patch 0483-x86-mce-AMD-Give-a-name-to-MCA-bank-3-when-accessed-.patch +patch 0484-x86-mce-Convert-threshold_bank.cpus-from-atomic_t-to.patch +patch 0485-x86-mce-AMD-Redo-error-logging-from-APIC-LVT-interru.patch +patch 0486-x86-mce-AMD-Carve-out-SMCA-bank-configuration.patch +patch 0487-x86-mce-AMD-Use-msr_stat-when-clearing-MCA_STATUS.patch +patch 0488-x86-mce-AMD-Use-saved-threshold-block-info-in-interr.patch +patch 0489-x86-mce-AMD-Allow-any-CPU-to-initialize-the-smca_ban.patch +patch 0490-Dependency-added.patch diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-Update-kvm-instrumentation-for-4.15.patch b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-Update-kvm-instrumentation-for-4.15.patch new file mode 100644 index 00000000..3aa12e9c --- /dev/null +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-Update-kvm-instrumentation-for-4.15.patch @@ -0,0 +1,49 @@ +From 37ab960eef4b96785906487cbb11bdf08a4e42b8 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Date: Tue, 26 Dec 2017 09:47:22 -0500 +Subject: [PATCH 1/4] Update kvm instrumentation for 4.15 + +Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +--- + instrumentation/events/lttng-module/kvm.h | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h +index a8b3e9a..c01772c 100644 +--- a/instrumentation/events/lttng-module/kvm.h ++++ b/instrumentation/events/lttng-module/kvm.h +@@ -84,6 +84,22 @@ LTTNG_TRACEPOINT_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_READ, "read" }, \ + { KVM_TRACE_MMIO_WRITE, "write" } + ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) ++ ++LTTNG_TRACEPOINT_EVENT(kvm_mmio, ++ TP_PROTO(int type, int len, u64 gpa, void *val), ++ TP_ARGS(type, len, gpa, val), ++ ++ TP_FIELDS( ++ ctf_integer(u32, type, type) ++ ctf_integer(u32, len, len) ++ ctf_integer(u64, gpa, gpa) ++ ctf_sequence_hex(unsigned char, val, val, u32, len) ++ ) ++) ++ ++#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++ + LTTNG_TRACEPOINT_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_ARGS(type, len, gpa, val), +@@ -96,6 +112,8 @@ LTTNG_TRACEPOINT_EVENT(kvm_mmio, + ) + ) + ++#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++ + #if 
(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) + + #define kvm_fpu_load_symbol \ +-- +2.11.1 + diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-kvm.h-workaround-kernel-version-issues.patch b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-kvm.h-workaround-kernel-version-issues.patch new file mode 100644 index 00000000..9da99e6d --- /dev/null +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0001-kvm.h-workaround-kernel-version-issues.patch @@ -0,0 +1,32 @@ +From 7bee7691eec41aa0d3697466250be7680a5e963d Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Thu, 14 Jun 2018 16:43:28 +0500 +Subject: [PATCH] kvm.h: workaround kernel version issues + +The kernel version for this change is different than +what we currently have for the AMD BSPs as there are +various patches backported for the kernel that make +this change otherwise irrelevant. Fix this by forcing +a known version that works. + +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + instrumentation/events/lttng-module/kvm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h +index 698cc71..840b7de 100644 +--- a/instrumentation/events/lttng-module/kvm.h ++++ b/instrumentation/events/lttng-module/kvm.h +@@ -86,7 +86,7 @@ LTTNG_TRACEPOINT_EVENT(kvm_ack_irq, + + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0) \ + || LTTNG_KERNEL_RANGE(4,14,14, 4,15,0) \ +- || LTTNG_KERNEL_RANGE(4,9,77, 4,10,0) \ ++ || LTTNG_KERNEL_RANGE(4,7,0, 4,10,0) \ + || LTTNG_KERNEL_RANGE(4,4,112, 4,5,0) \ + || LTTNG_KERNEL_RANGE(3,16,52, 3,17,0) \ + || LTTNG_KERNEL_RANGE(3,2,97, 3,3,0)) +-- +2.11.1 + diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0002-Fix-kvm-instrumentation-for-4.15.patch b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0002-Fix-kvm-instrumentation-for-4.15.patch new file mode 100644 index 00000000..0978ecb4 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0002-Fix-kvm-instrumentation-for-4.15.patch @@ -0,0 +1,28 @@ +From b1406be7b1b151f2b8e69b1de846ba444c71ca03 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Date: Wed, 27 Dec 2017 09:07:30 -0500 +Subject: [PATCH 2/4] Fix: kvm instrumentation for 4.15 + +Incorrect version range. 
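
The one-line fix matters because of how the gate is evaluated: KERNEL_VERSION() packs major/minor/patch into a single integer, so the stray >= KERNEL_VERSION(2,6,34) selected the new void-pointer kvm_mmio prototype on essentially every kernel, while >= KERNEL_VERSION(4,15,0) restricts it to kernels whose tracepoint actually changed. A stand-alone sketch of the comparison, reproducing the kernel's classic encoding (an assumption made here so the example runs on its own):

#include <stdio.h>

/* The kernel's classic version encoding: major/minor/patch packed into
 * one integer so version gates reduce to integer comparisons. */
#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))

int main(void)
{
    unsigned int v = KERNEL_VERSION(4, 9, 77);

    /* The broken gate: true for every kernel released since 2010. */
    printf("gate at 2.6.34: %u\n", (unsigned)(v >= KERNEL_VERSION(2, 6, 34)));

    /* The corrected gate: only 4.15+ takes the void * prototype; the
     * stable backport windows are layered on by the follow-up patches. */
    printf("gate at 4.15.0: %u\n", (unsigned)(v >= KERNEL_VERSION(4, 15, 0)));
    return 0;
}
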
+ +Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +--- + instrumentation/events/lttng-module/kvm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h +index c01772c..ec74bdd 100644 +--- a/instrumentation/events/lttng-module/kvm.h ++++ b/instrumentation/events/lttng-module/kvm.h +@@ -84,7 +84,7 @@ LTTNG_TRACEPOINT_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_READ, "read" }, \ + { KVM_TRACE_MMIO_WRITE, "write" } + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) + + LTTNG_TRACEPOINT_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, void *val), +-- +2.11.1 + diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0003-Update-kvm-instrumentation-for-3.16.52-and-3.2.97.patch b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0003-Update-kvm-instrumentation-for-3.16.52-and-3.2.97.patch new file mode 100644 index 00000000..177cf53b --- /dev/null +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0003-Update-kvm-instrumentation-for-3.16.52-and-3.2.97.patch @@ -0,0 +1,50 @@ +From 523fd15bb1100ab9a09ca776585ecb689516c1ef Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Date: Tue, 2 Jan 2018 11:07:05 -0500 +Subject: [PATCH 3/4] Update: kvm instrumentation for 3.16.52 and 3.2.97 + +Starting from 3.16.52 and 3.2.97, the 3.16 and 3.2 stable kernel +branches backport a kvm instrumentation change introduced in 4.15 which +affects the prototype of the kvm_mmio event. + +Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +--- + instrumentation/events/lttng-module/kvm.h | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h +index ec74bdd..ea63e88 100644 +--- a/instrumentation/events/lttng-module/kvm.h ++++ b/instrumentation/events/lttng-module/kvm.h +@@ -84,7 +84,9 @@ LTTNG_TRACEPOINT_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_READ, "read" }, \ + { KVM_TRACE_MMIO_WRITE, "write" } + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0) \ ++ || LTTNG_KERNEL_RANGE(3,16,52, 3,17,0) \ ++ || LTTNG_KERNEL_RANGE(3,2,97, 3,3,0)) + + LTTNG_TRACEPOINT_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, void *val), +@@ -98,7 +100,7 @@ LTTNG_TRACEPOINT_EVENT(kvm_mmio, + ) + ) + +-#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++#else + + LTTNG_TRACEPOINT_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, u64 val), +@@ -112,7 +114,7 @@ LTTNG_TRACEPOINT_EVENT(kvm_mmio, + ) + ) + +-#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++#endif + + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) + +-- +2.11.1 + diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0004-Update-kvm-instrumentation-for-4.14.14-4.9.77-4.4.11.patch b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0004-Update-kvm-instrumentation-for-4.14.14-4.9.77-4.4.11.patch new file mode 100644 index 00000000..0a653f52 --- /dev/null +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules/0004-Update-kvm-instrumentation-for-4.14.14-4.9.77-4.4.11.patch @@ -0,0 +1,32 @@ +From 0a128b6bc92cbc3315d4618ed8b9da37b2367573 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Date: Wed, 17 Jan 2018 11:17:08 -0500 +Subject: [PATCH 4/4] Update: kvm instrumentation for 4.14.14+, 4.9.77+, + 4.4.112+ + 
+Starting from 4.14.14, 4.9.77, and 4.4.112, the 4.14, 4.9, and 4.4 +stable kernel branches backport a kvm instrumentation change introduced +in 4.15 which affects the prototype of the kvm_mmio event. + +Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +--- + instrumentation/events/lttng-module/kvm.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h +index ea63e88..698cc71 100644 +--- a/instrumentation/events/lttng-module/kvm.h ++++ b/instrumentation/events/lttng-module/kvm.h +@@ -85,6 +85,9 @@ LTTNG_TRACEPOINT_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_WRITE, "write" } + + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0) \ ++ || LTTNG_KERNEL_RANGE(4,14,14, 4,15,0) \ ++ || LTTNG_KERNEL_RANGE(4,9,77, 4,10,0) \ ++ || LTTNG_KERNEL_RANGE(4,4,112, 4,5,0) \ + || LTTNG_KERNEL_RANGE(3,16,52, 3,17,0) \ + || LTTNG_KERNEL_RANGE(3,2,97, 3,3,0)) + +-- +2.11.1 + diff --git a/meta-snowyowl/recipes-kernel/lttng/lttng-modules_%.bbappend b/meta-snowyowl/recipes-kernel/lttng/lttng-modules_%.bbappend index 5a800608..cae9d002 100644 --- a/meta-snowyowl/recipes-kernel/lttng/lttng-modules_%.bbappend +++ b/meta-snowyowl/recipes-kernel/lttng/lttng-modules_%.bbappend @@ -1 +1,7 @@ +FILESEXTRAPATHS_prepend := "${THISDIR}/${PN}:" inherit kernel-openssl +SRC_URI_append_snowyowl = " file://0001-Update-kvm-instrumentation-for-4.15.patch \ file://0002-Fix-kvm-instrumentation-for-4.15.patch \ file://0003-Update-kvm-instrumentation-for-3.16.52-and-3.2.97.patch \ file://0004-Update-kvm-instrumentation-for-4.14.14-4.9.77-4.4.11.patch \ file://0001-kvm.h-workaround-kernel-version-issues.patch"
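
Taken together, the four lttng-modules patches plus the kvm.h workaround gate the kvm_mmio prototype on either a mainline cutoff or a stable backport window, with the workaround widening the 4.9 window down to 4.7.0 because this BSP's kernel carries the backport at an earlier patch level than 4.9.77. A runnable sketch of the range logic; the real LTTNG_KERNEL_RANGE tests LINUX_VERSION_CODE directly, so the extra version parameter here is an adaptation to keep the example self-contained, and the inclusive-low/exclusive-high semantics are an assumption:

#include <stdio.h>

#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))

/* Assumed semantics of lttng-modules' LTTNG_KERNEL_RANGE: low bound
 * inclusive, high bound exclusive. The version-under-test parameter v
 * replaces LINUX_VERSION_CODE so this runs stand-alone. */
#define IN_RANGE(v, al, bl, cl, ah, bh, ch) \
    ((v) >= KERNEL_VERSION(al, bl, cl) && (v) < KERNEL_VERSION(ah, bh, ch))

static int uses_void_ptr_prototype(unsigned int v)
{
    return v >= KERNEL_VERSION(4, 15, 0)
        || IN_RANGE(v, 4, 14, 14, 4, 15, 0)
        || IN_RANGE(v, 4, 7, 0, 4, 10, 0)    /* widened by the workaround */
        || IN_RANGE(v, 4, 4, 112, 4, 5, 0)
        || IN_RANGE(v, 3, 16, 52, 3, 17, 0)
        || IN_RANGE(v, 3, 2, 97, 3, 3, 0);
}

int main(void)
{
    /* A 4.9-based kernel below 4.9.77 still qualifies once the window
     * starts at 4.7.0, which is exactly what the workaround needs. */
    printf("4.9.30: %d\n", uses_void_ptr_prototype(KERNEL_VERSION(4, 9, 30)));
    printf("4.16.0: %d\n", uses_void_ptr_prototype(KERNEL_VERSION(4, 16, 0)));
    printf("3.10.0: %d\n", uses_void_ptr_prototype(KERNEL_VERSION(3, 10, 0)));
    return 0;
}
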