Diffstat (limited to 'common/recipes-kernel')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch | 165
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch | 97
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch | 45
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch | 48
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch | 63
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch | 67
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch | 72
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch | 64
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch | 61
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch | 103
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch | 82
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch | 60
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch | 72
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch | 47
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch | 69
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch | 119
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch | 295
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch | 104
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch | 585
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch | 352
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch | 156
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch | 305
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch | 192
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch | 100
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch | 103
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch | 70
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch | 37
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch | 41
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch | 105
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch | 57
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch | 102
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch | 105
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch | 65
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc | 33
34 files changed, 4041 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch
new file mode 100644
index 00000000..9772c5f8
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch
@@ -0,0 +1,165 @@
+From af0e9ccc133f03f5150a7afba349a9f50897f793 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Thu, 14 Dec 2017 17:40:50 -0800
+Subject: [PATCH 01/33] KVM: Fix stack-out-of-bounds read in write_mmio
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit e39d200fa5bf5b94a0948db0dae44c1b73b84a56 upstream.
+
+Reported by syzkaller:
+
+ BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm]
+ Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298
+
+ CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18
+ Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016
+ Call Trace:
+ dump_stack+0xab/0xe1
+ print_address_description+0x6b/0x290
+ kasan_report+0x28a/0x370
+ write_mmio+0x11e/0x270 [kvm]
+ emulator_read_write_onepage+0x311/0x600 [kvm]
+ emulator_read_write+0xef/0x240 [kvm]
+ emulator_fix_hypercall+0x105/0x150 [kvm]
+ em_hypercall+0x2b/0x80 [kvm]
+ x86_emulate_insn+0x2b1/0x1640 [kvm]
+ x86_emulate_instruction+0x39a/0xb90 [kvm]
+ handle_exception+0x1b4/0x4d0 [kvm_intel]
+ vcpu_enter_guest+0x15a0/0x2640 [kvm]
+ kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm]
+ kvm_vcpu_ioctl+0x479/0x880 [kvm]
+ do_vfs_ioctl+0x142/0x9a0
+ SyS_ioctl+0x74/0x80
+ entry_SYSCALL_64_fastpath+0x23/0x9a
+
+The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall)
+to the guest memory, however, write_mmio tracepoint always prints 8 bytes
+through *(u64 *)val since kvm splits the mmio access into 8 bytes. This
+leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes
+it by just accessing the bytes which we operate on.
+
+Before patch:
+
+syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f
+
+After patch:
+
+syz-executor-13416 [002] .... 51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
+Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
+Tested-by: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/kvm/mmio.c | 6 +++---
+ arch/x86/kvm/x86.c | 8 ++++----
+ include/trace/events/kvm.h | 7 +++++--
+ 3 files changed, 12 insertions(+), 9 deletions(-)
+
+diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
+index b6e715f..dac7ceb 100644
+--- a/arch/arm/kvm/mmio.c
++++ b/arch/arm/kvm/mmio.c
+@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ }
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+- data);
++ &data);
+ data = vcpu_data_host_to_guest(vcpu, data, len);
+ vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
+ }
+@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
+ len);
+
+- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
+ kvm_mmio_write_buf(data_buf, len, data);
+
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
+ } else {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
+- fault_ipa, 0);
++ fault_ipa, NULL);
+
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 51a700a..9cc9117 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4242,7 +4242,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
+ addr, n, v))
+ && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
+ break;
+- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
+ handled += n;
+ addr += n;
+ len -= n;
+@@ -4495,7 +4495,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
+ {
+ if (vcpu->mmio_read_completed) {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
++ vcpu->mmio_fragments[0].gpa, val);
+ vcpu->mmio_read_completed = 0;
+ return 1;
+ }
+@@ -4517,14 +4517,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
+
+ static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
+ {
+- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
+ return vcpu_mmio_write(vcpu, gpa, bytes, val);
+ }
+
+ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *val, int bytes)
+ {
+- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
++ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
+ return X86EMUL_IO_NEEDED;
+ }
+
+diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
+index 8ade3eb..90fce4d 100644
+--- a/include/trace/events/kvm.h
++++ b/include/trace/events/kvm.h
+@@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq,
+ { KVM_TRACE_MMIO_WRITE, "write" }
+
+ TRACE_EVENT(kvm_mmio,
+- TP_PROTO(int type, int len, u64 gpa, u64 val),
++ TP_PROTO(int type, int len, u64 gpa, void *val),
+ TP_ARGS(type, len, gpa, val),
+
+ TP_STRUCT__entry(
+@@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio,
+ __entry->type = type;
+ __entry->len = len;
+ __entry->gpa = gpa;
+- __entry->val = val;
++ __entry->val = 0;
++ if (val)
++ memcpy(&__entry->val, val,
++ min_t(u32, sizeof(__entry->val), len));
+ ),
+
+ TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+--
+2.7.4
+
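The core of the fix above is to copy only the bytes the MMIO access actually operated on into the tracepoint value, instead of dereferencing eight bytes unconditionally from a smaller stack buffer. A minimal user-space C sketch of that bounded-copy pattern (separate from the patch itself; names are hypothetical, not the kernel's):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy only the bytes the access touched, so no adjacent stack memory is read. */
static uint64_t capture_mmio_val(const void *val, unsigned int len)
{
    uint64_t out = 0;

    if (val)
        memcpy(&out, val, len < sizeof(out) ? len : sizeof(out));
    return out;
}

int main(void)
{
    unsigned char vmcall[3] = { 0x0f, 0x01, 0xc1 };   /* 3-byte patched opcode */

    /* Prints "val 0xc1010f": only the 3 bytes operated on, never 8. */
    printf("val 0x%llx\n",
           (unsigned long long)capture_mmio_val(vmcall, sizeof(vmcall)));
    return 0;
}
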
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch
new file mode 100644
index 00000000..406a79d3
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch
@@ -0,0 +1,97 @@
+From 1cd771013c357075c745f99419bdaf31503c5a51 Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Wed, 3 Jan 2018 14:31:38 -0800
+Subject: [PATCH 02/33] kvm: vmx: Scrub hardware GPRs at VM-exit
+
+commit 0cb5b30698fdc8f6b4646012e3acb4ddce430788 upstream.
+
+Guest GPR values are live in the hardware GPRs at VM-exit. Do not
+leave any guest values in hardware GPRs after the guest GPR values are
+saved to the vcpu_vmx structure.
+
+This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
+Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
+
+Suggested-by: Eric Northup <digitaleric@google.com>
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Eric Northup <digitaleric@google.com>
+Reviewed-by: Benjamin Serebrin <serebrin@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+[Paolo: Add AMD bits, Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c | 19 +++++++++++++++++++
+ arch/x86/kvm/vmx.c | 14 +++++++++++++-
+ 2 files changed, 32 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 975ea99..491f077 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -4858,6 +4858,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ "mov %%r14, %c[r14](%[svm]) \n\t"
+ "mov %%r15, %c[r15](%[svm]) \n\t"
+ #endif
++ /*
++ * Clear host registers marked as clobbered to prevent
++ * speculative use.
++ */
++ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
++ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
++ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
++ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
++ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
++#ifdef CONFIG_X86_64
++ "xor %%r8, %%r8 \n\t"
++ "xor %%r9, %%r9 \n\t"
++ "xor %%r10, %%r10 \n\t"
++ "xor %%r11, %%r11 \n\t"
++ "xor %%r12, %%r12 \n\t"
++ "xor %%r13, %%r13 \n\t"
++ "xor %%r14, %%r14 \n\t"
++ "xor %%r15, %%r15 \n\t"
++#endif
+ "pop %%" _ASM_BP
+ :
+ : [svm]"a"(svm),
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 4ead27f..91ae4e2 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8932,6 +8932,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ /* Save guest registers, load host registers, keep flags */
+ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
+ "pop %0 \n\t"
++ "setbe %c[fail](%0)\n\t"
+ "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+ "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+ __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+@@ -8948,12 +8949,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ "mov %%r13, %c[r13](%0) \n\t"
+ "mov %%r14, %c[r14](%0) \n\t"
+ "mov %%r15, %c[r15](%0) \n\t"
++ "xor %%r8d, %%r8d \n\t"
++ "xor %%r9d, %%r9d \n\t"
++ "xor %%r10d, %%r10d \n\t"
++ "xor %%r11d, %%r11d \n\t"
++ "xor %%r12d, %%r12d \n\t"
++ "xor %%r13d, %%r13d \n\t"
++ "xor %%r14d, %%r14d \n\t"
++ "xor %%r15d, %%r15d \n\t"
+ #endif
+ "mov %%cr2, %%" _ASM_AX " \n\t"
+ "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+
++ "xor %%eax, %%eax \n\t"
++ "xor %%ebx, %%ebx \n\t"
++ "xor %%esi, %%esi \n\t"
++ "xor %%edi, %%edi \n\t"
+ "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
+- "setbe %c[fail](%0) \n\t"
+ ".pushsection .rodata \n\t"
+ ".global vmx_return \n\t"
+ "vmx_return: " _ASM_PTR " 2b \n\t"
+--
+2.7.4
+
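The scrubbing above zeroes every hardware GPR once the guest values have been saved, so stale guest-controlled values cannot feed later speculative execution on the host. A tiny user-space sketch of the same xor-clearing idiom (x86 GCC/Clang inline asm, illustrative only, not the VM-exit path):

#include <stdio.h>

/* Zero scratch registers so stale values cannot serve as speculation
 * operands afterwards; the patch does this for every hardware GPR
 * right after the guest state is saved to the vcpu structure. */
static inline void scrub_scratch_regs(void)
{
    asm volatile("xor %%eax, %%eax\n\t"
                 "xor %%ecx, %%ecx\n\t"
                 "xor %%edx, %%edx"
                 ::: "eax", "ecx", "edx", "cc");
}

int main(void)
{
    scrub_scratch_regs();
    puts("scratch registers cleared");
    return 0;
}
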
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch
new file mode 100644
index 00000000..b53db2f4
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch
@@ -0,0 +1,45 @@
+From ab442dfc820b6ebdbb1c135e6fad66130d44e5a8 Mon Sep 17 00:00:00 2001
+From: Andrew Honig <ahonig@google.com>
+Date: Wed, 10 Jan 2018 10:12:03 -0800
+Subject: [PATCH 03/33] KVM: x86: Add memory barrier on vmcs field lookup
+
+commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream.
+
+This adds a memory barrier when performing a lookup into
+the vmcs_field_to_offset_table. This is related to
+CVE-2017-5753.
+
+Signed-off-by: Andrew Honig <ahonig@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 91ae4e2..ee766c2 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -858,8 +858,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
+ {
+ BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+- vmcs_field_to_offset_table[field] == 0)
++ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
++ return -ENOENT;
++
++ /*
++ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
++ * generic mechanism.
++ */
++ asm("lfence");
++
++ if (vmcs_field_to_offset_table[field] == 0)
+ return -ENOENT;
+
+ return vmcs_field_to_offset_table[field];
+--
+2.7.4
+
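The pattern above is: bounds-check the index, then execute LFENCE so the dependent table load cannot be issued speculatively with an attacker-controlled out-of-bounds index. A small stand-alone C sketch of that check-then-serialize pattern (x86 only; table contents and names are illustrative):

#include <stddef.h>
#include <stdio.h>

static const short offset_table[8] = { 4, 8, 12, 0, 16, 20, 0, 24 };

static short field_to_offset(size_t field)
{
    if (field >= sizeof(offset_table) / sizeof(offset_table[0]))
        return -1;

    /* Serialize: the load below cannot execute speculatively past here. */
    asm volatile("lfence" ::: "memory");

    if (offset_table[field] == 0)
        return -1;
    return offset_table[field];
}

int main(void)
{
    printf("%d %d\n", field_to_offset(2), field_to_offset(100));   /* 12 -1 */
    return 0;
}
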
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch
new file mode 100644
index 00000000..dd1f4c29
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch
@@ -0,0 +1,48 @@
+From ce7bea11dfe01825a2ced79b5bcc04b7e781e63b Mon Sep 17 00:00:00 2001
+From: Liran Alon <liran.alon@oracle.com>
+Date: Sun, 5 Nov 2017 16:56:33 +0200
+Subject: [PATCH 04/33] KVM: x86: emulator: Return to user-mode on L1 CPL=0
+ emulation failure
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ]
+
+On this case, handle_emulation_failure() fills kvm_run with
+internal-error information which it expects to be delivered
+to user-mode for further processing.
+However, the code reports a wrong return-value which makes KVM to never
+return to user-mode on this scenario.
+
+Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to
+userspace")
+
+Signed-off-by: Liran Alon <liran.alon@oracle.com>
+Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 9cc9117..abbb37a 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5265,7 +5265,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+- r = EMULATE_FAIL;
++ r = EMULATE_USER_EXIT;
+ }
+ kvm_queue_exception(vcpu, UD_VECTOR);
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch
new file mode 100644
index 00000000..49770e88
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch
@@ -0,0 +1,63 @@
+From 585df9100649b5038250e1c33cf8af019a77844c Mon Sep 17 00:00:00 2001
+From: Liran Alon <liran.alon@oracle.com>
+Date: Sun, 5 Nov 2017 16:56:34 +0200
+Subject: [PATCH 05/33] KVM: x86: Don't re-execute instruction when not passing
+ CR2 value
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ]
+
+In case of instruction-decode failure or emulation failure,
+x86_emulate_instruction() will call reexecute_instruction() which will
+attempt to use the cr2 value passed to x86_emulate_instruction().
+However, when x86_emulate_instruction() is called from
+emulate_instruction(), cr2 is not passed (passed as 0) and therefore
+it doesn't make sense to execute reexecute_instruction() logic at all.
+
+Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction")
+
+Signed-off-by: Liran Alon <liran.alon@oracle.com>
+Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 3 ++-
+ arch/x86/kvm/vmx.c | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index bdde807..6f6ee68 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
+ int emulation_type)
+ {
+- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
++ return x86_emulate_instruction(vcpu, 0,
++ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
+ }
+
+ void kvm_enable_efer_bits(u64);
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index ee766c2..8e5001d 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6232,7 +6232,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
+ if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
+ return 1;
+
+- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
++ err = emulate_instruction(vcpu, 0);
+
+ if (err == EMULATE_USER_EXIT) {
+ ++vcpu->stat.mmio_exits;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch
new file mode 100644
index 00000000..9430b597
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch
@@ -0,0 +1,67 @@
+From 399e9dee4411858aa4eb8894f031ff68ab3b5e9f Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Sun, 5 Nov 2017 16:54:47 -0800
+Subject: [PATCH 06/33] KVM: X86: Fix operand/address-size during instruction
+ decoding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ]
+
+Pedro reported:
+ During tests that we conducted on KVM, we noticed that executing a "PUSH %ES"
+ instruction under KVM produces different results on both memory and the SP
+ register depending on whether EPT support is enabled. With EPT the SP is
+ reduced by 4 bytes (and the written value is 0-padded) but without EPT support
+ it is only reduced by 2 bytes. The difference can be observed when the CS.DB
+ field is 1 (32-bit) but not when it's 0 (16-bit).
+
+The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D
+also should be respected instead of just default operand/address-size/66H
+prefix/67H prefix during instruction decoding. This patch fixes it by also
+adjusting operand/address-size according to CS.D.
+
+Reported-by: Pedro Fonseca <pfonseca@cs.washington.edu>
+Tested-by: Pedro Fonseca <pfonseca@cs.washington.edu>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Pedro Fonseca <pfonseca@cs.washington.edu>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 9f676ad..9984daf 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -4971,6 +4971,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
+ bool op_prefix = false;
+ bool has_seg_override = false;
+ struct opcode opcode;
++ u16 dummy;
++ struct desc_struct desc;
+
+ ctxt->memop.type = OP_NONE;
+ ctxt->memopp = NULL;
+@@ -4989,6 +4991,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
+ switch (mode) {
+ case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_VM86:
++ def_op_bytes = def_ad_bytes = 2;
++ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
++ if (desc.d)
++ def_op_bytes = def_ad_bytes = 4;
++ break;
+ case X86EMUL_MODE_PROT16:
+ def_op_bytes = def_ad_bytes = 2;
+ break;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch
new file mode 100644
index 00000000..2ca432cf
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch
@@ -0,0 +1,72 @@
+From 34cbfb000e9bd72eb48fb3d1e61be034053f743f Mon Sep 17 00:00:00 2001
+From: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Date: Sun, 5 Nov 2017 15:52:29 +0200
+Subject: [PATCH 07/33] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC
+ reconfigure race
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ]
+
+KVM uses ioapic_handled_vectors to track vectors that need to notify the
+IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an
+interrupt with old configuration is pending or running and
+ioapic_handled_vectors only remembers the newest configuration;
+thus EOI from the old interrupt is not delievered to the IOAPIC.
+
+A previous commit db2bdcbbbd32
+("KVM: x86: fix edge EOI and IOAPIC reconfig race")
+addressed this issue by adding pending edge-triggered interrupts to
+ioapic_handled_vectors, fixing this race for edge-triggered interrupts.
+The commit explicitly ignored level-triggered interrupts,
+but this race applies to them as well:
+
+1) IOAPIC sends a level triggered interrupt vector to VCPU0
+2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC
+ to route the vector to VCPU1. The reconfiguration rewrites only the
+ upper 32 bits of the IOREDTBLn register. (Causes KVM to update
+ ioapic_handled_vectors for VCPU0 and it no longer includes the vector.)
+3) VCPU0 sends EOI for the vector, but it's not delievered to the
+ IOAPIC because the ioapic_handled_vectors doesn't include the vector.
+4) New interrupts are not delievered to VCPU1 because remote_irr bit
+ is set forever.
+
+Therefore, the correct behavior is to add all pending and running
+interrupts to ioapic_handled_vectors.
+
+This commit introduces a slight performance hit similar to
+commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race")
+for the rare case that the vector is reused by a non-IOAPIC source on
+VCPU0. We prefer to keep solution simple and not handle this case just
+as the original commit does.
+
+Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race")
+
+Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/ioapic.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 6e219e5..a7ac868 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
+ index == RTC_GSI) {
+ if (kvm_apic_match_dest(vcpu, NULL, 0,
+ e->fields.dest_id, e->fields.dest_mode) ||
+- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
+- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
++ kvm_apic_pending_eoi(vcpu, e->fields.vector))
+ __set_bit(e->fields.vector,
+ ioapic_handled_vectors);
+ }
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch
new file mode 100644
index 00000000..6e097d05
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch
@@ -0,0 +1,64 @@
+From aca211b549c07b81295e817e663a61a1ae1fd659 Mon Sep 17 00:00:00 2001
+From: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Date: Sun, 5 Nov 2017 15:52:32 +0200
+Subject: [PATCH 08/33] KVM: x86: ioapic: Clear Remote IRR when entry is
+ switched to edge-triggered
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ]
+
+Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for
+IOAPICs without an EOI register. They simulate the EOI message manually
+by changing the trigger mode to edge and then back to level, with the
+entry being masked during this.
+
+QEMU implements this feature in commit ed1263c363c9
+("ioapic: clear remote irr bit for edge-triggered interrupts")
+
+As a side effect, this commit removes an incorrect behavior where Remote
+IRR was cleared when the redirection table entry was rewritten. This is not
+consistent with the manual and also opens an opportunity for a strange
+behavior when a redirection table entry is modified from an interrupt
+handler that handles the same entry: The modification will clear the
+Remote IRR bit even though the interrupt handler is still running.
+
+Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Reviewed-by: Steve Rutherford <srutherford@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/ioapic.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index a7ac868..4b573c8 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -306,8 +306,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ } else {
+ e->bits &= ~0xffffffffULL;
+ e->bits |= (u32) val;
+- e->fields.remote_irr = 0;
+ }
++
++ /*
++ * Some OSes (Linux, Xen) assume that Remote IRR bit will
++ * be cleared by IOAPIC hardware when the entry is configured
++ * as edge-triggered. This behavior is used to simulate an
++ * explicit EOI on IOAPICs that don't have the EOI register.
++ */
++ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
++ e->fields.remote_irr = 0;
++
+ mask_after = e->fields.mask;
+ if (mask_before != mask_after)
+ kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch
new file mode 100644
index 00000000..071eccd3
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch
@@ -0,0 +1,61 @@
+From a4337b660fe26046e81471186dc393ca77371b83 Mon Sep 17 00:00:00 2001
+From: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Date: Sun, 5 Nov 2017 15:52:33 +0200
+Subject: [PATCH 09/33] KVM: x86: ioapic: Preserve read-only values in the
+ redirection table
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ]
+
+According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are
+read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb
+("ioapic: keep RO bits for IOAPIC entry").
+
+Signed-off-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Reviewed-by: Steve Rutherford <srutherford@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/ioapic.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 4b573c8..5f810bb 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -278,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ {
+ unsigned index;
+ bool mask_before, mask_after;
++ int old_remote_irr, old_delivery_status;
+ union kvm_ioapic_redirect_entry *e;
+
+ switch (ioapic->ioregsel) {
+@@ -300,6 +301,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ return;
+ e = &ioapic->redirtbl[index];
+ mask_before = e->fields.mask;
++ /* Preserve read-only fields */
++ old_remote_irr = e->fields.remote_irr;
++ old_delivery_status = e->fields.delivery_status;
+ if (ioapic->ioregsel & 1) {
+ e->bits &= 0xffffffff;
+ e->bits |= (u64) val << 32;
+@@ -307,6 +311,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+ e->bits &= ~0xffffffffULL;
+ e->bits |= (u32) val;
+ }
++ e->fields.remote_irr = old_remote_irr;
++ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+--
+2.7.4
+
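The fix follows a simple read-modify-write discipline: capture the read-only fields before applying the guest's write, then put them back afterwards. A self-contained C sketch of that discipline on a simplified redirection-entry layout (illustrative only, not the kernel's definitions):

#include <stdint.h>
#include <stdio.h>

/* Simplified model of an IOAPIC redirection entry. */
union redir_entry {
    uint64_t bits;
    struct {
        uint64_t vector          : 8;
        uint64_t delivery_mode   : 3;
        uint64_t dest_mode       : 1;
        uint64_t delivery_status : 1;   /* read-only per the 82093AA manual */
        uint64_t polarity        : 1;
        uint64_t remote_irr      : 1;   /* read-only per the 82093AA manual */
        uint64_t trig_mode       : 1;
        uint64_t mask            : 1;
        uint64_t reserved        : 39;
        uint64_t dest_id         : 8;
    } fields;
};

/* Apply the guest's write to the low dword, then restore the RO fields. */
static void write_low_dword(union redir_entry *e, uint32_t val)
{
    unsigned int old_remote_irr      = e->fields.remote_irr;
    unsigned int old_delivery_status = e->fields.delivery_status;

    e->bits &= ~0xffffffffULL;
    e->bits |= val;

    e->fields.remote_irr      = old_remote_irr;
    e->fields.delivery_status = old_delivery_status;
}

int main(void)
{
    union redir_entry e = { .bits = 0 };

    e.fields.remote_irr = 1;            /* interrupt still in service */
    write_low_dword(&e, 0x0000a031);    /* guest rewrites the entry */
    printf("remote_irr still %u\n", (unsigned)e.fields.remote_irr);
    return 0;
}
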
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch
new file mode 100644
index 00000000..7ab25b0b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch
@@ -0,0 +1,103 @@
+From fc18f773d54edfedf8875473d8e69753265a3dfd Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Mon, 20 Nov 2017 14:52:21 -0800
+Subject: [PATCH 10/33] KVM: VMX: Fix rflags cache during vCPU reset
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ]
+
+Reported by syzkaller:
+
+ *** Guest State ***
+ CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7
+ CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1
+ CR3 = 0x000000002081e000
+ RSP = 0x000000000000fffa RIP = 0x0000000000000000
+ RFLAGS=0x00023000 DR7 = 0x00000000000000
+ ^^^^^^^^^^
+ ------------[ cut here ]------------
+ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm]
+ CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26
+ RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm]
+ RSP: 0018:ffff880291d179e0 EFLAGS: 00010202
+ Call Trace:
+ kvm_vcpu_ioctl+0x479/0x880 [kvm]
+ do_vfs_ioctl+0x142/0x9a0
+ SyS_ioctl+0x74/0x80
+ entry_SYSCALL_64_fastpath+0x23/0x9a
+
+The failed vmentry is triggered by the following beautified testcase:
+
+ #include <unistd.h>
+ #include <sys/syscall.h>
+ #include <string.h>
+ #include <stdint.h>
+ #include <linux/kvm.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+
+ long r[5];
+ int main()
+ {
+ struct kvm_debugregs dr = { 0 };
+
+ r[2] = open("/dev/kvm", O_RDONLY);
+ r[3] = ioctl(r[2], KVM_CREATE_VM, 0);
+ r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7);
+ struct kvm_guest_debug debug = {
+ .control = 0xf0403,
+ .arch = {
+ .debugreg[6] = 0x2,
+ .debugreg[7] = 0x2
+ }
+ };
+ ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug);
+ ioctl(r[4], KVM_RUN, 0);
+ }
+
+which testcase tries to setup the processor specific debug
+registers and configure vCPU for handling guest debug events through
+KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set
+rflags in order to set TF bit if single step is needed. All regs' caches
+are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU
+reset. However, the cache of rflags is not reset during vCPU reset. The
+function vmx_get_rflags() returns an unreset rflags cache value since
+the cache is marked avail, it is 0 after boot. Vmentry fails if the
+rflags reserved bit 1 is 0.
+
+This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and
+its cache to 0x2 during vCPU reset.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 8e5001d..98f6545 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5171,7 +5171,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ }
+
+- vmcs_writel(GUEST_RFLAGS, 0x02);
++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ kvm_rip_write(vcpu, 0xfff0);
+
+ vmcs_writel(GUEST_GDTR_BASE, 0);
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch
new file mode 100644
index 00000000..4e1d906b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch
@@ -0,0 +1,82 @@
+From adbb63b59bd2792df649335e7d3c28be2fbbe1c2 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:13 +0100
+Subject: [PATCH 11/33] KVM: x86: Make indirect calls in emulator speculation
+ safe
+
+(cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab)
+
+Replace the indirect calls with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org
+[dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 9984daf..6faac71 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -25,6 +25,7 @@
+ #include <asm/kvm_emulate.h>
+ #include <linux/stringify.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+
+ #include "x86.h"
+ #include "tss.h"
+@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
+ void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+- asm("push %[flags]; popf; call *%[fastop]"
+- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
++ asm("push %[flags]; popf; " CALL_NOSPEC
++ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
+ return rc;
+ }
+
+@@ -5287,15 +5288,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
+
+ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+ {
+- register void *__sp asm(_ASM_SP);
+ ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+
+ if (!(ctxt->d & ByteOp))
+ fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+
+- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
++ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+ : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
+- [fastop]"+S"(fop), "+r"(__sp)
++ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
+ : "c"(ctxt->src2.val));
+
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch
new file mode 100644
index 00000000..ba052d9e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0012-KVM-VMX-Make-indirect-call-speculation-safe.patch
@@ -0,0 +1,60 @@
+From 9eee1ba493f5899d7c3793818db16deaf084df21 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:14 +0100
+Subject: [PATCH 12/33] KVM: VMX: Make indirect call speculation safe
+
+(cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699)
+
+Replace indirect call with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 98f6545..6f3ed0e 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8659,14 +8659,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+ #endif
+ "pushf\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+- "call *%[entry]\n\t"
++ CALL_NOSPEC
+ :
+ #ifdef CONFIG_X86_64
+ [sp]"=&r"(tmp),
+ #endif
+ "+r"(__sp)
+ :
+- [entry]"r"(entry),
++ THUNK_TARGET(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch
new file mode 100644
index 00000000..8b58f32e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0013-x86-kvm-Update-spectre-v1-mitigation.patch
@@ -0,0 +1,72 @@
+From 7a1d0c7758b49b1f107157db33df0aae1c10cf26 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Wed, 31 Jan 2018 17:47:03 -0800
+Subject: [PATCH 13/33] x86/kvm: Update spectre-v1 mitigation
+
+(cherry picked from commit 085331dfc6bbe3501fb936e657331ca943827600)
+
+Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup"
+added a raw 'asm("lfence");' to prevent a bounds check bypass of
+'vmcs_field_to_offset_table'.
+
+The lfence can be avoided in this path by using the array_index_nospec()
+helper designed for these types of fixes.
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andrew Honig <ahonig@google.com>
+Cc: kvm@vger.kernel.org
+Cc: Jim Mattson <jmattson@google.com>
+Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 20 +++++++++-----------
+ 1 file changed, 9 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 6f3ed0e..af90bc4 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -33,6 +33,7 @@
+ #include <linux/slab.h>
+ #include <linux/tboot.h>
+ #include <linux/hrtimer.h>
++#include <linux/nospec.h>
+ #include "kvm_cache_regs.h"
+ #include "x86.h"
+
+@@ -856,21 +857,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
+
+ static inline short vmcs_field_to_offset(unsigned long field)
+ {
+- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
++ unsigned short offset;
+
+- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
++ BUILD_BUG_ON(size > SHRT_MAX);
++ if (field >= size)
+ return -ENOENT;
+
+- /*
+- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+- * generic mechanism.
+- */
+- asm("lfence");
+-
+- if (vmcs_field_to_offset_table[field] == 0)
++ field = array_index_nospec(field, size);
++ offset = vmcs_field_to_offset_table[field];
++ if (offset == 0)
+ return -ENOENT;
+-
+- return vmcs_field_to_offset_table[field];
++ return offset;
+ }
+
+ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
+--
+2.7.4
+
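array_index_nospec() clamps the index with pure arithmetic, so that even if the preceding bounds check is mispredicted the dependent load can only see index 0. An open-coded C sketch of the masking idea (simplified; the real kernel helper is written so the compiler cannot turn it back into a branch):

#include <stddef.h>
#include <stdio.h>

/* All-ones mask when index < size, zero otherwise; the AND clamps
 * an out-of-bounds index to 0 without branching on the result. */
static size_t index_nospec(size_t index, size_t size)
{
    size_t mask = (size_t)0 - (size_t)(index < size);
    return index & mask;
}

static const short offset_table[8] = { 4, 8, 12, 0, 16, 20, 0, 24 };

static short field_to_offset(size_t field)
{
    const size_t size = sizeof(offset_table) / sizeof(offset_table[0]);
    short offset;

    if (field >= size)
        return -1;
    field = index_nospec(field, size);
    offset = offset_table[field];
    return offset ? offset : -1;
}

int main(void)
{
    printf("%d %d\n", field_to_offset(4), field_to_offset(42));   /* 16 -1 */
    return 0;
}
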
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch
new file mode 100644
index 00000000..38a23282
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0014-KVM-nVMX-kmap-can-t-fail.patch
@@ -0,0 +1,47 @@
+From 6b359ffcb519698f93eadc2706d06805ce933086 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 25 Jan 2017 11:58:57 +0100
+Subject: [PATCH 14/33] KVM: nVMX: kmap() can't fail
+
+commit 42cf014d38d8822cce63703a467e00f65d000952 upstream.
+
+kmap() can't fail, therefore it will always return a valid pointer. Let's
+just get rid of the unnecessary checks.
+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index af90bc4..17fcbaf 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4742,10 +4742,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ return 0;
+
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+- if (!vapic_page) {
+- WARN_ON(1);
+- return -ENOMEM;
+- }
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ kunmap(vmx->nested.virtual_apic_page);
+
+@@ -9562,11 +9558,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ return false;
+ }
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+- if (!msr_bitmap_l1) {
+- nested_release_page_clean(page);
+- WARN_ON(1);
+- return false;
+- }
+
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch
new file mode 100644
index 00000000..806b1ac0
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch
@@ -0,0 +1,69 @@
+From b53c02711255aa79e4e1a9974ca24610c4fbd7d7 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 25 Jan 2017 11:58:58 +0100
+Subject: [PATCH 15/33] KVM: nVMX: vmx_complete_nested_posted_interrupt() can't
+ fail
+
+(cherry picked from commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f)
+
+vmx_complete_nested_posted_interrupt() can't fail, let's turn it into
+a void function.
+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 17fcbaf..13dc454 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4722,7 +4722,7 @@ static bool vmx_get_enable_apicv(void)
+ return enable_apicv;
+ }
+
+-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
++static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int max_irr;
+@@ -4733,13 +4733,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ vmx->nested.pi_pending) {
+ vmx->nested.pi_pending = false;
+ if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+- return 0;
++ return;
+
+ max_irr = find_last_bit(
+ (unsigned long *)vmx->nested.pi_desc->pir, 256);
+
+ if (max_irr == 256)
+- return 0;
++ return;
+
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+@@ -4752,7 +4752,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ vmcs_write16(GUEST_INTR_STATUS, status);
+ }
+ }
+- return 0;
+ }
+
+ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
+@@ -10440,7 +10439,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+ return 0;
+ }
+
+- return vmx_complete_nested_posted_interrupt(vcpu);
++ vmx_complete_nested_posted_interrupt(vcpu);
++ return 0;
+ }
+
+ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch
new file mode 100644
index 00000000..e7f44b1b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch
@@ -0,0 +1,119 @@
+From 50fefe1aabf115927dbe944d4607d3696ed2773e Mon Sep 17 00:00:00 2001
+From: David Matlack <dmatlack@google.com>
+Date: Tue, 1 Aug 2017 14:00:40 -0700
+Subject: [PATCH 16/33] KVM: nVMX: mark vmcs12 pages dirty on L2 exit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+(cherry picked from commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570)
+
+The host physical addresses of L1's Virtual APIC Page and Posted
+Interrupt descriptor are loaded into the VMCS02. The CPU may write
+to these pages via their host physical address while L2 is running,
+bypassing address-translation-based dirty tracking (e.g. EPT write
+protection). Mark them dirty on every exit from L2 to prevent them
+from getting out of sync with dirty tracking.
+
+Also mark the virtual APIC page and the posted interrupt descriptor
+dirty when KVM is virtualizing posted interrupt processing.
+
+Signed-off-by: David Matlack <dmatlack@google.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 43 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 13dc454..2e88fd1 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4722,6 +4722,28 @@ static bool vmx_get_enable_apicv(void)
+ return enable_apicv;
+ }
+
++static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
++{
++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
++ gfn_t gfn;
++
++ /*
++ * Don't need to mark the APIC access page dirty; it is never
++ * written to by the CPU during APIC virtualization.
++ */
++
++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
++ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
++ kvm_vcpu_mark_page_dirty(vcpu, gfn);
++ }
++
++ if (nested_cpu_has_posted_intr(vmcs12)) {
++ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
++ kvm_vcpu_mark_page_dirty(vcpu, gfn);
++ }
++}
++
++
+ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -4729,18 +4751,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ void *vapic_page;
+ u16 status;
+
+- if (vmx->nested.pi_desc &&
+- vmx->nested.pi_pending) {
+- vmx->nested.pi_pending = false;
+- if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+- return;
+-
+- max_irr = find_last_bit(
+- (unsigned long *)vmx->nested.pi_desc->pir, 256);
++ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
++ return;
+
+- if (max_irr == 256)
+- return;
++ vmx->nested.pi_pending = false;
++ if (!pi_test_and_clear_on(vmx->nested.pi_desc))
++ return;
+
++ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
++ if (max_irr != 256) {
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ kunmap(vmx->nested.virtual_apic_page);
+@@ -4752,6 +4771,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+ vmcs_write16(GUEST_INTR_STATUS, status);
+ }
+ }
++
++ nested_mark_vmcs12_pages_dirty(vcpu);
+ }
+
+ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
+@@ -8009,6 +8030,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+ KVM_ISA_VMX);
+
++ /*
++ * The host physical addresses of some pages of guest memory
++ * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
++ * may write to these pages via their host physical address while
++ * L2 is running, bypassing any address-translation-based dirty
++ * tracking (e.g. EPT write protection).
++ *
++ * Mark them dirty on every exit from L2 to prevent them from
++ * getting out of sync with dirty tracking.
++ */
++ nested_mark_vmcs12_pages_dirty(vcpu);
++
+ if (vmx->nested.nested_run_pending)
+ return false;
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch
new file mode 100644
index 00000000..96687e49
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0017-KVM-nVMX-Eliminate-vmcs02-pool.patch
@@ -0,0 +1,295 @@
+From 8e52c41b7072930e5951b324964f31ef6991f3af Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Mon, 27 Nov 2017 17:22:25 -0600
+Subject: [PATCH 17/33] KVM: nVMX: Eliminate vmcs02 pool
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+(cherry picked from commit de3a0021a60635de96aa92713c1a31a96747d72c)
+
+The potential performance advantages of a vmcs02 pool have never been
+realized. To simplify the code, eliminate the pool. Instead, a single
+vmcs02 is allocated per VCPU when the VCPU enters VMX operation.
+
+Cc: stable@vger.kernel.org # prereq for Spectre mitigation
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
+Reviewed-by: Ameya More <ameya.more@oracle.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 146 +++++++++--------------------------------------------
+ 1 file changed, 23 insertions(+), 123 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 2e88fd1..099f221 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -174,7 +174,6 @@ module_param(ple_window_max, int, S_IRUGO);
+ extern const ulong vmx_return;
+
+ #define NR_AUTOLOAD_MSRS 8
+-#define VMCS02_POOL_SIZE 1
+
+ struct vmcs {
+ u32 revision_id;
+@@ -208,7 +207,7 @@ struct shared_msr_entry {
+ * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
+ * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
+ * More than one of these structures may exist, if L1 runs multiple L2 guests.
+- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
+ * underlying hardware which will be used to run L2.
+ * This structure is packed to ensure that its layout is identical across
+ * machines (necessary for live migration).
+@@ -387,13 +386,6 @@ struct __packed vmcs12 {
+ */
+ #define VMCS12_SIZE 0x1000
+
+-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
+-struct vmcs02_list {
+- struct list_head list;
+- gpa_t vmptr;
+- struct loaded_vmcs vmcs02;
+-};
+-
+ /*
+ * The nested_vmx structure is part of vcpu_vmx, and holds information we need
+ * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
+@@ -420,15 +412,15 @@ struct nested_vmx {
+ */
+ bool sync_shadow_vmcs;
+
+- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
+- struct list_head vmcs02_pool;
+- int vmcs02_num;
+ bool change_vmcs01_virtual_x2apic_mode;
+ /* L2 must run next, and mustn't decide to exit to L1. */
+ bool nested_run_pending;
++
++ struct loaded_vmcs vmcs02;
++
+ /*
+- * Guest pages referred to in vmcs02 with host-physical pointers, so
+- * we must keep them pinned while L2 runs.
++ * Guest pages referred to in the vmcs02 with host-physical
++ * pointers, so we must keep them pinned while L2 runs.
+ */
+ struct page *apic_access_page;
+ struct page *virtual_apic_page;
+@@ -6657,94 +6649,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
+ }
+
+ /*
+- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
+- * We could reuse a single VMCS for all the L2 guests, but we also want the
+- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
+- * allows keeping them loaded on the processor, and in the future will allow
+- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
+- * every entry if they never change.
+- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
+- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
+- *
+- * The following functions allocate and free a vmcs02 in this pool.
+- */
+-
+-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
+-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
+-{
+- struct vmcs02_list *item;
+- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+- if (item->vmptr == vmx->nested.current_vmptr) {
+- list_move(&item->list, &vmx->nested.vmcs02_pool);
+- return &item->vmcs02;
+- }
+-
+- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
+- /* Recycle the least recently used VMCS. */
+- item = list_last_entry(&vmx->nested.vmcs02_pool,
+- struct vmcs02_list, list);
+- item->vmptr = vmx->nested.current_vmptr;
+- list_move(&item->list, &vmx->nested.vmcs02_pool);
+- return &item->vmcs02;
+- }
+-
+- /* Create a new VMCS */
+- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+- if (!item)
+- return NULL;
+- item->vmcs02.vmcs = alloc_vmcs();
+- item->vmcs02.shadow_vmcs = NULL;
+- if (!item->vmcs02.vmcs) {
+- kfree(item);
+- return NULL;
+- }
+- loaded_vmcs_init(&item->vmcs02);
+- item->vmptr = vmx->nested.current_vmptr;
+- list_add(&(item->list), &(vmx->nested.vmcs02_pool));
+- vmx->nested.vmcs02_num++;
+- return &item->vmcs02;
+-}
+-
+-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
+-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
+-{
+- struct vmcs02_list *item;
+- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
+- if (item->vmptr == vmptr) {
+- free_loaded_vmcs(&item->vmcs02);
+- list_del(&item->list);
+- kfree(item);
+- vmx->nested.vmcs02_num--;
+- return;
+- }
+-}
+-
+-/*
+- * Free all VMCSs saved for this vcpu, except the one pointed by
+- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
+- * must be &vmx->vmcs01.
+- */
+-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
+-{
+- struct vmcs02_list *item, *n;
+-
+- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
+- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
+- /*
+- * Something will leak if the above WARN triggers. Better than
+- * a use-after-free.
+- */
+- if (vmx->loaded_vmcs == &item->vmcs02)
+- continue;
+-
+- free_loaded_vmcs(&item->vmcs02);
+- list_del(&item->list);
+- kfree(item);
+- vmx->nested.vmcs02_num--;
+- }
+-}
+-
+-/*
+ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
+ * set the success or error code of an emulated VMX instruction, as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions".
+@@ -7051,6 +6955,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ return 1;
+ }
+
++ vmx->nested.vmcs02.vmcs = alloc_vmcs();
++ vmx->nested.vmcs02.shadow_vmcs = NULL;
++ if (!vmx->nested.vmcs02.vmcs)
++ goto out_vmcs02;
++ loaded_vmcs_init(&vmx->nested.vmcs02);
++
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+@@ -7073,9 +6983,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ vmx->vmcs01.shadow_vmcs = shadow_vmcs;
+ }
+
+- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
+- vmx->nested.vmcs02_num = 0;
+-
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+@@ -7093,6 +7000,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+ out_msr_bitmap:
++ free_loaded_vmcs(&vmx->nested.vmcs02);
++
++out_vmcs02:
+ return -ENOMEM;
+ }
+
+@@ -7178,7 +7088,7 @@ static void free_nested(struct vcpu_vmx *vmx)
+ vmx->vmcs01.shadow_vmcs = NULL;
+ }
+ kfree(vmx->nested.cached_vmcs12);
+- /* Unpin physical memory we referred to in current vmcs02 */
++ /* Unpin physical memory we referred to in the vmcs02 */
+ if (vmx->nested.apic_access_page) {
+ nested_release_page(vmx->nested.apic_access_page);
+ vmx->nested.apic_access_page = NULL;
+@@ -7194,7 +7104,7 @@ static void free_nested(struct vcpu_vmx *vmx)
+ vmx->nested.pi_desc = NULL;
+ }
+
+- nested_free_all_saved_vmcss(vmx);
++ free_loaded_vmcs(&vmx->nested.vmcs02);
+ }
+
+ /* Emulate the VMXOFF instruction */
+@@ -7242,8 +7152,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
+ kunmap(page);
+ nested_release_page(page);
+
+- nested_free_vmcs02(vmx, vmptr);
+-
+ skip_emulated_instruction(vcpu);
+ nested_vmx_succeed(vcpu);
+ return 1;
+@@ -8032,10 +7940,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
+
+ /*
+ * The host physical addresses of some pages of guest memory
+- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
+- * may write to these pages via their host physical address while
+- * L2 is running, bypassing any address-translation-based dirty
+- * tracking (e.g. EPT write protection).
++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
++ * Page). The CPU may write to these pages via their host
++ * physical address while L2 is running, bypassing any
++ * address-translation-based dirty tracking (e.g. EPT write
++ * protection).
+ *
+ * Mark them dirty on every exit from L2 to prevent them from
+ * getting out of sync with dirty tracking.
+@@ -10170,7 +10079,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+ struct vmcs12 *vmcs12;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int cpu;
+- struct loaded_vmcs *vmcs02;
+ bool ia32e;
+ u32 msr_entry_idx;
+
+@@ -10310,17 +10218,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+ * the nested entry.
+ */
+
+- vmcs02 = nested_get_current_vmcs02(vmx);
+- if (!vmcs02)
+- return -ENOMEM;
+-
+ enter_guest_mode(vcpu);
+
+ if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+ vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+ cpu = get_cpu();
+- vmx->loaded_vmcs = vmcs02;
++ vmx->loaded_vmcs = &vmx->nested.vmcs02;
+ vmx_vcpu_put(vcpu);
+ vmx_vcpu_load(vcpu, cpu);
+ vcpu->cpu = cpu;
+@@ -10833,10 +10737,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+ vm_exit_controls_reset_shadow(vmx);
+ vmx_segment_cache_clear(vmx);
+
+- /* if no vmcs02 cache requested, remove the one we used */
+- if (VMCS02_POOL_SIZE == 0)
+- nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
+-
+ load_vmcs12_host_state(vcpu, vmcs12);
+
+ /* Update any VMCS fields that might have changed while L2 ran */
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch
new file mode 100644
index 00000000..a22f91a8
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch
@@ -0,0 +1,104 @@
+From 80f4f0e9de9cce1047ac0aac305aca7310e37313 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 11 Jan 2018 12:16:15 +0100
+Subject: [PATCH 18/33] KVM: VMX: introduce alloc_loaded_vmcs
+
+(cherry picked from commit f21f165ef922c2146cc5bdc620f542953c41714b)
+
+Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also
+allocate an MSR bitmap there.
+
+Cc: stable@vger.kernel.org # prereq for Spectre mitigation
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++---------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 099f221..6814355 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3514,11 +3514,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
+ return vmcs;
+ }
+
+-static struct vmcs *alloc_vmcs(void)
+-{
+- return alloc_vmcs_cpu(raw_smp_processor_id());
+-}
+-
+ static void free_vmcs(struct vmcs *vmcs)
+ {
+ free_pages((unsigned long)vmcs, vmcs_config.order);
+@@ -3537,6 +3532,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+ WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
+ }
+
++static struct vmcs *alloc_vmcs(void)
++{
++ return alloc_vmcs_cpu(raw_smp_processor_id());
++}
++
++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
++{
++ loaded_vmcs->vmcs = alloc_vmcs();
++ if (!loaded_vmcs->vmcs)
++ return -ENOMEM;
++
++ loaded_vmcs->shadow_vmcs = NULL;
++ loaded_vmcs_init(loaded_vmcs);
++ return 0;
++}
++
+ static void free_kvm_area(void)
+ {
+ int cpu;
+@@ -6916,6 +6927,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ struct vmcs *shadow_vmcs;
+ const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
+ | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
++ int r;
+
+ /* The Intel VMX Instruction Reference lists a bunch of bits that
+ * are prerequisite to running VMXON, most notably cr4.VMXE must be
+@@ -6955,11 +6967,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ return 1;
+ }
+
+- vmx->nested.vmcs02.vmcs = alloc_vmcs();
+- vmx->nested.vmcs02.shadow_vmcs = NULL;
+- if (!vmx->nested.vmcs02.vmcs)
++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
++ if (r < 0)
+ goto out_vmcs02;
+- loaded_vmcs_init(&vmx->nested.vmcs02);
+
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+@@ -9090,17 +9100,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ if (!vmx->guest_msrs)
+ goto free_pml;
+
+- vmx->loaded_vmcs = &vmx->vmcs01;
+- vmx->loaded_vmcs->vmcs = alloc_vmcs();
+- vmx->loaded_vmcs->shadow_vmcs = NULL;
+- if (!vmx->loaded_vmcs->vmcs)
+- goto free_msrs;
+ if (!vmm_exclusive)
+ kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
+- loaded_vmcs_init(vmx->loaded_vmcs);
++ err = alloc_loaded_vmcs(&vmx->vmcs01);
+ if (!vmm_exclusive)
+ kvm_cpu_vmxoff();
++ if (err < 0)
++ goto free_msrs;
+
++ vmx->loaded_vmcs = &vmx->vmcs01;
+ cpu = get_cpu();
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+ vmx->vcpu.cpu = cpu;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch
new file mode 100644
index 00000000..0a8db555
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch
@@ -0,0 +1,585 @@
+From cc42f184dfdfed46c394274020b84a1641f24714 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 16 Jan 2018 16:51:18 +0100
+Subject: [PATCH 19/33] KVM: VMX: make MSR bitmaps per-VCPU
+
+(cherry picked from commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6)
+
+Place the MSR bitmap in struct loaded_vmcs, and update it in place
+every time the x2apic or APICv state can change. This is rare and
+the loop can handle 64 MSRs per iteration, in a similar fashion to
+nested_vmx_prepare_msr_bitmap.
+
+This prepares for choosing, on a per-VM basis, whether to intercept
+the SPEC_CTRL and PRED_CMD MSRs.
+
+Cc: stable@vger.kernel.org # prereq for Spectre mitigation
+Suggested-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 315 +++++++++++++++++++----------------------------------
+ 1 file changed, 114 insertions(+), 201 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 6814355..c6a7563 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -110,6 +110,14 @@ static u64 __read_mostly host_xss;
+ static bool __read_mostly enable_pml = 1;
+ module_param_named(pml, enable_pml, bool, S_IRUGO);
+
++#define MSR_TYPE_R 1
++#define MSR_TYPE_W 2
++#define MSR_TYPE_RW 3
++
++#define MSR_BITMAP_MODE_X2APIC 1
++#define MSR_BITMAP_MODE_X2APIC_APICV 2
++#define MSR_BITMAP_MODE_LM 4
++
+ #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
+
+ /* Guest_tsc -> host_tsc conversion requires 64-bit division. */
+@@ -191,6 +199,7 @@ struct loaded_vmcs {
+ struct vmcs *shadow_vmcs;
+ int cpu;
+ int launched;
++ unsigned long *msr_bitmap;
+ struct list_head loaded_vmcss_on_cpu_link;
+ };
+
+@@ -429,8 +438,6 @@ struct nested_vmx {
+ bool pi_pending;
+ u16 posted_intr_nv;
+
+- unsigned long *msr_bitmap;
+-
+ struct hrtimer preemption_timer;
+ bool preemption_timer_expired;
+
+@@ -531,6 +538,7 @@ struct vcpu_vmx {
+ unsigned long host_rsp;
+ u8 fail;
+ bool nmi_known_unmasked;
++ u8 msr_bitmap_mode;
+ u32 exit_intr_info;
+ u32 idt_vectoring_info;
+ ulong rflags;
+@@ -902,6 +910,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
+ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
+ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+ static int alloc_identity_pagetable(struct kvm *kvm);
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+@@ -921,12 +930,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
+ static unsigned long *vmx_io_bitmap_a;
+ static unsigned long *vmx_io_bitmap_b;
+-static unsigned long *vmx_msr_bitmap_legacy;
+-static unsigned long *vmx_msr_bitmap_longmode;
+-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
+-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+ static unsigned long *vmx_vmread_bitmap;
+ static unsigned long *vmx_vmwrite_bitmap;
+
+@@ -2517,36 +2520,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
+ vmx->guest_msrs[from] = tmp;
+ }
+
+-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+-{
+- unsigned long *msr_bitmap;
+-
+- if (is_guest_mode(vcpu))
+- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
+- else if (cpu_has_secondary_exec_ctrls() &&
+- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
+- if (is_long_mode(vcpu))
+- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+- else
+- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+- } else {
+- if (is_long_mode(vcpu))
+- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+- else
+- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+- }
+- } else {
+- if (is_long_mode(vcpu))
+- msr_bitmap = vmx_msr_bitmap_longmode;
+- else
+- msr_bitmap = vmx_msr_bitmap_legacy;
+- }
+-
+- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+-}
+-
+ /*
+ * Set up the vmcs to automatically save and restore system
+ * msrs. Don't touch the 64-bit msrs if the guest is in legacy
+@@ -2587,7 +2560,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
+ vmx->save_nmsrs = save_nmsrs;
+
+ if (cpu_has_vmx_msr_bitmap())
+- vmx_set_msr_bitmap(&vmx->vcpu);
++ vmx_update_msr_bitmap(&vmx->vcpu);
+ }
+
+ /*
+@@ -3529,6 +3502,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+ loaded_vmcs_clear(loaded_vmcs);
+ free_vmcs(loaded_vmcs->vmcs);
+ loaded_vmcs->vmcs = NULL;
++ if (loaded_vmcs->msr_bitmap)
++ free_page((unsigned long)loaded_vmcs->msr_bitmap);
+ WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
+ }
+
+@@ -3545,7 +3520,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+
+ loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs_init(loaded_vmcs);
++
++ if (cpu_has_vmx_msr_bitmap()) {
++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
++ if (!loaded_vmcs->msr_bitmap)
++ goto out_vmcs;
++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
++ }
+ return 0;
++
++out_vmcs:
++ free_loaded_vmcs(loaded_vmcs);
++ return -ENOMEM;
+ }
+
+ static void free_kvm_area(void)
+@@ -4548,10 +4534,8 @@ static void free_vpid(int vpid)
+ spin_unlock(&vmx_vpid_lock);
+ }
+
+-#define MSR_TYPE_R 1
+-#define MSR_TYPE_W 2
+-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+- u32 msr, int type)
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
++ u32 msr, int type)
+ {
+ int f = sizeof(unsigned long);
+
+@@ -4585,8 +4569,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ }
+ }
+
+-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+- u32 msr, int type)
++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
++ u32 msr, int type)
+ {
+ int f = sizeof(unsigned long);
+
+@@ -4620,6 +4604,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ }
+ }
+
++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
++ u32 msr, int type, bool value)
++{
++ if (value)
++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
++ else
++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
++}
++
+ /*
+ * If a msr is allowed by L0, we should check whether it is allowed by L1.
+ * The corresponding bit will be cleared unless both of L0 and L1 allow it.
+@@ -4666,58 +4659,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
+ }
+ }
+
+-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
+ {
+- if (!longmode_only)
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+- msr, MSR_TYPE_R | MSR_TYPE_W);
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+- msr, MSR_TYPE_R | MSR_TYPE_W);
+-}
++ u8 mode = 0;
+
+-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+-{
+- if (apicv_active) {
+- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+- msr, MSR_TYPE_R);
+- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+- msr, MSR_TYPE_R);
+- } else {
+- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+- msr, MSR_TYPE_R);
+- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+- msr, MSR_TYPE_R);
++ if (cpu_has_secondary_exec_ctrls() &&
++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
++ mode |= MSR_BITMAP_MODE_X2APIC;
++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
++ mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+ }
++
++ if (is_long_mode(vcpu))
++ mode |= MSR_BITMAP_MODE_LM;
++
++ return mode;
+ }
+
+-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
++
++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
++ u8 mode)
+ {
+- if (apicv_active) {
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+- msr, MSR_TYPE_R);
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+- msr, MSR_TYPE_R);
+- } else {
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+- msr, MSR_TYPE_R);
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+- msr, MSR_TYPE_R);
++ int msr;
++
++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
++ unsigned word = msr / BITS_PER_LONG;
++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
++ }
++
++ if (mode & MSR_BITMAP_MODE_X2APIC) {
++ /*
++ * TPR reads and writes can be virtualized even if virtual interrupt
++ * delivery is not in use.
++ */
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
++ }
+ }
+ }
+
+-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
+ {
+- if (apicv_active) {
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+- msr, MSR_TYPE_W);
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+- msr, MSR_TYPE_W);
+- } else {
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+- msr, MSR_TYPE_W);
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+- msr, MSR_TYPE_W);
+- }
++ struct vcpu_vmx *vmx = to_vmx(vcpu);
++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
++ u8 mode = vmx_msr_bitmap_mode(vcpu);
++ u8 changed = mode ^ vmx->msr_bitmap_mode;
++
++ if (!changed)
++ return;
++
++ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
++ !(mode & MSR_BITMAP_MODE_LM));
++
++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
++
++ vmx->msr_bitmap_mode = mode;
+ }
+
+ static bool vmx_get_enable_apicv(void)
+@@ -4953,7 +4956,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+- vmx_set_msr_bitmap(vcpu);
++ vmx_update_msr_bitmap(vcpu);
+ }
+
+ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
+@@ -5042,7 +5045,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+ }
+ if (cpu_has_vmx_msr_bitmap())
+- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
+
+ vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
+
+@@ -6371,7 +6374,7 @@ static void wakeup_handler(void)
+
+ static __init int hardware_setup(void)
+ {
+- int r = -ENOMEM, i, msr;
++ int r = -ENOMEM, i;
+
+ rdmsrl_safe(MSR_EFER, &host_efer);
+
+@@ -6386,41 +6389,13 @@ static __init int hardware_setup(void)
+ if (!vmx_io_bitmap_b)
+ goto out;
+
+- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_legacy)
+- goto out1;
+-
+- vmx_msr_bitmap_legacy_x2apic =
+- (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_legacy_x2apic)
+- goto out2;
+-
+- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
+- (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
+- goto out3;
+-
+- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_longmode)
+- goto out4;
+-
+- vmx_msr_bitmap_longmode_x2apic =
+- (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_longmode_x2apic)
+- goto out5;
+-
+- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
+- (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
+- goto out6;
+-
+ vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmread_bitmap)
+- goto out7;
++ goto out1;
+
+ vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmwrite_bitmap)
+- goto out8;
++ goto out2;
+
+ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+@@ -6434,12 +6409,9 @@ static __init int hardware_setup(void)
+
+ memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+-
+ if (setup_vmcs_config(&vmcs_config) < 0) {
+ r = -EIO;
+- goto out9;
++ goto out3;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_NX))
+@@ -6494,48 +6466,8 @@ static __init int hardware_setup(void)
+ kvm_tsc_scaling_ratio_frac_bits = 48;
+ }
+
+- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+-
+- memcpy(vmx_msr_bitmap_legacy_x2apic,
+- vmx_msr_bitmap_legacy, PAGE_SIZE);
+- memcpy(vmx_msr_bitmap_longmode_x2apic,
+- vmx_msr_bitmap_longmode, PAGE_SIZE);
+- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+- vmx_msr_bitmap_legacy, PAGE_SIZE);
+- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+- vmx_msr_bitmap_longmode, PAGE_SIZE);
+-
+ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
+- /*
+- * enable_apicv && kvm_vcpu_apicv_active()
+- */
+- for (msr = 0x800; msr <= 0x8ff; msr++)
+- vmx_disable_intercept_msr_read_x2apic(msr, true);
+-
+- /* TMCCT */
+- vmx_enable_intercept_msr_read_x2apic(0x839, true);
+- /* TPR */
+- vmx_disable_intercept_msr_write_x2apic(0x808, true);
+- /* EOI */
+- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+- /* SELF-IPI */
+- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
+-
+- /*
+- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
+- * !enable_apicv
+- */
+- /* TPR */
+- vmx_disable_intercept_msr_read_x2apic(0x808, false);
+- vmx_disable_intercept_msr_write_x2apic(0x808, false);
+-
+ if (enable_ept) {
+ kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
+ (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+@@ -6581,22 +6513,10 @@ static __init int hardware_setup(void)
+
+ return alloc_kvm_area();
+
+-out9:
+- free_page((unsigned long)vmx_vmwrite_bitmap);
+-out8:
+- free_page((unsigned long)vmx_vmread_bitmap);
+-out7:
+- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
+-out6:
+- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+-out5:
+- free_page((unsigned long)vmx_msr_bitmap_longmode);
+-out4:
+- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
+ out3:
+- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
++ free_page((unsigned long)vmx_vmwrite_bitmap);
+ out2:
+- free_page((unsigned long)vmx_msr_bitmap_legacy);
++ free_page((unsigned long)vmx_vmread_bitmap);
+ out1:
+ free_page((unsigned long)vmx_io_bitmap_b);
+ out:
+@@ -6607,12 +6527,6 @@ static __init int hardware_setup(void)
+
+ static __exit void hardware_unsetup(void)
+ {
+- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
+- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
+- free_page((unsigned long)vmx_msr_bitmap_legacy);
+- free_page((unsigned long)vmx_msr_bitmap_longmode);
+ free_page((unsigned long)vmx_io_bitmap_b);
+ free_page((unsigned long)vmx_io_bitmap_a);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+@@ -6971,13 +6885,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ if (r < 0)
+ goto out_vmcs02;
+
+- if (cpu_has_vmx_msr_bitmap()) {
+- vmx->nested.msr_bitmap =
+- (unsigned long *)__get_free_page(GFP_KERNEL);
+- if (!vmx->nested.msr_bitmap)
+- goto out_msr_bitmap;
+- }
+-
+ vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ if (!vmx->nested.cached_vmcs12)
+ goto out_cached_vmcs12;
+@@ -7007,9 +6914,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ kfree(vmx->nested.cached_vmcs12);
+
+ out_cached_vmcs12:
+- free_page((unsigned long)vmx->nested.msr_bitmap);
+-
+-out_msr_bitmap:
+ free_loaded_vmcs(&vmx->nested.vmcs02);
+
+ out_vmcs02:
+@@ -7088,10 +6992,6 @@ static void free_nested(struct vcpu_vmx *vmx)
+ vmx->nested.vmxon = false;
+ free_vpid(vmx->nested.vpid02);
+ nested_release_vmcs12(vmx);
+- if (vmx->nested.msr_bitmap) {
+- free_page((unsigned long)vmx->nested.msr_bitmap);
+- vmx->nested.msr_bitmap = NULL;
+- }
+ if (enable_shadow_vmcs) {
+ vmcs_clear(vmx->vmcs01.shadow_vmcs);
+ free_vmcs(vmx->vmcs01.shadow_vmcs);
+@@ -8450,7 +8350,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ }
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+
+- vmx_set_msr_bitmap(vcpu);
++ vmx_update_msr_bitmap(vcpu);
+ }
+
+ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+@@ -9068,6 +8968,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ {
+ int err;
+ struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
++ unsigned long *msr_bitmap;
+ int cpu;
+
+ if (!vmx)
+@@ -9108,6 +9009,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ if (err < 0)
+ goto free_msrs;
+
++ msr_bitmap = vmx->vmcs01.msr_bitmap;
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
++ vmx->msr_bitmap_mode = 0;
++
+ vmx->loaded_vmcs = &vmx->vmcs01;
+ cpu = get_cpu();
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+@@ -9495,7 +9405,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ int msr;
+ struct page *page;
+ unsigned long *msr_bitmap_l1;
+- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+
+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+@@ -10007,6 +9917,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
+
++ if (cpu_has_vmx_msr_bitmap())
++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
++
+ if (enable_vpid) {
+ /*
+ * There is no direct mapping between vpid02 and vpid12, the
+@@ -10694,7 +10607,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+ if (cpu_has_vmx_msr_bitmap())
+- vmx_set_msr_bitmap(vcpu);
++ vmx_update_msr_bitmap(vcpu);
+
+ if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
+ vmcs12->vm_exit_msr_load_count))
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch
new file mode 100644
index 00000000..731a182a
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-KVM-x86-Add-IBPB-support.patch
@@ -0,0 +1,352 @@
+From b70d7889c078c97d11ae6412760f3231fda324cd Mon Sep 17 00:00:00 2001
+From: Ashok Raj <ashok.raj@intel.com>
+Date: Thu, 1 Feb 2018 22:59:43 +0100
+Subject: [PATCH 20/33] KVM/x86: Add IBPB support
+
+(cherry picked from commit 15d45071523d89b3fb7372e2135fbd72f6af9506)
+
+The Indirect Branch Predictor Barrier (IBPB) is an indirect branch
+control mechanism. It keeps earlier branches from influencing
+later ones.
+
+Unlike IBRS and STIBP, IBPB does not define a new mode of operation.
+It's a command that ensures predicted branch targets aren't used after
+the barrier. Although IBRS and IBPB are enumerated by the same CPUID
+enumeration, IBPB is very different.
+
+IBPB helps mitigate three potential attacks:
+
+* Mitigate attacks on guests from other guests.
+ - This is addressed by issuing IBPB when we do a guest switch.
+
+* Mitigate attacks from guest/ring3->host/ring3.
+ These would require an IBPB during context switch in the host, or after
+ VMEXIT. The host process has two ways to mitigate
+ - Either it can be compiled with retpoline
+ - If it's going through a context switch, and has set !dumpable, then
+ there is an IBPB in that path.
+ (Tim's patch: https://patchwork.kernel.org/patch/10192871)
+ - The case where you return to Qemu after a VMEXIT might make Qemu
+ attackable from the guest when Qemu isn't compiled with retpoline.
+ Issues have been reported when doing IBPB on every VMEXIT, resulting
+ in some TSC calibration woes in the guest.
+
+* Mitigate guest/ring0->host/ring0 attacks.
+ When the host kernel is using retpoline it is safe against these attacks.
+ If the host kernel isn't using retpoline we might need to do an IBPB flush on
+ every VMEXIT.
+
+Even when using retpoline for indirect calls, in certain conditions 'ret'
+can use the BTB on Skylake-era CPUs. There are other mitigations
+available like RSB stuffing/clearing.
+
+* IBPB is issued only for SVM during svm_free_vcpu().
+ VMX has a vmclear and SVM doesn't. Follow discussion here:
+ https://lkml.org/lkml/2018/1/15/146
+
+Please refer to the following spec for more details on the enumeration
+and control.
+
+Refer here to get documentation about mitigations.
+
+https://software.intel.com/en-us/side-channel-security-support
+
+[peterz: rebase and changelog rewrite]
+[karahmed: - rebase
+ - vmx: expose PRED_CMD if guest has it in CPUID
+ - svm: only pass through IBPB if guest has it in CPUID
+ - vmx: support !cpu_has_vmx_msr_bitmap()
+ - vmx: support nested]
+[dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
+ PRED_CMD is a write-only MSR]
+
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: kvm@vger.kernel.org
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
+Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 11 +++++++-
+ arch/x86/kvm/cpuid.h | 12 ++++++++
+ arch/x86/kvm/svm.c | 28 +++++++++++++++++++
+ arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 4 files changed, 127 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index afa7bbb..42323be 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+
++ /* cpuid 0x80000008.ebx */
++ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
++ F(IBPB);
++
+ /* cpuid 0xC0000001.edx */
+ const u32 kvm_cpuid_C000_0001_edx_x86_features =
+ F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+@@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ if (!g_phys_as)
+ g_phys_as = phys_as;
+ entry->eax = g_phys_as | (virt_as << 8);
+- entry->ebx = entry->edx = 0;
++ entry->edx = 0;
++ /* IBPB isn't necessarily present in hardware cpuid */
++ if (boot_cpu_has(X86_FEATURE_IBPB))
++ entry->ebx |= F(IBPB);
++ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
++ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ break;
+ }
+ case 0x80000019:
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
+index 35058c2..f4a2a1a 100644
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -152,6 +152,18 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
+ return best && (best->edx & bit(X86_FEATURE_RDTSCP));
+ }
+
++static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
++{
++ struct kvm_cpuid_entry2 *best;
++
++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
++ if (best && (best->ebx & bit(X86_FEATURE_IBPB)))
++ return true;
++ best = kvm_find_cpuid_entry(vcpu, 7, 0);
++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
++}
++
++
+ /*
+ * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
+ */
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 491f077..43e45b9 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -248,6 +248,7 @@ static const struct svm_direct_access_msrs {
+ { .index = MSR_CSTAR, .always = true },
+ { .index = MSR_SYSCALL_MASK, .always = true },
+ #endif
++ { .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
+ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
+ { .index = MSR_IA32_LASTINTFROMIP, .always = false },
+@@ -510,6 +511,7 @@ struct svm_cpu_data {
+ struct kvm_ldttss_desc *tss_desc;
+
+ struct page *save_area;
++ struct vmcb *current_vmcb;
+ };
+
+ static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+@@ -1641,11 +1643,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, svm);
++ /*
++ * The vmcb page can be recycled, causing a false negative in
++ * svm_vcpu_load(). So do a full IBPB now.
++ */
++ indirect_branch_prediction_barrier();
+ }
+
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ int i;
+
+ if (unlikely(cpu != vcpu->cpu)) {
+@@ -1674,6 +1682,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
+ wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+
++ if (sd->current_vmcb != svm->vmcb) {
++ sd->current_vmcb = svm->vmcb;
++ indirect_branch_prediction_barrier();
++ }
+ avic_vcpu_load(vcpu, cpu);
+ }
+
+@@ -3587,6 +3599,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr);
+ break;
++ case MSR_IA32_PRED_CMD:
++ if (!msr->host_initiated &&
++ !guest_cpuid_has_ibpb(vcpu))
++ return 1;
++
++ if (data & ~PRED_CMD_IBPB)
++ return 1;
++
++ if (!data)
++ break;
++
++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
++ if (is_guest_mode(vcpu))
++ break;
++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
++ break;
+ case MSR_STAR:
+ svm->vmcb->save.star = data;
+ break;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index c6a7563..855df75 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -550,6 +550,7 @@ struct vcpu_vmx {
+ u64 msr_host_kernel_gs_base;
+ u64 msr_guest_kernel_gs_base;
+ #endif
++
+ u32 vm_entry_controls_shadow;
+ u32 vm_exit_controls_shadow;
+ /*
+@@ -911,6 +912,8 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
+ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+ static int alloc_identity_pagetable(struct kvm *kvm);
+ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
++ u32 msr, int type);
+
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+@@ -1841,6 +1844,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+ vmcs_write32(EXCEPTION_BITMAP, eb);
+ }
+
++/*
++ * Check if MSR is intercepted for L01 MSR bitmap.
++ */
++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
++{
++ unsigned long *msr_bitmap;
++ int f = sizeof(unsigned long);
++
++ if (!cpu_has_vmx_msr_bitmap())
++ return true;
++
++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
++
++ if (msr <= 0x1fff) {
++ return !!test_bit(msr, msr_bitmap + 0x800 / f);
++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
++ msr &= 0x1fff;
++ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
++ }
++
++ return true;
++}
++
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+ unsigned long entry, unsigned long exit)
+ {
+@@ -2252,6 +2278,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+ vmcs_load(vmx->loaded_vmcs->vmcs);
++ indirect_branch_prediction_barrier();
+ }
+
+ if (!already_loaded) {
+@@ -3048,6 +3075,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr_info);
+ break;
++ case MSR_IA32_PRED_CMD:
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has_ibpb(vcpu))
++ return 1;
++
++ if (data & ~PRED_CMD_IBPB)
++ return 1;
++
++ if (!data)
++ break;
++
++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
++
++ /*
++ * For non-nested:
++ * When it's written (to non-zero) for the first time, pass
++ * it through.
++ *
++ * For nested:
++ * The handling of the MSR bitmap for L2 guests is done in
++ * nested_vmx_merge_msr_bitmap. We should not touch the
++ * vmcs02.msr_bitmap here since it gets completely overwritten
++ * in the merging.
++ */
++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
++ MSR_TYPE_W);
++ break;
+ case MSR_IA32_CR_PAT:
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+@@ -9406,9 +9460,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct page *page;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
++ /*
++ * pred_cmd is trying to verify two things:
++ *
++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
++ * ensures that we do not accidentally generate an L02 MSR bitmap
++ * from the L12 MSR bitmap that is too permissive.
++ * 2. That L1 or L2s have actually used the MSR. This avoids
++ * unnecessarily merging of the bitmap if the MSR is unused. This
++ * works properly because we only update the L01 MSR bitmap lazily.
++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
++ * updated to reflect this when L1 (or its L2s) actually write to
++ * the MSR.
++ */
++ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+
+- /* This shortcut is ok because we support only x2APIC MSRs so far. */
+- if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
++ !pred_cmd)
+ return false;
+
+ page = nested_get_page(vcpu, vmcs12->msr_bitmap);
+@@ -9443,6 +9511,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ MSR_TYPE_W);
+ }
+ }
++
++ if (pred_cmd)
++ nested_vmx_disable_intercept_for_msr(
++ msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_PRED_CMD,
++ MSR_TYPE_W);
++
+ kunmap(page);
+ nested_release_page_clean(page);
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch
new file mode 100644
index 00000000..538a1137
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch
@@ -0,0 +1,156 @@
+From dc7636423649302a329856f238df8820b9c7dc28 Mon Sep 17 00:00:00 2001
+From: KarimAllah Ahmed <karahmed@amazon.de>
+Date: Thu, 1 Feb 2018 22:59:44 +0100
+Subject: [PATCH 21/33] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES
+
+(cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd)
+
+Intel processors use the MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO
+(bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the
+contents will come directly from the hardware, but user-space can still
+override it.
+
+[dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional]
+
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: kvm@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 8 +++++++-
+ arch/x86/kvm/cpuid.h | 8 ++++++++
+ arch/x86/kvm/vmx.c | 15 +++++++++++++++
+ arch/x86/kvm/x86.c | 1 +
+ 4 files changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 42323be..4d3555b 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ /* cpuid 7.0.ecx*/
+ const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+
++ /* cpuid 7.0.edx*/
++ const u32 kvm_cpuid_7_0_edx_x86_features =
++ F(ARCH_CAPABILITIES);
++
+ /* all calls to cpuid_count() should be made on the same cpu */
+ get_cpu();
+
+@@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ /* PKU is not yet implemented for shadow paging. */
+ if (!tdp_enabled)
+ entry->ecx &= ~F(PKU);
++ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
++ cpuid_mask(&entry->edx, CPUID_7_EDX);
+ } else {
+ entry->ebx = 0;
+ entry->ecx = 0;
++ entry->edx = 0;
+ }
+ entry->eax = 0;
+- entry->edx = 0;
+ break;
+ }
+ case 9:
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
+index f4a2a1a..a69906c 100644
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -163,6 +163,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+ }
+
++static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
++{
++ struct kvm_cpuid_entry2 *best;
++
++ best = kvm_find_cpuid_entry(vcpu, 7, 0);
++ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
++}
++
+
+ /*
+ * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 855df75..d8e3c02 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -551,6 +551,8 @@ struct vcpu_vmx {
+ u64 msr_guest_kernel_gs_base;
+ #endif
+
++ u64 arch_capabilities;
++
+ u32 vm_entry_controls_shadow;
+ u32 vm_exit_controls_shadow;
+ /*
+@@ -2976,6 +2978,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ msr_info->data = guest_read_tsc(vcpu);
+ break;
++ case MSR_IA32_ARCH_CAPABILITIES:
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has_arch_capabilities(vcpu))
++ return 1;
++ msr_info->data = to_vmx(vcpu)->arch_capabilities;
++ break;
+ case MSR_IA32_SYSENTER_CS:
+ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
+ break;
+@@ -3102,6 +3110,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+ break;
++ case MSR_IA32_ARCH_CAPABILITIES:
++ if (!msr_info->host_initiated)
++ return 1;
++ vmx->arch_capabilities = data;
++ break;
+ case MSR_IA32_CR_PAT:
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+@@ -5173,6 +5186,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ ++vmx->nmsrs;
+ }
+
++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+
+ vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index abbb37a..d01742e 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -975,6 +975,7 @@ static u32 msrs_to_save[] = {
+ #endif
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
++ MSR_IA32_ARCH_CAPABILITIES
+ };
+
+ static unsigned num_msrs_to_save;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
new file mode 100644
index 00000000..9a833616
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
@@ -0,0 +1,305 @@
+From 3a5351279f63e7822bbfe5c0f4ee3d5a1a5bced1 Mon Sep 17 00:00:00 2001
+From: KarimAllah Ahmed <karahmed@amazon.de>
+Date: Thu, 1 Feb 2018 22:59:45 +0100
+Subject: [PATCH 22/33] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL
+
+(cherry picked from commit d28b387fb74da95d69d2615732f50cceb38e9a4d)
+
+[ Based on a patch from Ashok Raj <ashok.raj@intel.com> ]
+
+Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for
+guests that will only mitigate Spectre V2 through IBRS+IBPB and will not
+be using a retpoline+IBPB based approach.
+
+To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for
+guests that do not actually use the MSR, only start saving and restoring
+when a non-zero value is written to it.
+
+No attempt is made to handle STIBP here, intentionally. Filtering STIBP
+may be added in a future patch, which may require trapping all writes
+if we don't want to pass it through directly to the guest.
+
+[dwmw2: Clean up CPUID bits, save/restore manually, handle reset]
+
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: kvm@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 8 ++--
+ arch/x86/kvm/cpuid.h | 11 ++++++
+ arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++-
+ arch/x86/kvm/x86.c | 2 +-
+ 4 files changed, 118 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 4d3555b..bcebe84 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+- F(IBPB);
++ F(IBPB) | F(IBRS);
+
+ /* cpuid 0xC0000001.edx */
+ const u32 kvm_cpuid_C000_0001_edx_x86_features =
+@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+- F(ARCH_CAPABILITIES);
++ F(SPEC_CTRL) | F(ARCH_CAPABILITIES);
+
+ /* all calls to cpuid_count() should be made on the same cpu */
+ get_cpu();
+@@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ g_phys_as = phys_as;
+ entry->eax = g_phys_as | (virt_as << 8);
+ entry->edx = 0;
+- /* IBPB isn't necessarily present in hardware cpuid */
++ /* IBRS and IBPB aren't necessarily present in hardware cpuid */
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ entry->ebx |= F(IBPB);
++ if (boot_cpu_has(X86_FEATURE_IBRS))
++ entry->ebx |= F(IBRS);
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ break;
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
+index a69906c..841e80d 100644
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -163,6 +163,17 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+ }
+
++static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu)
++{
++ struct kvm_cpuid_entry2 *best;
++
++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
++ if (best && (best->ebx & bit(X86_FEATURE_IBRS)))
++ return true;
++ best = kvm_find_cpuid_entry(vcpu, 7, 0);
++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
++}
++
+ static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_cpuid_entry2 *best;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index d8e3c02..c564d03 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -552,6 +552,7 @@ struct vcpu_vmx {
+ #endif
+
+ u64 arch_capabilities;
++ u64 spec_ctrl;
+
+ u32 vm_entry_controls_shadow;
+ u32 vm_exit_controls_shadow;
+@@ -1847,6 +1848,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+ }
+
+ /*
++ * Check if MSR is intercepted for currently loaded MSR bitmap.
++ */
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
++{
++ unsigned long *msr_bitmap;
++ int f = sizeof(unsigned long);
++
++ if (!cpu_has_vmx_msr_bitmap())
++ return true;
++
++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
++
++ if (msr <= 0x1fff) {
++ return !!test_bit(msr, msr_bitmap + 0x800 / f);
++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
++ msr &= 0x1fff;
++ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
++ }
++
++ return true;
++}
++
++/*
+ * Check if MSR is intercepted for L01 MSR bitmap.
+ */
+ static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+@@ -2978,6 +3002,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ msr_info->data = guest_read_tsc(vcpu);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has_ibrs(vcpu))
++ return 1;
++
++ msr_info->data = to_vmx(vcpu)->spec_ctrl;
++ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_arch_capabilities(vcpu))
+@@ -3083,6 +3114,36 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr_info);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has_ibrs(vcpu))
++ return 1;
++
++ /* The STIBP bit doesn't fault even if it's not advertised */
++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
++ return 1;
++
++ vmx->spec_ctrl = data;
++
++ if (!data)
++ break;
++
++ /*
++ * For non-nested:
++ * When it's written (to non-zero) for the first time, pass
++ * it through.
++ *
++ * For nested:
++ * The handling of the MSR bitmap for L2 guests is done in
++ * nested_vmx_merge_msr_bitmap. We should not touch the
++ * vmcs02.msr_bitmap here since it gets completely overwritten
++ * in the merging. We update the vmcs01 here for L1 as well
++ * since it will end up touching the MSR anyway now.
++ */
++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
++ MSR_IA32_SPEC_CTRL,
++ MSR_TYPE_RW);
++ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+@@ -5216,6 +5277,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ u64 cr0;
+
+ vmx->rmode.vm86_active = 0;
++ vmx->spec_ctrl = 0;
+
+ vmx->soft_vnmi_blocked = 0;
+
+@@ -8806,6 +8868,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+
+ vmx_arm_hv_timer(vcpu);
+
++ /*
++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
++ * it's non-zero. Since vmentry is serialising on affected CPUs, there
++ * is no need to worry about the conditional branch over the wrmsr
++ * being speculatively taken.
++ */
++ if (vmx->spec_ctrl)
++ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++
+ vmx->__launched = vmx->loaded_vmcs->launched;
+ asm(
+ /* Store host registers */
+@@ -8924,6 +8995,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ /*
++ * We do not use IBRS in the kernel. If this vCPU has used the
++ * SPEC_CTRL MSR it may have left it on; save the value and
++ * turn it off. This is much more efficient than blindly adding
++ * it to the atomic save/restore list. Especially as the former
++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
++ *
++ * For non-nested case:
++ * If the L01 MSR bitmap does not intercept the MSR, then we need to
++ * save it.
++ *
++ * For nested case:
++ * If the L02 MSR bitmap does not intercept the MSR, then we need to
++ * save it.
++ */
++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++
++ if (vmx->spec_ctrl)
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+@@ -9476,7 +9568,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+ /*
+- * pred_cmd is trying to verify two things:
++ * pred_cmd & spec_ctrl are trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+@@ -9489,9 +9581,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ * the MSR.
+ */
+ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
++ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+
+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+- !pred_cmd)
++ !pred_cmd && !spec_ctrl)
+ return false;
+
+ page = nested_get_page(vcpu, vmcs12->msr_bitmap);
+@@ -9527,6 +9620,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+ }
+ }
+
++ if (spec_ctrl)
++ nested_vmx_disable_intercept_for_msr(
++ msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_SPEC_CTRL,
++ MSR_TYPE_R | MSR_TYPE_W);
++
+ if (pred_cmd)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index d01742e..d2ea523 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -975,7 +975,7 @@ static u32 msrs_to_save[] = {
+ #endif
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+- MSR_IA32_ARCH_CAPABILITIES
++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
+ };
+
+ static unsigned num_msrs_to_save;
+--
+2.7.4
+
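For reference, the msr_write_intercepted() helper added above indexes the
4 KiB VMX MSR permission bitmap directly: the write-intercept bits for MSRs
0x0-0x1fff start at byte offset 0x800 and those for 0xc0000000-0xc0001fff at
0xc00, one bit per MSR (the "/ f" arithmetic only converts those byte offsets
for the unsigned long pointer). A minimal stand-alone C sketch of the same
lookup, using illustrative names that are not kernel APIs:

#include <stdbool.h>
#include <stdint.h>

/* Byte offsets of the write-intercept halves of the VMX MSR bitmap page. */
#define VMX_MSR_BITMAP_WRITE_LOW   0x800  /* MSRs 0x00000000 - 0x00001fff */
#define VMX_MSR_BITMAP_WRITE_HIGH  0xc00  /* MSRs 0xc0000000 - 0xc0001fff */

static bool bitmap_test_bit(const uint8_t *bitmap, uint32_t bit)
{
        return bitmap[bit / 8] & (1u << (bit % 8));
}

/* Returns true when a guest WRMSR to @msr would cause a VM exit. */
static bool msr_write_intercepted_sketch(const uint8_t *msr_bitmap, uint32_t msr)
{
        if (msr <= 0x1fff)
                return bitmap_test_bit(msr_bitmap + VMX_MSR_BITMAP_WRITE_LOW, msr);
        if (msr >= 0xc0000000 && msr <= 0xc0001fff)
                return bitmap_test_bit(msr_bitmap + VMX_MSR_BITMAP_WRITE_HIGH,
                                       msr & 0x1fff);
        return true;    /* MSRs outside both ranges always intercept */
}

This mirrors why vmx_vcpu_run() only bothers with the RDMSR after a VM exit
when the bit is clear, i.e. when the guest could have written SPEC_CTRL
without trapping.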
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
new file mode 100644
index 00000000..905134c7
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
@@ -0,0 +1,192 @@
+From c8b2b4bc3e5eddb48f6eda57e9138a2ea2d39345 Mon Sep 17 00:00:00 2001
+From: KarimAllah Ahmed <karahmed@amazon.de>
+Date: Sat, 3 Feb 2018 15:56:23 +0100
+Subject: [PATCH 23/33] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL
+
+(cherry picked from commit b2ac58f90540e39324e7a29a7ad471407ae0bf48)
+
+[ Based on a patch from Paolo Bonzini <pbonzini@redhat.com> ]
+
+... basically doing exactly what we do for VMX:
+
+- Passthrough SPEC_CTRL to guests (if enabled in guest CPUID)
+- Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest
+ actually used it.
+
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: kvm@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 88 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 43e45b9..4a36977 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -183,6 +183,8 @@ struct vcpu_svm {
+ u64 gs_base;
+ } host;
+
++ u64 spec_ctrl;
++
+ u32 *msrpm;
+
+ ulong nmi_iret_rip;
+@@ -248,6 +250,7 @@ static const struct svm_direct_access_msrs {
+ { .index = MSR_CSTAR, .always = true },
+ { .index = MSR_SYSCALL_MASK, .always = true },
+ #endif
++ { .index = MSR_IA32_SPEC_CTRL, .always = false },
+ { .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
+ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
+@@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 index)
+ return false;
+ }
+
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
++{
++ u8 bit_write;
++ unsigned long tmp;
++ u32 offset;
++ u32 *msrpm;
++
++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
++ to_svm(vcpu)->msrpm;
++
++ offset = svm_msrpm_offset(msr);
++ bit_write = 2 * (msr & 0x0f) + 1;
++ tmp = msrpm[offset];
++
++ BUG_ON(offset == MSR_INVALID);
++
++ return !!test_bit(bit_write, &tmp);
++}
++
+ static void set_msr_interception(u32 *msrpm, unsigned msr,
+ int read, int write)
+ {
+@@ -1534,6 +1556,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ u32 dummy;
+ u32 eax = 1;
+
++ svm->spec_ctrl = 0;
++
+ if (!init_event) {
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+@@ -3515,6 +3539,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ case MSR_VM_CR:
+ msr_info->data = svm->nested.vm_cr_msr;
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has_ibrs(vcpu))
++ return 1;
++
++ msr_info->data = svm->spec_ctrl;
++ break;
+ case MSR_IA32_UCODE_REV:
+ msr_info->data = 0x01000065;
+ break;
+@@ -3599,6 +3630,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr);
+ break;
++ case MSR_IA32_SPEC_CTRL:
++ if (!msr->host_initiated &&
++ !guest_cpuid_has_ibrs(vcpu))
++ return 1;
++
++ /* The STIBP bit doesn't fault even if it's not advertised */
++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
++ return 1;
++
++ svm->spec_ctrl = data;
++
++ if (!data)
++ break;
++
++ /*
++ * For non-nested:
++ * When it's written (to non-zero) for the first time, pass
++ * it through.
++ *
++ * For nested:
++ * The handling of the MSR bitmap for L2 guests is done in
++ * nested_svm_vmrun_msrpm.
++ * We update the L1 MSR bit as well since it will end up
++ * touching the MSR anyway now.
++ */
++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
++ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+@@ -4842,6 +4900,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+
+ local_irq_enable();
+
++ /*
++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
++ * it's non-zero. Since vmentry is serialising on affected CPUs, there
++ * is no need to worry about the conditional branch over the wrmsr
++ * being speculatively taken.
++ */
++ if (svm->spec_ctrl)
++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
+ asm volatile (
+ "push %%" _ASM_BP "; \n\t"
+ "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+@@ -4934,6 +5001,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ /*
++ * We do not use IBRS in the kernel. If this vCPU has used the
++ * SPEC_CTRL MSR it may have left it on; save the value and
++ * turn it off. This is much more efficient than blindly adding
++ * it to the atomic save/restore list. Especially as the former
++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
++ *
++ * For non-nested case:
++ * If the L01 MSR bitmap does not intercept the MSR, then we need to
++ * save it.
++ *
++ * For nested case:
++ * If the L02 MSR bitmap does not intercept the MSR, then we need to
++ * save it.
++ */
++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
++ if (svm->spec_ctrl)
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+--
+2.7.4
+
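The SVM variant of msr_write_intercepted() above decodes AMD's MSR permission
map instead: every MSR owns a read/write bit pair and each of the three
architectural MSR ranges (based at 0x0, 0xc0000000 and 0xc0010000) occupies
2 KiB, which is where the otherwise cryptic bit_write = 2 * (msr & 0x0f) + 1
comes from (16 pairs per 32-bit word). A byte-granular C sketch of the same
lookup, with illustrative names and the range bases assumed from the AMD APM:

#include <stdbool.h>
#include <stdint.h>

#define SVM_MSRS_PER_RANGE   0x2000u                   /* 8192 MSRs per region */
#define SVM_BYTES_PER_RANGE  (SVM_MSRS_PER_RANGE / 4)  /* 4 MSRs per byte      */

static const uint32_t svm_msrpm_bases[3] = { 0x00000000, 0xc0000000, 0xc0010000 };

/* Returns true when a guest WRMSR to @msr is intercepted by the MSRPM. */
static bool svm_msr_write_intercepted_sketch(const uint8_t *msrpm, uint32_t msr)
{
        for (int i = 0; i < 3; i++) {
                if (msr < svm_msrpm_bases[i] ||
                    msr >= svm_msrpm_bases[i] + SVM_MSRS_PER_RANGE)
                        continue;

                uint32_t idx  = msr - svm_msrpm_bases[i];          /* index in range    */
                uint32_t byte = i * SVM_BYTES_PER_RANGE + idx / 4; /* 2 bits per MSR    */
                uint32_t bit  = 2 * (idx % 4) + 1;                 /* write bit of pair */

                return msrpm[byte] & (1u << bit);
        }
        return true;    /* MSRs outside all three ranges always intercept */
}

On little-endian x86 this byte addressing is equivalent to the kernel's
test_bit() on the u32-typed map, so the result matches the helper in the
patch.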
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch
new file mode 100644
index 00000000..8feed73a
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch
@@ -0,0 +1,100 @@
+From 36417bad8e288e64df1067207030c67304c26ee5 Mon Sep 17 00:00:00 2001
+From: Liran Alon <liran.alon@oracle.com>
+Date: Thu, 9 Nov 2017 20:27:20 +0200
+Subject: [PATCH 24/33] KVM: nVMX: Fix races when sending nested PI while dest
+ enters/leaves L2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream.
+
+Consider the following scenario:
+1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI
+to CPU B via virtual posted-interrupt mechanism.
+2. CPU B is currently executing L2 guest.
+3. vmx_deliver_nested_posted_interrupt() calls
+kvm_vcpu_trigger_posted_interrupt() which will note that
+vcpu->mode == IN_GUEST_MODE.
+4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR
+IPI, CPU B exits from L2 to L0 during event-delivery
+(valid IDT-vectoring-info).
+5. CPU A now sends the physical IPI. The IPI is received in host and
+its handler (smp_kvm_posted_intr_nested_ipi()) does nothing.
+6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT,
+CPU B continues to run in L0 and reach vcpu_enter_guest(). As
+KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume
+L2 guest.
+7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but
+it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be
+consumed at next L2 entry!
+
+Another scenario to consider:
+1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI
+to CPU B via virtual posted-interrupt mechanism.
+2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(),
+CPU B is at L0 and is about to resume into L2. Further assume that it is
+in vcpu_enter_guest() after check for KVM_REQ_EVENT.
+3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which
+will note that vcpu->mode != IN_GUEST_MODE, and will therefore do nothing
+and return false. It will then set pi_pending=true and KVM_REQ_EVENT.
+4. Now CPU B continues and resumes into the L2 guest without processing
+the posted-interrupt until the next L2 entry!
+
+To fix both issues, we just need to change
+vmx_deliver_nested_posted_interrupt() to set pi_pending=true and
+KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt().
+
+It will fix the first scenario by changing step (6) to note that
+KVM_REQ_EVENT and pi_pending=true and therefore process
+nested posted-interrupt.
+
+It will fix the second scenario by two possible ways:
+1. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed
+vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received
+when CPU resumes into L2.
+2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet
+changed vcpu->mode to IN_GUEST_MODE, then after CPU B changes
+vcpu->mode it will call kvm_request_pending(), which will return true and
+therefore force another round of vcpu_enter_guest() which will note that
+KVM_REQ_EVENT and pi_pending=true and therefore process nested
+posted-interrupt.
+
+Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing")
+Signed-off-by: Liran Alon <liran.alon@oracle.com>
+Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
+Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
+[Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT
+ and L2 executes HLT instruction. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index c564d03..85078c7 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4944,14 +4944,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
+
+ if (is_guest_mode(vcpu) &&
+ vector == vmx->nested.posted_intr_nv) {
+- /* the PIR and ON have been set by L1. */
+- kvm_vcpu_trigger_posted_interrupt(vcpu);
+ /*
+ * If a posted intr is not recognized by hardware,
+ * we will accomplish it in the next vmentry.
+ */
+ vmx->nested.pi_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
++ /* the PIR and ON have been set by L1. */
++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
++ kvm_vcpu_kick(vcpu);
+ return 0;
+ }
+ return -1;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch
new file mode 100644
index 00000000..eb633c9c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch
@@ -0,0 +1,103 @@
+From 15ca5afe3e56a0f80151aa4b6f06233b39736a2e Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 10 Feb 2018 23:39:24 +0000
+Subject: [PATCH 25/33] KVM/x86: Reduce retpoline performance impact in
+ slot_handle_level_range(), by always inlining iterator helper methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 928a4c39484281f8ca366f53a1db79330d058401 upstream.
+
+With retpoline, tight loops of "call this function for every XXX" are
+very much pessimised by taking a prediction miss *every* time. This one
+is by far the biggest contributor to the guest launch time with retpoline.
+
+By marking the iterator slot_handle_…() functions always_inline, we can
+ensure that the indirect function call can be optimised away into a
+direct call and it actually generates slightly smaller code because
+some of the other conditionals can get optimised away too.
+
+Performance is now pretty close to what we see with nospectre_v2 on
+the command line.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Tested-by: Filippo Sironi <sironi@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Reviewed-by: Filippo Sironi <sironi@amazon.de>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: arjan.van.de.ven@intel.com
+Cc: dave.hansen@intel.com
+Cc: jmattson@google.com
+Cc: karahmed@amazon.de
+Cc: kvm@vger.kernel.org
+Cc: rkrcmar@redhat.com
+Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index d9c7e98..ee4af7a 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -4636,7 +4636,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
+ typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+
+ /* The caller should hold mmu-lock before calling this function. */
+-static bool
++static __always_inline bool
+ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, int start_level, int end_level,
+ gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+@@ -4666,7 +4666,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ return flush;
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, int start_level, int end_level,
+ bool lock_flush_tlb)
+@@ -4677,7 +4677,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4685,7 +4685,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+@@ -4693,7 +4693,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ }
+
+-static bool
++static __always_inline bool
+ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ slot_level_handler fn, bool lock_flush_tlb)
+ {
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch
new file mode 100644
index 00000000..38255613
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch
@@ -0,0 +1,70 @@
+From 75a724909e81cd4612490d633ab269495377d332 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Wed, 13 Dec 2017 10:46:40 +0100
+Subject: [PATCH 26/33] KVM: x86: fix escape of guest dr6 to the host
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit efdab992813fb2ed825745625b83c05032e9cda2 upstream.
+
+syzkaller reported:
+
+ WARNING: CPU: 0 PID: 12927 at arch/x86/kernel/traps.c:780 do_debug+0x222/0x250
+ CPU: 0 PID: 12927 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #16
+ RIP: 0010:do_debug+0x222/0x250
+ Call Trace:
+ <#DB>
+ debug+0x3e/0x70
+ RIP: 0010:copy_user_enhanced_fast_string+0x10/0x20
+ </#DB>
+ _copy_from_user+0x5b/0x90
+ SyS_timer_create+0x33/0x80
+ entry_SYSCALL_64_fastpath+0x23/0x9a
+
+The testcase sets a watchpoint (with perf_event_open) on a buffer that is
+passed to timer_create() as the struct sigevent argument. In timer_create(),
+copy_from_user()'s rep movsb triggers the BP. The testcase also sets
+the debug registers for the guest.
+
+However, KVM only restores host debug registers when the host has active
+watchpoints, which triggers a race condition when running the testcase with
+multiple threads. The guest's DR6.BS bit can escape to the host before
+another thread invokes timer_create(), and do_debug() complains.
+
+The fix is to respect do_debug()'s dr6 invariant when leaving KVM.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index d2ea523..af333e1 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2833,6 +2833,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ kvm_x86_ops->vcpu_put(vcpu);
+ kvm_put_guest_fpu(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
++ /*
++ * If userspace has set any breakpoints or watchpoints, dr6 is restored
++ * on every vmexit, but if not, we might have a stale dr6 from the
++ * guest. do_debug expects dr6 to be cleared after it runs, do the same.
++ */
++ set_debugreg(0, 6);
+ }
+
+ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch
new file mode 100644
index 00000000..ef01a1cb
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-x86-add-MULTIUSER-dependency-for-KVM.patch
@@ -0,0 +1,37 @@
+From 216ac4ef7d2da59cd2b3d6e34e559c7ef49a143d Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 19 Jul 2017 14:53:04 +0200
+Subject: [PATCH 27/33] x86: add MULTIUSER dependency for KVM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit c2ce3f5d89d57301e2756ac325fe2ebc33bfec30 upstream.
+
+KVM tries to select 'TASKSTATS', which had additional dependencies:
+
+warning: (KVM) selects TASKSTATS which has unmet direct dependencies (NET && MULTIUSER)
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
+index ab8e32f..66da97d 100644
+--- a/arch/x86/kvm/Kconfig
++++ b/arch/x86/kvm/Kconfig
+@@ -22,7 +22,7 @@ config KVM
+ depends on HAVE_KVM
+ depends on HIGH_RES_TIMERS
+ # for TASKSTATS/TASK_DELAY_ACCT:
+- depends on NET
++ depends on NET && MULTIUSER
+ select PREEMPT_NOTIFIERS
+ select MMU_NOTIFIER
+ select ANON_INODES
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch
new file mode 100644
index 00000000..5c62ba8b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0028-KVM-add-X86_LOCAL_APIC-dependency.patch
@@ -0,0 +1,41 @@
+From 7e8b0d6af232b1d642960ca4fb026a70bfaf1206 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 4 Oct 2017 12:28:18 +0200
+Subject: [PATCH 28/33] KVM: add X86_LOCAL_APIC dependency
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit e42eef4ba38806b18c4a74f0c276fb2e0b548173 upstream.
+
+The rework of the posted interrupt handling broke building without
+support for the local APIC:
+
+ERROR: "boot_cpu_physical_apicid" [arch/x86/kvm/kvm-intel.ko] undefined!
+
+That configuration is probably not particularly useful anyway, so
+we can avoid the randconfig failures by adding a Kconfig dependency.
+
+Fixes: 8b306e2f3c41 ("KVM: VMX: avoid double list add with VT-d posted interrupts")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
+index 66da97d..9150e09 100644
+--- a/arch/x86/kvm/Kconfig
++++ b/arch/x86/kvm/Kconfig
+@@ -23,6 +23,7 @@ config KVM
+ depends on HIGH_RES_TIMERS
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET && MULTIUSER
++ depends on X86_LOCAL_APIC
+ select PREEMPT_NOTIFIERS
+ select MMU_NOTIFIER
+ select ANON_INODES
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch
new file mode 100644
index 00000000..b50d5453
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch
@@ -0,0 +1,105 @@
+From 8e13680f134458dd1b0529ccb636ae5895fa8a4d Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Thu, 14 Sep 2017 03:54:16 -0700
+Subject: [PATCH 29/33] KVM: async_pf: Fix #DF due to inject "Page not Present"
+ and "Page Ready" exceptions simultaneously
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 9a6e7c39810e4a8bc7fc95056cefb40583fe07ef upstream.
+
+qemu-system-x86-8600 [004] d..1 7205.687530: kvm_entry: vcpu 2
+qemu-system-x86-8600 [004] .... 7205.687532: kvm_exit: reason EXCEPTION_NMI rip 0xffffffffa921297d info ffffeb2c0e44e018 80000b0e
+qemu-system-x86-8600 [004] .... 7205.687532: kvm_page_fault: address ffffeb2c0e44e018 error_code 0
+qemu-system-x86-8600 [004] .... 7205.687620: kvm_try_async_get_page: gva = 0xffffeb2c0e44e018, gfn = 0x427e4e
+qemu-system-x86-8600 [004] .N.. 7205.687628: kvm_async_pf_not_present: token 0x8b002 gva 0xffffeb2c0e44e018
+ kworker/4:2-7814 [004] .... 7205.687655: kvm_async_pf_completed: gva 0xffffeb2c0e44e018 address 0x7fcc30c4e000
+qemu-system-x86-8600 [004] .... 7205.687703: kvm_async_pf_ready: token 0x8b002 gva 0xffffeb2c0e44e018
+qemu-system-x86-8600 [004] d..1 7205.687711: kvm_entry: vcpu 2
+
+After running some memory-intensive workload in the guest, I caught the
+kworker completing the GUP too quickly and queueing a "Page Ready" #PF
+exception after the "Page not Present" exception, before the next vmentry,
+as in the trace above, which results in a #DF injected into the guest.
+
+This patch fixes it by clearing the queued "Page not Present" exception when
+"Page Ready" occurs before the next vmentry, since the GUP has already fetched
+the required page and the shadow page table has already been fixed by the
+"Page Ready" handler.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Fixes: 7c90705bf2a3 ("KVM: Inject asynchronous page fault into a PV guest if page is swapped out.")
+[Changed indentation and added clearing of injected. - Radim]
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+[port from upstream v4.14-rc1, Don't assign to kvm_queued_exception::injected or
+ x86_exception::async_page_fault]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 34 ++++++++++++++++++++++++++--------
+ 1 file changed, 26 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index af333e1..9f0f7e2 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8370,6 +8370,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+ sizeof(val));
+ }
+
++static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
++{
++
++ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
++ sizeof(u32));
++}
++
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+@@ -8396,6 +8403,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+ struct x86_exception fault;
++ u32 val;
+
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
+ if (work->wakeup_all)
+@@ -8403,14 +8411,24 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ else
+ kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+
+- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
+- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+- fault.vector = PF_VECTOR;
+- fault.error_code_valid = true;
+- fault.error_code = 0;
+- fault.nested_page_fault = false;
+- fault.address = work->arch.token;
+- kvm_inject_page_fault(vcpu, &fault);
++ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
++ !apf_get_user(vcpu, &val)) {
++ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
++ vcpu->arch.exception.pending &&
++ vcpu->arch.exception.nr == PF_VECTOR &&
++ !apf_put_user(vcpu, 0)) {
++ vcpu->arch.exception.pending = false;
++ vcpu->arch.exception.nr = 0;
++ vcpu->arch.exception.has_error_code = false;
++ vcpu->arch.exception.error_code = 0;
++ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
++ fault.vector = PF_VECTOR;
++ fault.error_code_valid = true;
++ fault.error_code = 0;
++ fault.nested_page_fault = false;
++ fault.address = work->arch.token;
++ kvm_inject_page_fault(vcpu, &fault);
++ }
+ }
+ vcpu->arch.apf.halted = false;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch
new file mode 100644
index 00000000..fefa3aac
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch
@@ -0,0 +1,57 @@
+From 9d11f29130341345dee37007dd76b9c4e83956a9 Mon Sep 17 00:00:00 2001
+From: Jan Dakinevich <jan.dakinevich@gmail.com>
+Date: Fri, 23 Feb 2018 11:42:17 +0100
+Subject: [PATCH 30/33] KVM: VMX: clean up declaration of VPID/EPT invalidation
+ types
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 63f3ac48133a19110c8a3666028dbd9b1bf3dcb3 upstream
+
+- Remove VMX_EPT_EXTENT_INDIVIDUAL_ADDR, since there is no such type of
+ EPT invalidation
+
+ - Add missing VPID types names
+
+Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com>
+Tested-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+[jwang: port to 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/vmx.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
+index a002b07..6899cf1 100644
+--- a/arch/x86/include/asm/vmx.h
++++ b/arch/x86/include/asm/vmx.h
+@@ -399,10 +399,11 @@ enum vmcs_field {
+ #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
+
+ #define VMX_NR_VPIDS (1 << 16)
++#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0
+ #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
+ #define VMX_VPID_EXTENT_ALL_CONTEXT 2
++#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3
+
+-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
+ #define VMX_EPT_EXTENT_CONTEXT 1
+ #define VMX_EPT_EXTENT_GLOBAL 2
+ #define VMX_EPT_EXTENT_SHIFT 24
+@@ -419,8 +420,10 @@ enum vmcs_field {
+ #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
+
+ #define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
++#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */
+ #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
+ #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
++#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
+
+ #define VMX_EPT_DEFAULT_GAW 3
+ #define VMX_EPT_MAX_GAW 0x4
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch
new file mode 100644
index 00000000..e96f0d9b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0031-KVM-nVMX-invvpid-handling-improvements.patch
@@ -0,0 +1,102 @@
+From 1d5388c0b1e6eef66d7999451bb22cddf4cc5546 Mon Sep 17 00:00:00 2001
+From: Jan Dakinevich <jan.dakinevich@gmail.com>
+Date: Fri, 23 Feb 2018 11:42:18 +0100
+Subject: [PATCH 31/33] KVM: nVMX: invvpid handling improvements
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit bcdde302b8268ef7dbc4ddbdaffb5b44eafe9a1e upstream
+
+ - Expose all invalidation types to the L1
+
+ - Reject invvpid instruction, if L1 passed zero vpid value to single
+ context invalidations
+
+Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com>
+Tested-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+[jwang: port to 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 36 ++++++++++++++++++++++++------------
+ 1 file changed, 24 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 85078c7..f6c0568 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -142,6 +142,12 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
+
+ #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
++#define VMX_VPID_EXTENT_SUPPORTED_MASK \
++ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
++ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
++ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
++ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
++
+ /*
+ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+ * ple_gap: upper bound on the amount of time between two successive
+@@ -2836,8 +2842,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
+ */
+ if (enable_vpid)
+ vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
+- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
++ VMX_VPID_EXTENT_SUPPORTED_MASK;
+ else
+ vmx->nested.nested_vmx_vpid_caps = 0;
+
+@@ -7671,7 +7676,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
+ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
++ types = (vmx->nested.nested_vmx_vpid_caps &
++ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
+
+ if (type >= 32 || !(types & (1 << type))) {
+ nested_vmx_failValid(vcpu,
+@@ -7693,21 +7699,27 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
+ }
+
+ switch (type) {
++ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ case VMX_VPID_EXTENT_SINGLE_CONTEXT:
+- /*
+- * Old versions of KVM use the single-context version so we
+- * have to support it; just treat it the same as all-context.
+- */
++ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
++ if (!vpid) {
++ nested_vmx_failValid(vcpu,
++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
++ skip_emulated_instruction(vcpu);
++ return 1;
++ }
++ break;
+ case VMX_VPID_EXTENT_ALL_CONTEXT:
+- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+- nested_vmx_succeed(vcpu);
+ break;
+ default:
+- /* Trap individual address invalidation invvpid calls */
+- BUG_ON(1);
+- break;
++ WARN_ON_ONCE(1);
++ skip_emulated_instruction(vcpu);
++ return 1;
+ }
+
++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
++ nested_vmx_succeed(vcpu);
++
+ skip_emulated_instruction(vcpu);
+ return 1;
+ }
+--
+2.7.4
+
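The widened types computation in handle_invvpid() works because bits 8-11 of
the VPID capability word (the *_BIT macros from the previous patch) map
one-to-one onto INVVPID types 0-3, so masking with
VMX_VPID_EXTENT_SUPPORTED_MASK and shifting right by 8 yields a bitmask that
can be indexed directly by type. A small C sketch of that check, restating
the constants from the patches above and using an illustrative function name:

#include <stdbool.h>
#include <stdint.h>

#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT    (1ull << 8)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT     (1ull << 9)
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT     (1ull << 10)
#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT  (1ull << 11)

#define VMX_VPID_EXTENT_SUPPORTED_MASK                 \
        (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |         \
         VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |          \
         VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |          \
         VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/* INVVPID type N (0..3) is advertised when capability bit 8 + N is set. */
static bool invvpid_type_supported(uint64_t vpid_caps, uint32_t type)
{
        uint32_t types = (uint32_t)((vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8);

        return type < 32 && (types & (1u << type));
}

The old "& 0x7" silently dropped capability bit 11, which is why type 3
(single-context-retaining-globals) could never be exposed to L1 before this
change.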
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch
new file mode 100644
index 00000000..4f0b4222
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch
@@ -0,0 +1,105 @@
+From 0ebeae5f6b25b48c0559950e2b7c2f0a1ffd641c Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 22 Feb 2018 16:43:17 +0100
+Subject: [PATCH 32/33] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit ecb586bd29c99fb4de599dec388658e74388daad upstream.
+
+Having a paravirt indirect call in the IBRS restore path is not a
+good idea, since we are trying to protect from speculative execution
+of bogus indirect branch targets. It is also slower, so use
+native_wrmsrl() on the vmentry path too.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: KarimAllah Ahmed <karahmed@amazon.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
+Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c | 7 ++++---
+ arch/x86/kvm/vmx.c | 7 ++++---
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 4a36977..8d33396 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -44,6 +44,7 @@
+ #include <asm/debugreg.h>
+ #include <asm/kvm_para.h>
+ #include <asm/irq_remapping.h>
++#include <asm/microcode.h>
+ #include <asm/nospec-branch.h>
+
+ #include <asm/virtext.h>
+@@ -4907,7 +4908,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ * being speculatively taken.
+ */
+ if (svm->spec_ctrl)
+- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
+ asm volatile (
+ "push %%" _ASM_BP "; \n\t"
+@@ -5017,10 +5018,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+ if (svm->spec_ctrl)
+- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index f6c0568..aa2684a 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -49,6 +49,7 @@
+ #include <asm/kexec.h>
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
++#include <asm/microcode.h>
+ #include <asm/nospec-branch.h>
+
+ #include "trace.h"
+@@ -8888,7 +8889,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ * being speculatively taken.
+ */
+ if (vmx->spec_ctrl)
+- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
+ vmx->__launched = vmx->loaded_vmcs->launched;
+ asm(
+@@ -9024,10 +9025,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
++ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+ if (vmx->spec_ctrl)
+- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch
new file mode 100644
index 00000000..95086730
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch
@@ -0,0 +1,65 @@
+From 885a241a441e144391884136534657f8502b2a48 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 22 Feb 2018 16:43:18 +0100
+Subject: [PATCH 33/33] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by
+ marking the RDMSR path as unlikely()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 946fbbc13dce68902f64515b610eeb2a6c3d7a64 upstream.
+
+vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving
+branch hints to the compiler can actually make a substantial cycle
+difference by keeping the fast path contiguous in memory.
+
+With this optimization, the retpoline-guest/retpoline-host case is
+about 50 cycles faster.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: KarimAllah Ahmed <karahmed@amazon.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c | 2 +-
+ arch/x86/kvm/vmx.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 8d33396..b82bb66 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -5017,7 +5017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+ if (svm->spec_ctrl)
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index aa2684a..3c3558b 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -9024,7 +9024,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+ if (vmx->spec_ctrl)
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc
index 06d6de30..70237be9 100644
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/upstream-backports.scc
@@ -167,3 +167,36 @@ patch 0011-x86-retpoline-Support-retpoline-builds-with-Clang.patch
patch 0012-x86-speculation-objtool-Annotate-indirect-calls-jump.patch
patch 0013-x86-boot-objtool-Annotate-indirect-jump-in-secondary.patch
patch 0014-x86-speculation-Move-firmware_restrict_branch_specul.patch
+patch 0001-KVM-Fix-stack-out-of-bounds-read-in-write_mmio.patch
+patch 0002-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch
+patch 0003-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch
+patch 0004-KVM-x86-emulator-Return-to-user-mode-on-L1-CPL-0-emu.patch
+patch 0005-KVM-x86-Don-t-re-execute-instruction-when-not-passin.patch
+patch 0006-KVM-X86-Fix-operand-address-size-during-instruction-.patch
+patch 0007-KVM-x86-ioapic-Fix-level-triggered-EOI-and-IOAPIC-re.patch
+patch 0008-KVM-x86-ioapic-Clear-Remote-IRR-when-entry-is-switch.patch
+patch 0009-KVM-x86-ioapic-Preserve-read-only-values-in-the-redi.patch
+patch 0010-KVM-VMX-Fix-rflags-cache-during-vCPU-reset.patch
+patch 0011-KVM-x86-Make-indirect-calls-in-emulator-speculation-.patch
+patch 0012-KVM-VMX-Make-indirect-call-speculation-safe.patch
+patch 0013-x86-kvm-Update-spectre-v1-mitigation.patch
+patch 0014-KVM-nVMX-kmap-can-t-fail.patch
+patch 0015-KVM-nVMX-vmx_complete_nested_posted_interrupt-can-t-.patch
+patch 0016-KVM-nVMX-mark-vmcs12-pages-dirty-on-L2-exit.patch
+patch 0017-KVM-nVMX-Eliminate-vmcs02-pool.patch
+patch 0018-KVM-VMX-introduce-alloc_loaded_vmcs.patch
+patch 0019-KVM-VMX-make-MSR-bitmaps-per-VCPU.patch
+patch 0020-KVM-x86-Add-IBPB-support.patch
+patch 0021-KVM-VMX-Emulate-MSR_IA32_ARCH_CAPABILITIES.patch
+patch 0022-KVM-VMX-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
+patch 0023-KVM-SVM-Allow-direct-access-to-MSR_IA32_SPEC_CTRL.patch
+patch 0024-KVM-nVMX-Fix-races-when-sending-nested-PI-while-dest.patch
+patch 0025-KVM-x86-Reduce-retpoline-performance-impact-in-slot_.patch
+patch 0026-KVM-x86-fix-escape-of-guest-dr6-to-the-host.patch
+patch 0027-x86-add-MULTIUSER-dependency-for-KVM.patch
+patch 0028-KVM-add-X86_LOCAL_APIC-dependency.patch
+patch 0029-KVM-async_pf-Fix-DF-due-to-inject-Page-not-Present-a.patch
+patch 0030-KVM-VMX-clean-up-declaration-of-VPID-EPT-invalidatio.patch
+patch 0031-KVM-nVMX-invvpid-handling-improvements.patch
+patch 0032-KVM-x86-Remove-indirect-MSR-op-calls-from-SPEC_CTRL.patch
+patch 0033-KVM-VMX-Optimize-vmx_vcpu_run-and-svm_vcpu_run-by-ma.patch