aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c259
1 files changed, 166 insertions, 93 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f379fe16aa8..2ac1cd4ae973 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,6 +188,9 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
+bool __read_mostly allow_smaller_maxphyaddr;
+EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
+
static u64 __read_mostly host_xss;
u64 __read_mostly supported_xss;
EXPORT_SYMBOL_GPL(supported_xss);
@@ -244,6 +247,29 @@ static struct kmem_cache *x86_fpu_cache;
static struct kmem_cache *x86_emulator_cache;
+/*
+ * When called, it means the previous get/set msr reached an invalid msr.
+ * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
+ * to fail the caller.
+ */
+static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
+ u64 data, bool write)
+{
+ const char *op = write ? "wrmsr" : "rdmsr";
+
+ if (ignore_msrs) {
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n",
+ op, msr, data);
+ /* Mask the error */
+ return 0;
+ } else {
+ vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n",
+ op, msr, data);
+ return 1;
+ }
+}
+
static struct kmem_cache *kvm_alloc_emulator_cache(void)
{
unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
@@ -380,7 +406,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
-asmlinkage __visible void kvm_spurious_fault(void)
+asmlinkage __visible noinstr void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
@@ -776,6 +802,7 @@ EXPORT_SYMBOL_GPL(pdptrs_changed);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
+ unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
cr0 |= X86_CR0_ET;
@@ -793,22 +820,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
return 1;
- if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
- if ((vcpu->arch.efer & EFER_LME)) {
- int cs_db, cs_l;
+ if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
+ (cr0 & X86_CR0_PG)) {
+ int cs_db, cs_l;
- if (!is_pae(vcpu))
- return 1;
- kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
- if (cs_l)
- return 1;
- } else
-#endif
- if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
- kvm_read_cr3(vcpu)))
+ if (!is_pae(vcpu))
+ return 1;
+ kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ if (cs_l)
return 1;
}
+#endif
+ if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
+ is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
+ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
+ return 1;
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
return 1;
@@ -917,7 +944,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
return 0;
}
@@ -932,37 +959,17 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
-#define __cr4_reserved_bits(__cpu_has, __c) \
-({ \
- u64 __reserved_bits = CR4_RESERVED_BITS; \
- \
- if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \
- __reserved_bits |= X86_CR4_OSXSAVE; \
- if (!__cpu_has(__c, X86_FEATURE_SMEP)) \
- __reserved_bits |= X86_CR4_SMEP; \
- if (!__cpu_has(__c, X86_FEATURE_SMAP)) \
- __reserved_bits |= X86_CR4_SMAP; \
- if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \
- __reserved_bits |= X86_CR4_FSGSBASE; \
- if (!__cpu_has(__c, X86_FEATURE_PKU)) \
- __reserved_bits |= X86_CR4_PKE; \
- if (!__cpu_has(__c, X86_FEATURE_LA57)) \
- __reserved_bits |= X86_CR4_LA57; \
- if (!__cpu_has(__c, X86_FEATURE_UMIP)) \
- __reserved_bits |= X86_CR4_UMIP; \
- __reserved_bits; \
-})
-
-static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
return -EINVAL;
- if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu))
+ if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_valid_cr4);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@@ -1001,7 +1008,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
kvm_mmu_reset_context(vcpu);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
return 0;
}
@@ -1111,7 +1118,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
case 4:
/* fall through */
case 6:
- if (val & 0xffffffff00000000ULL)
+ if (!kvm_dr6_valid(val))
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
break;
@@ -1392,8 +1399,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
rdmsrl_safe(msr->index, &msr->data);
break;
default:
- if (kvm_x86_ops.get_msr_feature(msr))
- return 1;
+ return kvm_x86_ops.get_msr_feature(msr);
}
return 0;
}
@@ -1405,6 +1411,13 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
msr.index = index;
r = kvm_get_msr_feature(&msr);
+
+ if (r == KVM_MSR_RET_INVALID) {
+ /* Unconditionally clear the output for simplicity */
+ *data = 0;
+ r = kvm_msr_ignored_check(vcpu, index, 0, false);
+ }
+
if (r)
return r;
@@ -1519,6 +1532,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return kvm_x86_ops.set_msr(vcpu, &msr);
}
+static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
+ u32 index, u64 data, bool host_initiated)
+{
+ int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
+
+ if (ret == KVM_MSR_RET_INVALID)
+ ret = kvm_msr_ignored_check(vcpu, index, data, true);
+
+ return ret;
+}
+
/*
* Read the MSR specified by @index into @data. Select MSR specific fault
* checks are bypassed if @host_initiated is %true.
@@ -1540,15 +1564,29 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return ret;
}
+static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
+ u32 index, u64 *data, bool host_initiated)
+{
+ int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
+
+ if (ret == KVM_MSR_RET_INVALID) {
+ /* Unconditionally clear *data for simplicity */
+ *data = 0;
+ ret = kvm_msr_ignored_check(vcpu, index, 0, false);
+ }
+
+ return ret;
+}
+
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- return __kvm_get_msr(vcpu, index, data, false);
+ return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- return __kvm_set_msr(vcpu, index, data, false);
+ return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);
@@ -1668,12 +1706,12 @@ EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
*/
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
- return __kvm_get_msr(vcpu, index, data, true);
+ return kvm_get_msr_ignored_check(vcpu, index, data, true);
}
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
- return __kvm_set_msr(vcpu, index, *data, true);
+ return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
#ifdef CONFIG_X86_64
@@ -2825,6 +2863,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.arch_capabilities = data;
break;
+ case MSR_IA32_PERF_CAPABILITIES: {
+ struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
+
+ if (!msr_info->host_initiated)
+ return 1;
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
+ return 1;
+ if (data & ~msr_ent.data)
+ return 1;
+
+ vcpu->arch.perf_capabilities = data;
+
+ return 0;
+ }
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
@@ -2884,7 +2936,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@@ -3069,17 +3121,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return xen_hvm_config(vcpu, data);
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
- if (!ignore_msrs) {
- vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
- return 1;
- } else {
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu,
- "ignored wrmsr: 0x%x data 0x%llx\n",
- msr, data);
- break;
- }
+ return KVM_MSR_RET_INVALID;
}
return 0;
}
@@ -3175,6 +3217,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.arch_capabilities;
break;
+ case MSR_IA32_PERF_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ return 1;
+ msr_info->data = vcpu->arch.perf_capabilities;
+ break;
case MSR_IA32_POWER_CTL:
msr_info->data = vcpu->arch.msr_ia32_power_ctl;
break;
@@ -3334,17 +3382,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
- if (!ignore_msrs) {
- vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
- msr_info->index);
- return 1;
- } else {
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
- msr_info->index);
- msr_info->data = 0;
- }
- break;
+ return KVM_MSR_RET_INVALID;
}
return 0;
}
@@ -3479,6 +3517,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_LAST_CPU:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -3541,6 +3580,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
break;
+ case KVM_CAP_SMALLER_MAXPHYADDR:
+ r = (int) allow_smaller_maxphyaddr;
+ break;
default:
break;
}
@@ -8157,7 +8199,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_x86_ops.set_efer(vcpu, 0);
#endif
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
kvm_mmu_reset_context(vcpu);
}
@@ -8509,7 +8551,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- guest_enter_irqoff();
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -8551,6 +8592,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active())
hw_breakpoint_restore();
+ vcpu->arch.last_vmentry_cpu = vcpu->cpu;
vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -8571,7 +8613,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_disable();
kvm_after_interrupt(vcpu);
- guest_exit_irqoff();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
if (delta != S64_MIN) {
@@ -9176,7 +9217,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
(X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
if (cpuid_update_needed)
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (is_pae_paging(vcpu)) {
@@ -9280,7 +9321,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
*/
kvm_set_rflags(vcpu, rflags);
- kvm_x86_ops.update_bp_intercept(vcpu);
+ kvm_x86_ops.update_exception_bitmap(vcpu);
r = 0;
@@ -9478,7 +9519,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@@ -10629,11 +10669,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ int ret;
irqfd->producer = prod;
+ kvm_arch_start_assignment(irqfd->kvm);
+ ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
+ prod->irq, irqfd->gsi, 1);
- return kvm_x86_ops.update_pi_irte(irqfd->kvm,
- prod->irq, irqfd->gsi, 1);
+ if (ret)
+ kvm_arch_end_assignment(irqfd->kvm);
+
+ return ret;
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -10656,6 +10702,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
+
+ kvm_arch_end_assignment(irqfd->kvm);
}
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
@@ -10675,28 +10723,53 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
-u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu)
+
+int kvm_spec_ctrl_test_value(u64 value)
{
- uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD;
+ /*
+ * test that setting IA32_SPEC_CTRL to given value
+ * is allowed by the host processor
+ */
+
+ u64 saved_value;
+ unsigned long flags;
+ int ret = 0;
+
+ local_irq_save(flags);
+
+ if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
+ ret = 1;
+ else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
+ ret = 1;
+ else
+ wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
+
+ local_irq_restore(flags);
- /* The STIBP bit doesn't fault even if it's not advertised */
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
- bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
- if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
- !boot_cpu_has(X86_FEATURE_AMD_IBRS))
- bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
- bits &= ~SPEC_CTRL_SSBD;
- if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
- !boot_cpu_has(X86_FEATURE_AMD_SSBD))
- bits &= ~SPEC_CTRL_SSBD;
+void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
+{
+ struct x86_exception fault;
- return bits;
+ if (!(error_code & PFERR_PRESENT_MASK) ||
+ vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) {
+ /*
+ * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
+ * tables probably do not match the TLB. Just proceed
+ * with the error code that the processor gave.
+ */
+ fault.vector = PF_VECTOR;
+ fault.error_code_valid = true;
+ fault.error_code = error_code;
+ fault.nested_page_fault = false;
+ fault.address = gva;
+ }
+ vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
-EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits);
+EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);