summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/emulate.c10
-rw-r--r--arch/x86/kvm/svm.c125
-rw-r--r--arch/x86/kvm/vmx.c37
-rw-r--r--arch/x86/kvm/x86.c139
4 files changed, 109 insertions, 202 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a7557c16..9da95b9daf8d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
u64 val, cr0, cr4;
u32 base3;
u16 selector;
- int i;
+ int i, r;
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return X86EMUL_CONTINUE;
}
static int em_rsm(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94b7d15db3fc..2f9ed1ff0632 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -514,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->vmcb->control.next_rip != 0) {
- WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+ WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
@@ -866,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
-#define MTRR_TYPE_UC_MINUS 7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
- /*
- * WT and WP aren't always available in the host PAT. Treat
- * them as UC and UC- respectively. Everything else should be
- * there.
- */
- switch (mtrr)
- {
- case MTRR_TYPE_WRTHROUGH:
- return MTRR_TYPE_UNCACHABLE;
- case MTRR_TYPE_WRPROT:
- return MTRR_TYPE_UC_MINUS;
- default:
- BUG();
- }
-}
-
-static void build_mtrr2protval(void)
-{
- int i;
- u64 pat;
-
- for (i = 0; i < 8; i++)
- mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
- /* Ignore the invalid MTRR types. */
- mtrr2protval[2] = 0;
- mtrr2protval[3] = 0;
-
- /*
- * Use host PAT value to figure out the mapping from guest MTRR
- * values to nested page table PAT/PCD/PWT values. We do not
- * want to change the host PAT value every time we enter the
- * guest.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- for (i = 0; i < 8; i++) {
- u8 mtrr = pat >> (8 * i);
-
- if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
- mtrr2protval[mtrr] = __cm_idx2pte(i);
- }
-
- for (i = 0; i < 8; i++) {
- if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
- u8 fallback = fallback_mtrr_type(i);
- mtrr2protval[i] = mtrr2protval[fallback];
- BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
- }
- }
-}
-
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -990,7 +932,6 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
- build_mtrr2protval();
return 0;
err:
@@ -1145,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
-
- /* Unlike Intel, AMD takes the guest's CR0.CD into account.
- *
- * AMD does not have IPAT. To emulate it for the case of guests
- * with no assigned devices, just set everything to WB. If guests
- * have assigned devices, however, we cannot force WB for RAM
- * pages only, so use the guest PAT directly.
- */
- if (!kvm_arch_has_assigned_device(vcpu->kvm))
- *g_pat = 0x0606060606060606;
- else
- *g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
- u8 mtrr;
-
- /*
- * 1. MMIO: trust guest MTRR, so same as item 3.
- * 2. No passthrough: always map as WB, and force guest PAT to WB as well
- * 3. Passthrough: can't guarantee the result, try to trust guest.
- */
- if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
- return 0;
-
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
- kvm_read_cr0(vcpu) & X86_CR0_CD)
- return _PAGE_NOCACHE;
-
- mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
- return mtrr2protval[mtrr];
-}
-
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1278,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = svm->vcpu.arch.pat;
- svm_set_guest_pat(svm, &save->g_pat);
save->cr3 = 0;
save->cr4 = 0;
}
@@ -1673,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!vcpu->fpu_active)
cr0 |= X86_CR0_TS;
-
- /* These are emulated via page tables. */
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+ /*
+ * re-enable caching here because the QEMU bios
+ * does not do it - this results in some delay at
+ * reboot
+ */
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
@@ -3351,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_IA32_CR_PAT:
- if (npt_enabled) {
- if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
- return 1;
- vcpu->arch.pat = data;
- svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
- mark_dirty(svm->vmcb, VMCB_NPT);
- break;
- }
- /* fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -4195,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void)
return true;
}
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+ return 0;
+}
+
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64076740251e..6a8bc64566ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
goto out;
- kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (r)
goto out;
@@ -4140,17 +4136,12 @@ static int alloc_identity_pagetable(struct kvm *kvm)
{
/* Called with kvm->slots_lock held. */
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
BUG_ON(kvm->arch.ept_identity_pagetable_done);
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ kvm->arch.ept_identity_map_addr, PAGE_SIZE);
return r;
}
@@ -4949,14 +4940,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
- struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
- .guest_phys_addr = addr,
- .memory_size = PAGE_SIZE * 3,
- .flags = 0,
- };
- ret = x86_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
@@ -8617,17 +8603,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
u64 ipat = 0;
/* For VT-d and EPT combination
- * 1. MMIO: guest may want to apply WC, trust it.
+ * 1. MMIO: always map as UC
* 2. EPT with VT-d:
* a. VT-d without snooping control feature: can't guarantee the
- * result, try to trust guest. So the same as item 1.
+ * result, try to trust guest.
* b. VT-d with snooping control feature: snooping control feature of
* VT-d engine can guarantee the cache correctness. Just set it
* to WB to keep consistent with host. So the same as item 3.
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
* consistent with host MTRR
*/
- if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+ if (is_mmio) {
+ cache = MTRR_TYPE_UNCACHABLE;
+ goto exit;
+ }
+
+ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
ipat = VMX_EPT_IPAT_BIT;
cache = MTRR_TYPE_WRBACK;
goto exit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466bf8dee..9a9a19830321 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->pvclock_set_guest_stopped_request = false;
}
- pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
/* If the host uses TSC clocksource, then it is stable */
if (use_master_clock)
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&vcpu->requests);
ka->boot_vcpu_runs_old_kvmclock = tmp;
-
- ka->kvmclock_offset = -get_kernel_ns();
}
vcpu->arch.time = data;
@@ -6457,6 +6453,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
return 1;
}
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted);
+}
+
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@@ -6465,8 +6467,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
for (;;) {
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
+ if (kvm_vcpu_running(vcpu))
r = vcpu_enter_guest(vcpu);
else
r = vcpu_block(kvm, vcpu);
@@ -7478,34 +7479,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
+ unsigned long hva;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ return -EINVAL;
+ slot = id_to_memslot(slots, id);
+ if (size) {
+ if (WARN_ON(slot->npages))
+ return -EEXIST;
+
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *)hva))
+ return PTR_ERR((void *)hva);
+ } else {
+ if (!slot->npages)
+ return 0;
+
+ hva = 0;
+ }
+
+ old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m = *mem;
+ struct kvm_userspace_memory_region m;
- m.slot |= i << 16;
+ m.slot = id | (i << 16);
+ m.flags = 0;
+ m.guest_phys_addr = gpa;
+ m.userspace_addr = hva;
+ m.memory_size = size;
r = __kvm_set_memory_region(kvm, &m);
if (r < 0)
return r;
}
+ if (!size) {
+ r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ WARN_ON(r < 0);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int r;
mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, mem);
+ r = __x86_set_memory_region(kvm, id, gpa, size);
mutex_unlock(&kvm->slots_lock);
return r;
@@ -7520,16 +7553,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the the memory map has changed due to process exit
* or fd copying.
*/
- struct kvm_userspace_memory_region mem;
- memset(&mem, 0, sizeof(mem));
- mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = TSS_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7632,27 +7658,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /*
- * Only private memory slots need to be mapped here since
- * KVM_SET_MEMORY_REGION ioctl is no longer supported.
- */
- if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
- unsigned long userspace_addr;
-
- /*
- * MAP_SHARED to prevent internal slot pages from being moved
- * by fork()/COW.
- */
- userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, 0);
-
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
-
- memslot->userspace_addr = userspace_addr;
- }
-
return 0;
}
@@ -7714,17 +7719,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int nr_mmu_pages = 0;
- if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
- int ret;
-
- ret = vm_munmap(old->userspace_addr,
- old->npages * PAGE_SIZE);
- if (ret < 0)
- printk(KERN_WARNING
- "kvm_vm_ioctl_set_memory_region: "
- "failed to munmap memory\n");
- }
-
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -7773,19 +7767,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_mmu_invalidate_zap_all_pages(kvm);
}
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+ if (kvm_apic_has_events(vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
+ return true;
+
+ if (atomic_read(&vcpu->arch.nmi_queued))
+ return true;
+
+ if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ return true;
+
+ if (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_has_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
kvm_x86_ops->check_nested_events(vcpu, false);
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
- || !list_empty_careful(&vcpu->async_pf.done)
- || kvm_apic_has_events(vcpu)
- || vcpu->arch.pv.pv_unhalted
- || atomic_read(&vcpu->arch.nmi_queued) ||
- (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)