aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig2
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/arch_timer.c157
-rw-r--r--arch/arm64/kvm/arm.c65
-rw-r--r--arch/arm64/kvm/fpsimd.c6
-rw-r--r--arch/arm64/kvm/guest.c79
-rw-r--r--arch/arm64/kvm/handle_exit.c68
-rw-r--r--arch/arm64/kvm/hyp/Makefile24
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c8
-rw-r--r--arch/arm64/kvm/hyp/entry.S4
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S1
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S21
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h (renamed from arch/arm64/kvm/hyp/debug-sr.c)88
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h511
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h193
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile62
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c77
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S (renamed from arch/arm64/kvm/hyp-init.S)5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c272
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c46
-rw-r--r--arch/arm64/kvm/hyp/nvhe/timer-sr.c (renamed from arch/arm64/kvm/hyp/timer-sr.c)6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c154
-rw-r--r--arch/arm64/kvm/hyp/smccc_wa.S32
-rw-r--r--arch/arm64/kvm/hyp/switch.c936
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c333
-rw-r--r--arch/arm64/kvm/hyp/tlb.c242
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c4
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c134
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile11
-rw-r--r--arch/arm64/kvm/hyp/vhe/debug-sr.c26
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c219
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c114
-rw-r--r--arch/arm64/kvm/hyp/vhe/timer-sr.c12
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c162
-rw-r--r--arch/arm64/kvm/inject_fault.c2
-rw-r--r--arch/arm64/kvm/mmio.c17
-rw-r--r--arch/arm64/kvm/mmu.c372
-rw-r--r--arch/arm64/kvm/regmap.c37
-rw-r--r--arch/arm64/kvm/reset.c23
-rw-r--r--arch/arm64/kvm/sys_regs.c220
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c96
-rw-r--r--arch/arm64/kvm/trace_arm.h8
-rw-r--r--arch/arm64/kvm/va_layout.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c24
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c2
46 files changed, 2593 insertions, 2291 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 13489aff4440..318c8f2df245 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -58,7 +58,7 @@ config KVM_ARM_PMU
virtual machines.
config KVM_INDIRECT_VECTORS
- def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS
+ def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE
endif # KVM
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 8d3d9513cbfe..99977c1972cc 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,8 +13,8 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
- inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \
- guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \
+ inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
+ guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
aarch32.o arch_timer.o \
vgic/vgic.o vgic/vgic-init.o \
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index a1fe0ea3254e..32ba6fbc3814 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -51,6 +51,93 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg);
+u32 timer_get_ctl(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
+ case TIMER_PTIMER:
+ return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+u64 timer_get_cval(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ case TIMER_PTIMER:
+ return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static u64 timer_get_offset(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ default:
+ return 0;
+ }
+}
+
+static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
+ break;
+ case TIMER_PTIMER:
+ __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
+ break;
+ case TIMER_PTIMER:
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset;
+ break;
+ default:
+ WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
+ }
+}
+
u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
@@ -124,8 +211,8 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
- cval = timer_ctx->cnt_cval;
- now = kvm_phys_timer_read() - timer_ctx->cntvoff;
+ cval = timer_get_cval(timer_ctx);
+ now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
if (now < cval) {
u64 ns;
@@ -144,8 +231,8 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
return timer_ctx &&
- !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
- (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
+ ((timer_get_ctl(timer_ctx) &
+ (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}
/*
@@ -256,8 +343,8 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
- cval = timer_ctx->cnt_cval;
- now = kvm_phys_timer_read() - timer_ctx->cntvoff;
+ cval = timer_get_cval(timer_ctx);
+ now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
return cval <= now;
}
@@ -350,8 +437,8 @@ static void timer_save_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
- ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL);
+ timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
+ timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -359,8 +446,8 @@ static void timer_save_state(struct arch_timer_context *ctx)
break;
case TIMER_PTIMER:
- ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
- ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL);
+ timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
+ timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTP_CTL);
@@ -429,14 +516,14 @@ static void timer_restore_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL);
+ write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL);
+ write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL);
+ write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL);
+ write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
BUG();
@@ -528,7 +615,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_load_nogic(vcpu);
}
- set_cntvoff(map.direct_vtimer->cntvoff);
+ set_cntvoff(timer_get_offset(map.direct_vtimer));
kvm_timer_unblocking(vcpu);
@@ -615,7 +702,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
}
}
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
+void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -639,8 +726,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- vcpu_vtimer(vcpu)->cnt_ctl = 0;
- vcpu_ptimer(vcpu)->cnt_ctl = 0;
+ timer_set_ctl(vcpu_vtimer(vcpu), 0);
+ timer_set_ctl(vcpu_ptimer(vcpu), 0);
if (timer->enabled) {
kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
@@ -668,13 +755,13 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, tmp, kvm)
- vcpu_vtimer(tmp)->cntvoff = cntvoff;
+ timer_set_offset(vcpu_vtimer(tmp), cntvoff);
/*
* When called from the vcpu create path, the CPU being created is not
* included in the loop above, so we just set it here as well.
*/
- vcpu_vtimer(vcpu)->cntvoff = cntvoff;
+ timer_set_offset(vcpu_vtimer(vcpu), cntvoff);
mutex_unlock(&kvm->lock);
}
@@ -684,9 +771,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ vtimer->vcpu = vcpu;
+ ptimer->vcpu = vcpu;
+
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
- ptimer->cntvoff = 0;
+ timer_set_offset(ptimer, 0);
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
@@ -704,9 +794,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
-
- vtimer->vcpu = vcpu;
- ptimer->vcpu = vcpu;
}
static void kvm_timer_init_interrupt(void *info)
@@ -756,10 +843,12 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
* UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
* regardless of ENABLE bit for our implementation convenience.
*/
+ u32 ctl = timer_get_ctl(timer);
+
if (!kvm_timer_compute_delta(timer))
- return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT;
- else
- return timer->cnt_ctl;
+ ctl |= ARCH_TIMER_CTRL_IT_STAT;
+
+ return ctl;
}
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
@@ -795,8 +884,8 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
switch (treg) {
case TIMER_REG_TVAL:
- val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
- val &= lower_32_bits(val);
+ val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
+ val = lower_32_bits(val);
break;
case TIMER_REG_CTL:
@@ -804,11 +893,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
break;
case TIMER_REG_CVAL:
- val = timer->cnt_cval;
+ val = timer_get_cval(timer);
break;
case TIMER_REG_CNT:
- val = kvm_phys_timer_read() - timer->cntvoff;
+ val = kvm_phys_timer_read() - timer_get_offset(timer);
break;
default:
@@ -842,15 +931,15 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
{
switch (treg) {
case TIMER_REG_TVAL:
- timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val;
+ timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
break;
case TIMER_REG_CTL:
- timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
+ timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
break;
case TIMER_REG_CVAL:
- timer->cnt_cval = val;
+ timer_set_cval(timer, val);
break;
default:
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a66bee7ff2d9..349d380e9ff0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void)
*/
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- int ret, cpu;
+ int ret;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
- kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
- if (!kvm->arch.last_vcpu_ran)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu)
- *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
-
- ret = kvm_alloc_stage2_pgd(kvm);
+ ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
if (ret)
- goto out_fail_alloc;
+ return ret;
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
if (ret)
@@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
- /* Mark the initial VMID generation invalid */
- kvm->arch.vmid.vmid_gen = 0;
-
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
return ret;
out_free_stage2_pgd:
- kvm_free_stage2_pgd(kvm);
-out_fail_alloc:
- free_percpu(kvm->arch.last_vcpu_ran);
- kvm->arch.last_vcpu_ran = NULL;
+ kvm_free_stage2_pgd(&kvm->arch.mmu);
return ret;
}
@@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_vgic_destroy(kvm);
- free_percpu(kvm->arch.last_vcpu_ran);
- kvm->arch.last_vcpu_ran = NULL;
-
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_vcpu_destroy(kvm->vcpus[i]);
@@ -270,6 +254,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+ vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
+
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -279,6 +265,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_arm_pvtime_vcpu_init(&vcpu->arch);
+ vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
@@ -334,16 +322,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm_s2_mmu *mmu;
int *last_ran;
- last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+ mmu = vcpu->arch.hw_mmu;
+ last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
/*
* We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness.
*/
if (*last_ran != vcpu->vcpu_id) {
- kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+ kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
*last_ran = vcpu->vcpu_id;
}
@@ -351,7 +341,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
- kvm_vcpu_load_sysregs(vcpu);
+ if (has_vhe())
+ kvm_vcpu_load_sysregs_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
@@ -369,7 +360,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_arch_vcpu_put_fp(vcpu);
- kvm_vcpu_put_sysregs(vcpu);
+ if (has_vhe())
+ kvm_vcpu_put_sysregs_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
@@ -466,7 +458,6 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid)
/**
* update_vmid - Update the vmid with a valid VMID for the current generation
- * @kvm: The guest that struct vmid belongs to
* @vmid: The stage-2 VMID information struct
*/
static void update_vmid(struct kvm_vmid *vmid)
@@ -658,7 +649,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
- ret = kvm_handle_mmio_return(vcpu, run);
+ ret = kvm_handle_mmio_return(vcpu);
if (ret)
return ret;
}
@@ -678,7 +669,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
cond_resched();
- update_vmid(&vcpu->kvm->arch.vmid);
+ update_vmid(&vcpu->arch.hw_mmu->vmid);
check_vcpu_requests(vcpu);
@@ -727,13 +718,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
- if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
+ if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(&userspace_irqchip_in_use))
- kvm_timer_sync_hwstate(vcpu);
+ kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
preempt_enable();
@@ -748,11 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- if (has_vhe()) {
- ret = kvm_vcpu_run_vhe(vcpu);
- } else {
- ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
- }
+ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -782,7 +769,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* timer virtual interrupt state.
*/
if (static_branch_unlikely(&userspace_irqchip_in_use))
- kvm_timer_sync_hwstate(vcpu);
+ kvm_timer_sync_user(vcpu);
kvm_arch_vcpu_ctxsync_fp(vcpu);
@@ -810,11 +797,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
- handle_exit_early(vcpu, run, ret);
+ handle_exit_early(vcpu, ret);
preempt_enable();
- ret = handle_exit(vcpu, run, ret);
+ ret = handle_exit(vcpu, ret);
}
/* Tell userspace about in-kernel device output levels */
@@ -1285,7 +1272,7 @@ static void cpu_init_hyp_mode(void)
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
- (unsigned long)kvm_ksym_ref(kvm_host_data));
+ (unsigned long)kvm_ksym_ref(&kvm_host_data));
pgd_ptr = kvm_mmu_get_httbr();
hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
@@ -1306,7 +1293,7 @@ static void cpu_init_hyp_mode(void)
*/
if (this_cpu_has_cap(ARM64_SSBS) &&
arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- kvm_call_hyp(__kvm_enable_ssbs);
+ kvm_call_hyp_nvhe(__kvm_enable_ssbs);
}
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index e329a36b2bee..3e081d556e81 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -85,7 +85,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
+ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
vcpu->arch.sve_max_vl);
@@ -109,12 +109,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
-
fpsimd_save_and_flush_cpu_state();
if (guest_has_sve)
- *guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
} else if (host_has_sve) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index aea43ec60f37..dfb5218137ca 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -101,19 +101,69 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
return size;
}
-static int validate_core_offset(const struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
+static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u64 off = core_reg_offset_from_id(reg->id);
int size = core_reg_size_from_offset(vcpu, off);
if (size < 0)
- return -EINVAL;
+ return NULL;
if (KVM_REG_SIZE(reg->id) != size)
- return -EINVAL;
+ return NULL;
- return 0;
+ switch (off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ off -= KVM_REG_ARM_CORE_REG(regs.regs[0]);
+ off /= 2;
+ return &vcpu->arch.ctxt.regs.regs[off];
+
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ return &vcpu->arch.ctxt.regs.sp;
+
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ return &vcpu->arch.ctxt.regs.pc;
+
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ return &vcpu->arch.ctxt.regs.pstate;
+
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1);
+
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1);
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1);
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]):
+ return &vcpu->arch.ctxt.spsr_abt;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]):
+ return &vcpu->arch.ctxt.spsr_und;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]):
+ return &vcpu->arch.ctxt.spsr_irq;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]):
+ return &vcpu->arch.ctxt.spsr_fiq;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]);
+ off /= 4;
+ return &vcpu->arch.ctxt.fp_regs.vregs[off];
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ return &vcpu->arch.ctxt.fp_regs.fpsr;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ return &vcpu->arch.ctxt.fp_regs.fpcr;
+
+ default:
+ return NULL;
+ }
}
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -125,8 +175,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
* off the index in the "array".
*/
__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
- struct kvm_regs *regs = vcpu_gp_regs(vcpu);
- int nr_regs = sizeof(*regs) / sizeof(__u32);
+ int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
+ void *addr;
u32 off;
/* Our ID is an index into the kvm_regs struct. */
@@ -135,10 +185,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(vcpu, reg))
+ addr = core_reg_addr(vcpu, reg);
+ if (!addr)
return -EINVAL;
- if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+ if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
return 0;
@@ -147,10 +198,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
- struct kvm_regs *regs = vcpu_gp_regs(vcpu);
- int nr_regs = sizeof(*regs) / sizeof(__u32);
+ int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
__uint128_t tmp;
- void *valp = &tmp;
+ void *valp = &tmp, *addr;
u64 off;
int err = 0;
@@ -160,7 +210,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(vcpu, reg))
+ addr = core_reg_addr(vcpu, reg);
+ if (!addr)
return -EINVAL;
if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
@@ -198,7 +249,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
}
}
- memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+ memcpy(addr, valp, KVM_REG_SIZE(reg->id));
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 5a02d4c90559..fe6c7d79309d 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -25,7 +25,7 @@
#define CREATE_TRACE_POINTS
#include "trace_handle_exit.h"
-typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
{
@@ -33,7 +33,7 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
kvm_inject_vabt(vcpu);
}
-static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int handle_hvc(struct kvm_vcpu *vcpu)
{
int ret;
@@ -50,7 +50,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
-static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
* "If an SMC instruction executed at Non-secure EL1 is
@@ -69,7 +69,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Guest access to FP/ASIMD registers are routed to this handler only
* when the system doesn't support FP/ASIMD.
*/
-static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@@ -87,9 +87,9 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
-static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+ if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
@@ -109,23 +109,23 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
* kvm_handle_guest_debug - handle a debug exception instruction
*
* @vcpu: the vcpu pointer
- * @run: access to the kvm_run structure for results
*
* We route all debug exceptions through the same handler. If both the
* guest and host are using the same debug facilities it will be up to
* userspace to re-inject the correct exception for guest delivery.
*
- * @return: 0 (while setting run->exit_reason), -1 for error
+ * @return: 0 (while setting vcpu->run->exit_reason), -1 for error
*/
-static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ struct kvm_run *run = vcpu->run;
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int ret = 0;
run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.hsr = hsr;
+ run->debug.arch.hsr = esr;
- switch (ESR_ELx_EC(hsr)) {
+ switch (ESR_ELx_EC(esr)) {
case ESR_ELx_EC_WATCHPT_LOW:
run->debug.arch.far = vcpu->arch.fault.far_el2;
/* fall through */
@@ -135,8 +135,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
case ESR_ELx_EC_BRK64:
break;
default:
- kvm_err("%s: un-handled case hsr: %#08x\n",
- __func__, (unsigned int) hsr);
+ kvm_err("%s: un-handled case esr: %#08x\n",
+ __func__, (unsigned int) esr);
ret = -1;
break;
}
@@ -144,18 +144,18 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
-static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
- kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
+ kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
+ esr, esr_get_class_string(esr));
kvm_inject_undefined(vcpu);
return 1;
}
-static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int handle_sve(struct kvm_vcpu *vcpu)
{
/* Until SVE is supported for guests: */
kvm_inject_undefined(vcpu);
@@ -167,7 +167,7 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
* a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
* that we can do is give the guest an UNDEF.
*/
-static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@@ -200,10 +200,10 @@ static exit_handle_fn arm_exit_handlers[] = {
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
- u8 hsr_ec = ESR_ELx_EC(hsr);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
+ u8 esr_ec = ESR_ELx_EC(esr);
- return arm_exit_handlers[hsr_ec];
+ return arm_exit_handlers[esr_ec];
}
/*
@@ -212,7 +212,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
* KVM_EXIT_DEBUG, otherwise userspace needs to complete its
* emulation first.
*/
-static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
@@ -227,7 +227,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
exit_handle_fn exit_handler;
exit_handler = kvm_get_exit_handler(vcpu);
- handled = exit_handler(vcpu, run);
+ handled = exit_handler(vcpu);
}
return handled;
@@ -237,19 +237,20 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
*/
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index)
+int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
+ struct kvm_run *run = vcpu->run;
+
if (ARM_SERROR_PENDING(exception_index)) {
- u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
+ u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
/*
* HVC/SMC already have an adjusted PC, which we need
* to correct in order to return to after having
* injected the SError.
*/
- if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 ||
- hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) {
+ if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 ||
+ esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) {
u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
*vcpu_pc(vcpu) -= adj;
}
@@ -265,7 +266,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case ARM_EXCEPTION_EL1_SERROR:
return 1;
case ARM_EXCEPTION_TRAP:
- return handle_trap_exceptions(vcpu, run);
+ return handle_trap_exceptions(vcpu);
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
@@ -289,8 +290,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
/* For exit types that need handling before we can be preempted */
-void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index)
+void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
@@ -307,5 +307,5 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
exception_index = ARM_EXCEPTION_CODE(exception_index);
if (exception_index == ARM_EXCEPTION_EL1_SERROR)
- kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
+ kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
}
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 8c9880783839..f54f0e89a71c 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,18 +3,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
- $(DISABLE_STACKLEAK_PLUGIN)
-
-obj-$(CONFIG_KVM) += hyp.o
-
-hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \
- debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o
-
-# KVM code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+incdir := $(srctree)/$(src)/include
+subdir-asflags-y := -I$(incdir)
+subdir-ccflags-y := -I$(incdir) \
+ -fno-stack-protector \
+ -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN)
+
+obj-$(CONFIG_KVM) += vhe/ nvhe/
+obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 25c0e47d57cb..ae56d8a4b382 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -44,14 +44,14 @@ static const unsigned short cc_map[16] = {
/*
* Check if a trapped instruction should have been executed or not.
*/
-bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
{
unsigned long cpsr;
u32 cpsr_cond;
int cond;
/* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_hsr(vcpu) >> 30)
+ if (kvm_vcpu_get_esr(vcpu) >> 30)
return true;
/* Is condition field valid? */
@@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
*
* IT[7:0] -> CPSR[26:25],CPSR[15:10]
*/
-static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
{
unsigned long itbits, cond;
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
* kvm_skip_instr - skip a trapped instruction and proceed to the next
* @vcpu: The vcpu pointer
*/
-void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
{
u32 pc = *vcpu_pc(vcpu);
bool is_thumb;
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 90186cf6473e..ee32a7743389 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -16,12 +16,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_ptrauth.h>
-#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
-#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8)
.text
- .pushsection .hyp.text, "ax"
/*
* We treat x18 as callee-saved as the host may use it as a platform
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 5b8ff517ff10..01f114aa47b0 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -9,7 +9,6 @@
#include <asm/fpsimdmacros.h>
.text
- .pushsection .hyp.text, "ax"
SYM_FUNC_START(__fpsimd_save_state)
fpsimd_save x0, 1
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9c5cfb04170e..689fccbc9de7 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -16,7 +16,6 @@
#include <asm/mmu.h>
.text
- .pushsection .hyp.text, "ax"
.macro do_el2_call
/*
@@ -40,6 +39,7 @@ el1_sync: // Guest trapped into EL2
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
+#ifdef __KVM_NVHE_HYPERVISOR__
mrs x1, vttbr_el2 // If vttbr is valid, the guest
cbnz x1, el1_hvc_guest // called HVC
@@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2
eret
sb
+#endif /* __KVM_NVHE_HYPERVISOR__ */
el1_hvc_guest:
/*
@@ -180,6 +181,7 @@ el2_error:
eret
sb
+#ifdef __KVM_NVHE_HYPERVISOR__
SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
@@ -189,6 +191,7 @@ SYM_FUNC_START(__hyp_do_panic)
eret
sb
SYM_FUNC_END(__hyp_do_panic)
+#endif
SYM_CODE_START(__hyp_panic)
get_host_ctxt x0, x1
@@ -318,20 +321,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs)
1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
.org 1b
SYM_CODE_END(__bp_harden_hyp_vecs)
-
- .popsection
-
-SYM_CODE_START(__smccc_workaround_1_smc)
- esb
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- smc #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
- .org 1b
-SYM_CODE_END(__smccc_workaround_1_smc)
#endif
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index e95af204fec7..0297dc63988c 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -4,6 +4,9 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__
+#define __ARM64_KVM_HYP_DEBUG_SR_H__
+
#include <linux/compiler.h>
#include <linux/kvm_host.h>
@@ -85,53 +88,8 @@
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
-{
- u64 reg;
-
- /* Clear pmscr in case of early return */
- *pmscr_el1 = 0;
-
- /* SPE present on this CPU? */
- if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
- ID_AA64DFR0_PMSVER_SHIFT))
- return;
-
- /* Yes; is it owned by EL3? */
- reg = read_sysreg_s(SYS_PMBIDR_EL1);
- if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
- return;
-
- /* No; is the host actually using the thing? */
- reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
- if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
- return;
-
- /* Yes; save the control register and disable data generation */
- *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
- write_sysreg_s(0, SYS_PMSCR_EL1);
- isb();
-
- /* Now drain all buffered data to memory */
- psb_csync();
- dsb(nsh);
-}
-
-static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
-{
- if (!pmscr_el1)
- return;
-
- /* The host page table is installed, but not yet synchronised */
- isb();
-
- /* Re-enable data generation */
- write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
-}
-
-static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
save_debug(dbg->dbg_wcr, dbgwcr, wrps);
save_debug(dbg->dbg_wvr, dbgwvr, wrps);
- ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+ ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1);
}
-static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -165,23 +122,16 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
- write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1);
}
-void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- /*
- * Non-VHE: Disable and flush SPE data generation
- * VHE: The vcpu can run, but it can't hide.
- */
- if (!has_vhe())
- __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
@@ -190,20 +140,17 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, host_dbg, host_ctxt);
- __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_save_state(host_dbg, host_ctxt);
+ __debug_restore_state(guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!has_vhe())
- __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
@@ -212,13 +159,10 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, guest_dbg, guest_ctxt);
- __debug_restore_state(vcpu, host_dbg, host_ctxt);
+ __debug_save_state(guest_dbg, guest_ctxt);
+ __debug_restore_state(host_dbg, host_ctxt);
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
-u32 __hyp_text __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
+#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
new file mode 100644
index 000000000000..426ef65601dd
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SWITCH_H__
+#define __ARM64_KVM_HYP_SWITCH_H__
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+extern const char __hyp_panic_string[];
+
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When the system doesn't support FP/SIMD, we cannot rely on
+ * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+ * abort on the very first access to FP and thus we should never
+ * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+ * trap the accesses.
+ */
+ if (!system_supports_fpsimd() ||
+ vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
+
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+}
+
+/* Save the 32-bit only FPSIMD system register state */
+static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
+}
+
+static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
+ */
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+ write_sysreg(1 << 30, fpexc32_el2);
+ isb();
+ }
+}
+
+static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+ write_sysreg(1 << 15, hstr_el2);
+
+ /*
+ * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+ * PMSELR_EL0 to make sure it never contains the cycle
+ * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+ * EL1 instead of being trapped to EL2.
+ */
+ write_sysreg(0, pmselr_el0);
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static inline void __deactivate_traps_common(void)
+{
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
+
+static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+}
+
+static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If we pended a virtual abort, preserve it until it gets
+ * cleared. See D1.14.3 (Virtual Interrupts) for details, but
+ * the crucial bit is "On taking a vSError interrupt,
+ * HCR_EL2.VSE is cleared to 0."
+ */
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
+}
+
+static inline void __activate_vm(struct kvm_s2_mmu *mmu)
+{
+ __load_guest_stage2(mmu);
+}
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ asm volatile("at s1e1r, %0" : : "r" (far));
+ isb();
+
+ tmp = read_sysreg(par_el1);
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u8 ec;
+ u64 esr;
+ u64 hpfar, far;
+
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+{
+ bool vhe, sve_guest, sve_host;
+ u8 esr_ec;
+
+ if (!system_supports_fpsimd())
+ return false;
+
+ /*
+ * Currently system_supports_sve() currently implies has_vhe(),
+ * so the check is redundant. However, has_vhe() can be determined
+ * statically and helps the compiler remove dead code.
+ */
+ if (has_vhe() && system_supports_sve()) {
+ sve_guest = vcpu_has_sve(vcpu);
+ sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+ vhe = true;
+ } else {
+ sve_guest = false;
+ sve_host = false;
+ vhe = has_vhe();
+ }
+
+ esr_ec = kvm_vcpu_trap_get_class(vcpu);
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
+ esr_ec != ESR_ELx_EC_SVE)
+ return false;
+
+ /* Don't handle SVE traps for non-SVE vcpus here: */
+ if (!sve_guest)
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD)
+ return false;
+
+ /* Valid trap. Switch the context: */
+
+ if (vhe) {
+ u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+ if (sve_guest)
+ reg |= CPACR_EL1_ZEN;
+
+ write_sysreg(reg, cpacr_el1);
+ } else {
+ write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+ cptr_el2);
+ }
+
+ isb();
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+ /*
+ * In the SVE case, VHE is assumed: it is enforced by
+ * Kconfig and kvm_arch_init().
+ */
+ if (sve_host) {
+ struct thread_struct *thread = container_of(
+ vcpu->arch.host_fpsimd_state,
+ struct thread_struct, uw.fpsimd_state);
+
+ sve_save_state(sve_pffr(thread),
+ &vcpu->arch.host_fpsimd_state->fpsr);
+ } else {
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ }
+
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ }
+
+ if (sve_guest) {
+ sve_load_state(vcpu_sve_pffr(vcpu),
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
+ write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
+ } else {
+ __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ }
+
+ /* Skip restoring fpexc32 for AArch64 guests */
+ if (!(read_sysreg(hcr_el2) & HCR_RW))
+ write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
+
+ vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+ return true;
+}
+
+static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
+static inline bool esr_is_ptrauth_trap(u32 esr)
+{
+ u32 ec = ESR_ELx_EC(esr);
+
+ if (ec == ESR_ELx_EC_PAC)
+ return true;
+
+ if (ec != ESR_ELx_EC_SYS64)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_APIAKEYLO_EL1:
+ case SYS_APIAKEYHI_EL1:
+ case SYS_APIBKEYLO_EL1:
+ case SYS_APIBKEYHI_EL1:
+ case SYS_APDAKEYLO_EL1:
+ case SYS_APDAKEYHI_EL1:
+ case SYS_APDBKEYLO_EL1:
+ case SYS_APDBKEYHI_EL1:
+ case SYS_APGAKEYLO_EL1:
+ case SYS_APGAKEYHI_EL1:
+ return true;
+ }
+
+ return false;
+}
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+} while(0)
+
+static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *ctxt;
+ u64 val;
+
+ if (!vcpu_has_ptrauth(vcpu) ||
+ !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return false;
+
+ ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __ptrauth_save_key(ctxt, APIA);
+ __ptrauth_save_key(ctxt, APIB);
+ __ptrauth_save_key(ctxt, APDA);
+ __ptrauth_save_key(ctxt, APDB);
+ __ptrauth_save_key(ctxt, APGA);
+
+ vcpu_ptrauth_enable(vcpu);
+
+ val = read_sysreg(hcr_el2);
+ val |= (HCR_API | HCR_APK);
+ write_sysreg(val, hcr_el2);
+
+ return true;
+}
+
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+ /*
+ * We're using the raw exception code in order to only process
+ * the trap if no SError is pending. We will come back to the
+ * same PC once the SError has been injected, and replay the
+ * trapping instruction.
+ */
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ * Similarly for trapped SVE accesses.
+ */
+ if (__hyp_handle_fpsimd(vcpu))
+ return true;
+
+ if (__hyp_handle_ptrauth(vcpu))
+ return true;
+
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
+ kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_dabt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+
+ goto exit;
+ }
+ }
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+ }
+
+exit:
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_final_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
+#endif /* __ARM64_KVM_HYP_SWITCH_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
new file mode 100644
index 000000000000..7a986030145f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__
+#define __ARM64_KVM_HYP_SYSREG_SR_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+}
+
+static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0);
+ ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
+}
+
+static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
+ ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
+ ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
+ ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
+ ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
+ ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
+ ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
+ ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
+ ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
+ ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR);
+ ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR);
+ ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR);
+ ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
+ ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
+ ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
+ ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1);
+ ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
+ ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
+ ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+}
+
+static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+}
+
+static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+}
+
+static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
+}
+
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1);
+
+ if (has_vhe() ||
+ !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with nVHE's __activate_traps().
+ */
+ write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
+ write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+
+ if (!has_vhe() &&
+ cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with nVHE's __deactivate_traps().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+
+ write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+}
+
+static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ u64 pstate = ctxt->regs.pstate;
+ u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+ /*
+ * Safety check to ensure we're setting the CPU up to enter the guest
+ * in a less privileged mode.
+ *
+ * If we are attempting a return to EL2 or higher in AArch64 state,
+ * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+ * we'll take an illegal exception state exception immediately after
+ * the ERET to the guest. Attempts to return to AArch32 Hyp will
+ * result in an illegal exception return because EL2's execution state
+ * is determined by SCR_EL3.RW.
+ */
+ if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+ pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
+ write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
+ write_sysreg_el2(pstate, SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+}
+
+static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt);
+ vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und);
+ vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
+ vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);
+
+ __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
+ __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
+}
+
+static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt);
+ write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und);
+ write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq);
+ write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
+ write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
+}
+
+#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
new file mode 100644
index 000000000000..aef76487edc2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_NVHE_HYPERVISOR__
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
+
+obj-y := $(patsubst %.o,%.hyp.o,$(obj-y))
+extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y))
+
+$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE
+ $(call if_changed_rule,as_o_S)
+$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE
+ $(call if_changed,hypcopy)
+
+# Disable reordering functions by GCC (enabled at -O2).
+# This pass puts functions into '.text.*' sections to aid the linker
+# in optimizing ELF layout. See HYPCOPY comment below for more info.
+ccflags-y += $(call cc-option,-fno-reorder-functions)
+
+# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
+# and relevant ELF section names to avoid clashes with VHE code/data.
+#
+# Hyp code is assumed to be in the '.text' section of the input object
+# files (with the exception of specialized sections such as
+# '.hyp.idmap.text'). This assumption may be broken by a compiler that
+# divides code into sections like '.text.unlikely' so as to optimize
+# ELF layout. HYPCOPY checks that no such sections exist in the input
+# using `objdump`, otherwise they would be linked together with other
+# kernel code and not memory-mapped correctly at runtime.
+quiet_cmd_hypcopy = HYPCOPY $@
+ cmd_hypcopy = \
+ if $(OBJDUMP) -h $< | grep -F '.text.'; then \
+ echo "$@: function reordering not supported in nVHE hyp code" >&2; \
+ /bin/false; \
+ fi; \
+ $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \
+ --rename-section=.text=.hyp.text \
+ $< $@
+
+# Remove ftrace and Shadow Call Stack CFLAGS.
+# This is equivalent to the 'notrace' and '__noscs' annotations.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
+# KVM nVHE code is run at a different exception code with a different map, so
+# compiler instrumentation that inserts callbacks or checks into the code may
+# cause crashes. Just disable it.
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+# Skip objtool checking for this directory because nVHE code is compiled with
+# non-standard build rules.
+OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
new file mode 100644
index 000000000000..91a711aa8382
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static void __debug_save_spe(u64 *pmscr_el1)
+{
+ u64 reg;
+
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
+ return;
+
+ /* Yes; is it owned by EL3? */
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
+ return;
+
+ /* No; is the host actually using the thing? */
+ reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
+ return;
+
+ /* Yes; save the control register and disable data generation */
+ *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+ write_sysreg_s(0, SYS_PMSCR_EL1);
+ isb();
+
+ /* Now drain all buffered data to memory */
+ psb_csync();
+ dsb(nsh);
+}
+
+static void __debug_restore_spe(u64 pmscr_el1)
+{
+ if (!pmscr_el1)
+ return;
+
+ /* The host page table is installed, but not yet synchronised */
+ isb();
+
+ /* Re-enable data generation */
+ write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ /* Disable and flush SPE data generation */
+ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 86971fe26f3d..d9434e90c06d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -105,6 +105,11 @@ alternative_else_nop_endif
*/
mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ orr x4, x4, x5
+alternative_else_nop_endif
msr sctlr_el2, x4
isb
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
new file mode 100644
index 000000000000..341be2f2f312
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+#include <hyp/sysreg-sr.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
+ if (!update_fp_enabled(vcpu)) {
+ val |= CPTR_EL2_TFP;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cptr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+}
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 mdcr_el2;
+
+ ___deactivate_traps(vcpu);
+
+ mdcr_el2 = read_sysreg(mdcr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
+ __deactivate_traps_common();
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+ mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static void __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/**
+ * Disable host events, enable guest events
+ */
+static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+ return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool pmu_switch_needed;
+ u64 exit_code;
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ */
+ if (system_uses_irq_prio_masking()) {
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ pmr_sync();
+ }
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ __sysreg_save_state_nvhe(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+ __timer_disable_traps(vcpu);
+ __hyp_vgic_save_state(vcpu);
+
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+
+ __sysreg_restore_state_nvhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ /*
+ * This must come after restoring the host sysregs, since a non-VHE
+ * system may enable SPE here and make use of the TTBRs.
+ */
+ __debug_switch_to_host(vcpu);
+
+ if (pmu_switch_needed)
+ __pmu_switch_to_host(host_ctxt);
+
+ /* Returning to host will clear PSR.I, remask PMR if needed */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQOFF);
+
+ return exit_code;
+}
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+ struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
+ unsigned long str_va;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(host_ctxt);
+ }
+
+ /*
+ * Force the panic string to be loaded from the literal pool,
+ * making sure it is a kernel address and not a PC-relative
+ * reference.
+ */
+ asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
+
+ __hyp_do_panic(str_va,
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+ unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
new file mode 100644
index 000000000000..88a25fc8fcd3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * Non-VHE: Both host and guest must save everything.
+ */
+
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+
+void __kvm_enable_ssbs(void)
+{
+ u64 tmp;
+
+ asm volatile(
+ "mrs %0, sctlr_el2\n"
+ "orr %0, %0, %1\n"
+ "msr sctlr_el2, %0"
+ : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index fb5c0be33223..9072e71693ba 100644
--- a/arch/arm64/kvm/hyp/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -10,7 +10,7 @@
#include <asm/kvm_hyp.h>
-void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
+void __kvm_timer_set_cntvoff(u64 cntvoff)
{
write_sysreg(cntvoff, cntvoff_el2);
}
@@ -19,7 +19,7 @@ void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
* Should only be called on non-VHE systems.
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
*/
-void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
+void __timer_disable_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
* Should only be called on non-VHE systems.
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
*/
-void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
+void __timer_enable_traps(struct kvm_vcpu *vcpu)
{
u64 val;
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
new file mode 100644
index 000000000000..69eae608d670
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ u64 tcr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
+ __load_guest_stage2(mmu);
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /*
+ * If the host is running at EL1 and we have a VPIPT I-cache,
+ * then we must perform I-cache maintenance at EL2 in order for
+ * it to have an effect on the guest. Since the guest cannot hit
+ * I-cache lines allocated with a different VMID, we don't need
+ * to worry about junk out of guest reset (we nuke the I-cache on
+ * VMID rollover), but we do need to be careful when remapping
+ * executable pages for the same guest. This can happen when KSM
+ * takes a CoW fault on an executable page, copies the page into
+ * a page that was previously mapped in the guest and then needs
+ * to invalidate the guest view of the I-cache for that page
+ * from EL1. To solve this, we invalidate the entire I-cache when
+ * unmapping a page from a guest if we have a VPIPT I-cache but
+ * the host is running at EL1. As above, we could do better if
+ * we had the VA.
+ *
+ * The moral of this story is: if you have a VPIPT I-cache, then
+ * you should be running with VHE enabled.
+ */
+ if (icache_is_vpipt())
+ __flush_icache_all();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S
new file mode 100644
index 000000000000..b0441dbdf68b
--- /dev/null
+++ b/arch/arm64/kvm/hyp/smccc_wa.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2018 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/linkage.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+ /*
+ * This is not executed directly and is instead copied into the vectors
+ * by install_bp_hardening_cb().
+ */
+ .data
+ .pushsection .rodata
+ .global __smccc_workaround_1_smc
+SYM_DATA_START(__smccc_workaround_1_smc)
+ esb
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
+ .org 1b
+SYM_DATA_END(__smccc_workaround_1_smc)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
deleted file mode 100644
index db1c4487d95d..000000000000
--- a/arch/arm64/kvm/hyp/switch.c
+++ /dev/null
@@ -1,936 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <uapi/linux/psci.h>
-
-#include <kvm/arm_psci.h>
-
-#include <asm/barrier.h>
-#include <asm/cpufeature.h>
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/fpsimd.h>
-#include <asm/debug-monitors.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
-{
- /*
- * When the system doesn't support FP/SIMD, we cannot rely on
- * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
- * abort on the very first access to FP and thus we should never
- * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
- * trap the accesses.
- */
- if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_FP_HOST);
-
- return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
-}
-
-/* Save the 32-bit only FPSIMD system register state */
-static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
-{
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-}
-
-static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
-{
- /*
- * We are about to set CPTR_EL2.TFP to trap all floating point
- * register accesses to EL2, however, the ARM ARM clearly states that
- * traps are only taken to EL2 if the operation would not otherwise
- * trap to EL1. Therefore, always make sure that for 32-bit guests,
- * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
- * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
- * it will cause an exception.
- */
- if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
- write_sysreg(1 << 30, fpexc32_el2);
- isb();
- }
-}
-
-static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
-{
- /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
- write_sysreg(1 << 15, hstr_el2);
-
- /*
- * Make sure we trap PMU access from EL0 to EL2. Also sanitize
- * PMSELR_EL0 to make sure it never contains the cycle
- * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
- * EL1 instead of being trapped to EL2.
- */
- write_sysreg(0, pmselr_el0);
- write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-}
-
-static void __hyp_text __deactivate_traps_common(void)
-{
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
-}
-
-static void activate_traps_vhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_ZEN;
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
-
- val |= CPTR_EL2_TAM;
-
- if (update_fp_enabled(vcpu)) {
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- } else {
- val &= ~CPACR_EL1_FPEN;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-NOKPROBE_SYMBOL(activate_traps_vhe);
-
-static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- __activate_traps_common(vcpu);
-
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
- if (!update_fp_enabled(vcpu)) {
- val |= CPTR_EL2_TFP;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cptr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
-
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * configured and enabled. We can now restore the guest's S1
- * configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-}
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
-{
- u64 hcr = vcpu->arch.hcr_el2;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
- hcr |= HCR_TVM;
-
- write_sysreg(hcr, hcr_el2);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- if (has_vhe())
- activate_traps_vhe(vcpu);
- else
- __activate_traps_nvhe(vcpu);
-}
-
-static void deactivate_traps_vhe(void)
-{
- extern char vectors[]; /* kernel exception vectors */
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
- /*
- * ARM errata 1165522 and 1530923 require the actual execution of the
- * above before we can switch to the EL2/EL0 translation regime used by
- * the host.
- */
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
-
- write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
- write_sysreg(vectors, vbar_el1);
-}
-NOKPROBE_SYMBOL(deactivate_traps_vhe);
-
-static void __hyp_text __deactivate_traps_nvhe(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- u64 val;
-
- /*
- * Set the TCR and SCTLR registers in the exact opposite
- * sequence as __activate_traps_nvhe (first prevent walks,
- * then force the MMU on). A generous sprinkling of isb()
- * ensure that things happen in this exact order.
- */
- val = read_sysreg_el1(SYS_TCR);
- write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
- isb();
- val = read_sysreg_el1(SYS_SCTLR);
- write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
- isb();
- }
-
- __deactivate_traps_common();
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK;
- mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
-
- write_sysreg(mdcr_el2, mdcr_el2);
- write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
- write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
-}
-
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-{
- /*
- * If we pended a virtual abort, preserve it until it gets
- * cleared. See D1.14.3 (Virtual Interrupts) for details, but
- * the crucial bit is "On taking a vSError interrupt,
- * HCR_EL2.VSE is cleared to 0."
- */
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
- }
-
- if (has_vhe())
- deactivate_traps_vhe();
- else
- __deactivate_traps_nvhe();
-}
-
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
-{
- __activate_traps_common(vcpu);
-}
-
-void deactivate_traps_vhe_put(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
-
- write_sysreg(mdcr_el2, mdcr_el2);
-
- __deactivate_traps_common();
-}
-
-static void __hyp_text __activate_vm(struct kvm *kvm)
-{
- __load_guest_stage2(kvm);
-}
-
-static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-{
- write_sysreg(0, vttbr_el2);
-}
-
-/* Save VGICv3 state on non-VHE systems */
-static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
- __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
- }
-}
-
-/* Restore VGICv3 state on non_VEH systems */
-static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
- __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
- }
-}
-
-static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg(par_el1);
- asm volatile("at s1e1r, %0" : : "r" (far));
- isb();
-
- tmp = read_sysreg(par_el1);
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
-{
- u8 ec;
- u64 esr;
- u64 hpfar, far;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- vcpu->arch.fault.far_el2 = far;
- vcpu->arch.fault.hpfar_el2 = hpfar;
- return true;
-}
-
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
-{
- bool vhe, sve_guest, sve_host;
- u8 hsr_ec;
-
- if (!system_supports_fpsimd())
- return false;
-
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- vhe = true;
- } else {
- sve_guest = false;
- sve_host = false;
- vhe = has_vhe();
- }
-
- hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
- hsr_ec != ESR_ELx_EC_SVE)
- return false;
-
- /* Don't handle SVE traps for non-SVE vcpus here: */
- if (!sve_guest)
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
- return false;
-
- /* Valid trap. Switch the context: */
-
- if (vhe) {
- u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
-
- if (sve_guest)
- reg |= CPACR_EL1_ZEN;
-
- write_sysreg(reg, cpacr_el1);
- } else {
- write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
- cptr_el2);
- }
-
- isb();
-
- if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- /*
- * In the SVE case, VHE is assumed: it is enforced by
- * Kconfig and kvm_arch_init().
- */
- if (sve_host) {
- struct thread_struct *thread = container_of(
- vcpu->arch.host_fpsimd_state,
- struct thread_struct, uw.fpsimd_state);
-
- sve_save_state(sve_pffr(thread),
- &vcpu->arch.host_fpsimd_state->fpsr);
- } else {
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
- }
-
- vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
- }
-
- if (sve_guest) {
- sve_load_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
- sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
- write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
- } else {
- __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
- }
-
- /* Skip restoring fpexc32 for AArch64 guests */
- if (!(read_sysreg(hcr_el2) & HCR_RW))
- write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
- fpexc32_el2);
-
- vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
-
- return true;
-}
-
-static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
-{
- u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
- int rt = kvm_vcpu_sys_get_rt(vcpu);
- u64 val = vcpu_get_reg(vcpu, rt);
-
- /*
- * The normal sysreg handling code expects to see the traps,
- * let's not do anything here.
- */
- if (vcpu->arch.hcr_el2 & HCR_TVM)
- return false;
-
- switch (sysreg) {
- case SYS_SCTLR_EL1:
- write_sysreg_el1(val, SYS_SCTLR);
- break;
- case SYS_TTBR0_EL1:
- write_sysreg_el1(val, SYS_TTBR0);
- break;
- case SYS_TTBR1_EL1:
- write_sysreg_el1(val, SYS_TTBR1);
- break;
- case SYS_TCR_EL1:
- write_sysreg_el1(val, SYS_TCR);
- break;
- case SYS_ESR_EL1:
- write_sysreg_el1(val, SYS_ESR);
- break;
- case SYS_FAR_EL1:
- write_sysreg_el1(val, SYS_FAR);
- break;
- case SYS_AFSR0_EL1:
- write_sysreg_el1(val, SYS_AFSR0);
- break;
- case SYS_AFSR1_EL1:
- write_sysreg_el1(val, SYS_AFSR1);
- break;
- case SYS_MAIR_EL1:
- write_sysreg_el1(val, SYS_MAIR);
- break;
- case SYS_AMAIR_EL1:
- write_sysreg_el1(val, SYS_AMAIR);
- break;
- case SYS_CONTEXTIDR_EL1:
- write_sysreg_el1(val, SYS_CONTEXTIDR);
- break;
- default:
- return false;
- }
-
- __kvm_skip_instr(vcpu);
- return true;
-}
-
-static bool __hyp_text esr_is_ptrauth_trap(u32 esr)
-{
- u32 ec = ESR_ELx_EC(esr);
-
- if (ec == ESR_ELx_EC_PAC)
- return true;
-
- if (ec != ESR_ELx_EC_SYS64)
- return false;
-
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(regs, key) \
-({ \
- regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
-})
-
-static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu) ||
- !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu)))
- return false;
-
- ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- __ptrauth_save_key(ctxt->sys_regs, APIA);
- __ptrauth_save_key(ctxt->sys_regs, APIB);
- __ptrauth_save_key(ctxt->sys_regs, APDA);
- __ptrauth_save_key(ctxt->sys_regs, APDB);
- __ptrauth_save_key(ctxt->sys_regs, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
-/*
- * Return true when we were able to fixup the guest exit and should return to
- * the guest, false when we should restore the host state and return to the
- * main run loop.
- */
-static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
- vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
-
- /*
- * We're using the raw exception code in order to only process
- * the trap if no SError is pending. We will come back to the
- * same PC once the SError has been injected, and replay the
- * trapping instruction.
- */
- if (*exit_code != ARM_EXCEPTION_TRAP)
- goto exit;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
- return true;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- return true;
-
- if (__hyp_handle_ptrauth(vcpu))
- return true;
-
- if (!__populate_fault_info(vcpu))
- return true;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_dabt_isextabt(vcpu) &&
- !kvm_vcpu_dabt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
- }
-
-exit:
- /* Return to the host kernel and handle the exit */
- return false;
-}
-
-static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
-{
- if (!cpus_have_final_cap(ARM64_SSBD))
- return false;
-
- return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
-}
-
-static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * The host runs with the workaround always present. If the
- * guest wants it disabled, so be it...
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-#endif
-}
-
-static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * If the guest has disabled the workaround, bring it back on.
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-#endif
-}
-
-/**
- * Disable host events, enable guest events
- */
-static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenclr_el0);
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenset_el0);
-
- return (pmu->events_host || pmu->events_guest);
-}
-
-/**
- * Disable guest events, enable host events
- */
-static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenclr_el0);
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenset_el0);
-}
-
-/* Switch to the guest for VHE systems running in EL2 */
-static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- u64 exit_code;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- sysreg_save_host_state_vhe(host_ctxt);
-
- /*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs above. We must now call __activate_vm
- * before __activate_traps, because __activate_vm configures
- * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
- * (among other things).
- */
- __activate_vm(vcpu->kvm);
- __activate_traps(vcpu);
-
- sysreg_restore_guest_state_vhe(guest_ctxt);
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- sysreg_save_guest_state_vhe(guest_ctxt);
-
- __deactivate_traps(vcpu);
-
- sysreg_restore_host_state_vhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- __debug_switch_to_host(vcpu);
-
- return exit_code;
-}
-NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
-
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- local_daif_mask();
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-
- ret = __kvm_vcpu_run_vhe(vcpu);
-
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-
- return ret;
-}
-
-/* Switch to the guest for legacy non-VHE systems */
-int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool pmu_switch_needed;
- u64 exit_code;
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- */
- if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- pmr_sync();
- }
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
-
- __sysreg_save_state_nvhe(host_ctxt);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- *
- * Also, and in order to be able to deal with erratum #1319537 (A57)
- * and #1319367 (A72), we must ensure that all VM-related sysreg are
- * restored before we enable S2 translation.
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_state_nvhe(guest_ctxt);
-
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- __sysreg_save_state_nvhe(guest_ctxt);
- __sysreg32_save_state(vcpu);
- __timer_disable_traps(vcpu);
- __hyp_vgic_save_state(vcpu);
-
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
-
- __sysreg_restore_state_nvhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- /*
- * This must come after restoring the host sysregs, since a non-VHE
- * system may enable SPE here and make use of the TTBRs.
- */
- __debug_switch_to_host(vcpu);
-
- if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
-
- /* Returning to host will clear PSR.I, remask PMR if needed */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQOFF);
-
- return exit_code;
-}
-
-static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *__host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- unsigned long str_va;
-
- vcpu = __host_ctxt->__hyp_running_vcpu;
-
- if (read_sysreg(vttbr_el2)) {
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_state_nvhe(__host_ctxt);
- }
-
- /*
- * Force the panic string to be loaded from the literal pool,
- * making sure it is a kernel address and not a PC-relative
- * reference.
- */
- asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
-
- __hyp_do_panic(str_va,
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-
-static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- vcpu = host_ctxt->__hyp_running_vcpu;
-
- __deactivate_traps(vcpu);
- sysreg_restore_host_state_vhe(host_ctxt);
-
- panic(__hyp_panic_string,
- spsr, elr,
- read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
-{
- u64 spsr = read_sysreg_el2(SYS_SPSR);
- u64 elr = read_sysreg_el2(SYS_ELR);
- u64 par = read_sysreg(par_el1);
-
- if (!has_vhe())
- __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
- else
- __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
-
- unreachable();
-}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
deleted file mode 100644
index cc7e957f5b2c..000000000000
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ /dev/null
@@ -1,333 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-/*
- * Non-VHE: Both host and guest must save everything.
- *
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
- * pstate, which are handled as part of the el2 return state) on every
- * switch (sp_el0 is being dealt with in the assembly code).
- * tpidr_el0 and tpidrro_el0 only need to be switched when going
- * to host userspace or a different VCPU. EL1 registers only need to be
- * switched when potentially going to run a different VCPU. The latter two
- * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
- */
-
-static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
-}
-
-static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR);
- ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL);
- ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
-
- ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
-}
-
-static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_el1_state(ctxt);
- __sysreg_save_common_state(ctxt);
- __sysreg_save_user_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-
-void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
-
-void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
-
-static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-}
-
-static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
- write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
-
- if (has_vhe() ||
- !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- } else if (!ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for guest registers, hence the context
- * test. We're coming from the host, so SCTLR.M is already
- * set. Pairs with __activate_traps_nvhe().
- */
- write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
- TCR_EPD1_MASK | TCR_EPD0_MASK),
- SYS_TCR);
- isb();
- }
-
- write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
- write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
- write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
- write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
- write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
- write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR);
- write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR);
- write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR);
- write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
- write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR);
- write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL);
- write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
-
- if (!has_vhe() &&
- cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
- ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for host registers, hence the context
- * test. Pairs with __deactivate_traps_nvhe().
- */
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * deconfigured and disabled. We can now restore the host's
- * S1 configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-
- write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
- write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
-}
-
-static void __hyp_text
-__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- u64 pstate = ctxt->gp_regs.regs.pstate;
- u64 mode = pstate & PSR_AA32_MODE_MASK;
-
- /*
- * Safety check to ensure we're setting the CPU up to enter the guest
- * in a less privileged mode.
- *
- * If we are attempting a return to EL2 or higher in AArch64 state,
- * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
- * we'll take an illegal exception state exception immediately after
- * the ERET to the guest. Attempts to return to AArch32 Hyp will
- * result in an illegal exception return because EL2's execution state
- * is determined by SCR_EL3.RW.
- */
- if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
- pstate = PSR_MODE_EL2h | PSR_IL_BIT;
-
- write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
- write_sysreg_el2(pstate, SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_el1_state(ctxt);
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_user_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-
-void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
-
-void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
-
-void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
- spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
- spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
- spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
-
- sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
- sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
-}
-
-void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
- write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
- write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
- write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
-
- write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
- write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
-}
-
-/**
- * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Load system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_load() function
- * and loading system register state early avoids having to load them on
- * every entry to the VM.
- */
-void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
- struct kvm_cpu_context *host_ctxt;
-
- if (!has_vhe())
- return;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- __sysreg_save_user_state(host_ctxt);
-
- /*
- * Load guest EL1 and user state
- *
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = true;
-
- activate_traps_vhe_load(vcpu);
-}
-
-/**
- * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Save guest system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_put() function
- * and deferring saving system register state until we're no longer running the
- * VCPU avoids having to save them on every exit from the VM.
- */
-void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
- struct kvm_cpu_context *host_ctxt;
-
- if (!has_vhe())
- return;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- deactivate_traps_vhe_put();
-
- __sysreg_save_el1_state(guest_ctxt);
- __sysreg_save_user_state(guest_ctxt);
- __sysreg32_save_state(vcpu);
-
- /* Restore host user state */
- __sysreg_restore_user_state(host_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = false;
-}
-
-void __hyp_text __kvm_enable_ssbs(void)
-{
- u64 tmp;
-
- asm volatile(
- "mrs %0, sctlr_el2\n"
- "orr %0, %0, %1\n"
- "msr sctlr_el2, %0"
- : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
-}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
deleted file mode 100644
index d063a576d511..000000000000
--- a/arch/arm64/kvm/hyp/tlb.c
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/irqflags.h>
-
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/tlbflush.h>
-
-struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
-};
-
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- u64 val;
-
- local_irq_save(cxt->flags);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /*
- * For CPUs that are affected by ARM errata 1165522 or 1530923,
- * we cannot trust stage-1 to be in a correct state at that
- * point. Since we do not want to force a full load of the
- * vcpu state, we prevent the EL1 page-table walker to
- * allocate new TLBs. This is done by setting the EPD bits
- * in the TCR_EL1 register. We also need to prevent it to
- * allocate IPA->PA walks, so we enable the S1 MMU...
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
- val |= SCTLR_ELx_M;
- write_sysreg_el1(val, SYS_SCTLR);
- }
-
- /*
- * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
- * most TLB operations target EL2/EL0. In order to affect the
- * guest TLBs (EL1/EL0), we need to change one of these two
- * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
- * let's flip TGE before executing the TLB operation.
- *
- * ARM erratum 1165522 requires some special handling (again),
- * as we need to make sure both stages of translation are in
- * place before clearing TGE. __load_guest_stage2() already
- * has an ISB in order to deal with this.
- */
- __load_guest_stage2(kvm);
- val = read_sysreg(hcr_el2);
- val &= ~HCR_TGE;
- write_sysreg(val, hcr_el2);
- isb();
-}
-
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- u64 val;
-
- /*
- * For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- isb();
- }
-
- /* __load_guest_stage2() includes an ISB for the workaround. */
- __load_guest_stage2(kvm);
- asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
-}
-
-static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_guest_vhe(kvm, cxt);
- else
- __tlb_switch_to_guest_nvhe(kvm, cxt);
-}
-
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- /*
- * We're done with the TLB operation, let's restore the host's
- * view of HCR_EL2.
- */
- write_sysreg(0, vttbr_el2);
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
- isb();
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Restore the registers to what they were */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
- }
-
- local_irq_restore(cxt->flags);
-}
-
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- write_sysreg(0, vttbr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
- isb();
- /* Restore the host's TCR_EL1 */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- }
-}
-
-static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_host_vhe(kvm, cxt);
- else
- __tlb_switch_to_host_nvhe(kvm, cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- /*
- * We could do so much better if we had the VA as well.
- * Instead, we invalidate Stage-2 for this IPA, and the
- * whole of Stage-1. Weep...
- */
- ipa >>= 12;
- __tlbi(ipas2e1is, ipa);
-
- /*
- * We have to ensure completion of the invalidation at Stage-2,
- * since a table walk on another CPU could refill a TLB with a
- * complete (S1 + S2) walk based on the old Stage-2 mapping if
- * the Stage-1 invalidation happened first.
- */
- dsb(ish);
- __tlbi(vmalle1is);
- dsb(ish);
- isb();
-
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (!has_vhe() && icache_is_vpipt())
- __flush_icache_all();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalls12e1is);
- dsb(ish);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
- struct tlb_inv_context cxt;
-
- /* Switch to requested VMID */
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalle1);
- dsb(nsh);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_flush_vm_context(void)
-{
- dsb(ishst);
- __tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
- dsb(ish);
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 4f3a087e36d5..bd1bab551d48 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -13,7 +13,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+static bool __is_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
@@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
* 0: Not a GICV access
* -1: Illegal GICV access successfully performed
*/
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_dist *vgic = &kvm->arch.vgic;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 10ed539835c1..5a0073511efb 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -16,7 +16,7 @@
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
-static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
+static u64 __gic_v3_get_lr(unsigned int lr)
{
switch (lr & 0xf) {
case 0:
@@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
unreachable();
}
-static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
+static void __gic_v3_set_lr(u64 val, int lr)
{
switch (lr & 0xf) {
case 0:
@@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
}
}
-static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
+static void __vgic_v3_write_ap0rn(u32 val, int n)
{
switch (n) {
case 0:
@@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
}
}
-static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
+static void __vgic_v3_write_ap1rn(u32 val, int n)
{
switch (n) {
case 0:
@@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
}
}
-static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
+static u32 __vgic_v3_read_ap0rn(int n)
{
u32 val;
@@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
return val;
}
-static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
+static u32 __vgic_v3_read_ap1rn(int n)
{
u32 val;
@@ -194,7 +194,7 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
return val;
}
-void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
@@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
int i;
@@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
{
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
@@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_init_lrs(void)
+void __vgic_v3_init_lrs(void)
{
int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
int i;
@@ -403,30 +403,30 @@ void __hyp_text __vgic_v3_init_lrs(void)
__gic_v3_set_lr(0, i);
}
-u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void)
+u64 __vgic_v3_get_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
}
-u64 __hyp_text __vgic_v3_read_vmcr(void)
+u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
+void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-static int __hyp_text __vgic_v3_bpr_min(void)
+static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
}
-static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
return crm != 8;
@@ -434,9 +434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
#define GICv3_IDLE_PRIORITY 0xff
-static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
- u32 vmcr,
- u64 *lr_val)
+static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr,
+ u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
u8 priority = GICv3_IDLE_PRIORITY;
@@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
return lr;
}
-static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
- int intid, u64 *lr_val)
+static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid,
+ u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
int i;
@@ -494,7 +493,7 @@ static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
return -1;
}
-static int __hyp_text __vgic_v3_get_highest_active_priority(void)
+static int __vgic_v3_get_highest_active_priority(void)
{
u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
u32 hap = 0;
@@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void)
return GICv3_IDLE_PRIORITY;
}
-static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr)
+static unsigned int __vgic_v3_get_bpr0(u32 vmcr)
{
return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
}
-static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
+static unsigned int __vgic_v3_get_bpr1(u32 vmcr)
{
unsigned int bpr;
@@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
* Convert a priority to a preemption level, taking the relevant BPR
* into account by zeroing the sub-priority bits.
*/
-static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
{
unsigned int bpr;
@@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
* matter what the guest does with its BPR, we can always set/get the
* same value of a priority.
*/
-static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
{
u8 pre, ap;
u32 val;
@@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
}
}
-static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
+static int __vgic_v3_clear_highest_active_priority(void)
{
u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
u32 hap = 0;
@@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
return GICv3_IDLE_PRIORITY;
}
-static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 lr_val;
u8 lr_prio, pmr;
@@ -661,7 +660,7 @@ spurious:
vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
}
-static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+static void __vgic_v3_clear_active_lr(int lr, u64 lr_val)
{
lr_val &= ~ICH_LR_ACTIVE_BIT;
if (lr_val & ICH_LR_HW) {
@@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
__gic_v3_set_lr(lr_val, lr);
}
-static void __hyp_text __vgic_v3_bump_eoicount(void)
+static void __vgic_v3_bump_eoicount(void)
{
u32 hcr;
@@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void)
write_gicreg(hcr, ICH_HCR_EL2);
}
-static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@@ -707,7 +705,7 @@ static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
__vgic_v3_clear_active_lr(lr, lr_val);
}
-static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_clear_active_lr(lr, lr_val);
}
-static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
}
-static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
}
-static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
@@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr,
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
@@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr,
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
}
-static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
}
-static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
u8 bpr_min = __vgic_v3_bpr_min() - 1;
@@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
u8 bpr_min = __vgic_v3_bpr_min();
@@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
{
u32 val;
@@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int
__vgic_v3_write_ap1rn(val, n);
}
-static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
+static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 0);
}
-static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
+static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 1);
}
-static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 2);
}
-static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 3);
}
-static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 0);
}
-static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 1);
}
-static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 2);
}
-static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 3);
}
-static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 lr_val;
int lr, lr_grp, grp;
@@ -915,16 +906,14 @@ spurious:
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
}
-static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vmcr &= ICH_VMCR_PMR_MASK;
vmcr >>= ICH_VMCR_PMR_SHIFT;
vcpu_set_reg(vcpu, rt, vmcr);
}
-static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu,
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = __vgic_v3_get_highest_active_priority();
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vtr, val;
@@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu,
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu,
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
u32 esr;
@@ -992,7 +978,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
- esr = kvm_vcpu_get_hsr(vcpu);
+ esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
__kvm_skip_instr(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
new file mode 100644
index 000000000000..461e97c375cc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_VHE_HYPERVISOR__
+ccflags-y := -D__KVM_VHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
new file mode 100644
index 000000000000..f1e2e5a00933
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_hyp.h>
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
new file mode 100644
index 000000000000..c52d714e0d75
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_ZEN;
+
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+
+ val |= CPTR_EL2_TAM;
+
+ if (update_fp_enabled(vcpu)) {
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ } else {
+ val &= ~CPACR_EL1_FPEN;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+NOKPROBE_SYMBOL(__activate_traps);
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ extern char vectors[]; /* kernel exception vectors */
+
+ ___deactivate_traps(vcpu);
+
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL2/EL0 translation regime used by
+ * the host.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+ write_sysreg(vectors, vbar_el1);
+}
+NOKPROBE_SYMBOL(__deactivate_traps);
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ u64 exit_code;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ /*
+ * ARM erratum 1165522 requires us to configure both stage 1 and
+ * stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE.
+ *
+ * We have already configured the guest's stage 1 translation in
+ * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm
+ * before __activate_traps, because __activate_vm configures
+ * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
+ * (among other things).
+ */
+ __activate_vm(vcpu->arch.hw_mmu);
+ __activate_traps(vcpu);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_daif_mask();
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
+ */
+ pmr_sync();
+
+ ret = __kvm_vcpu_run_vhe(vcpu);
+
+ /*
+ * local_daif_restore() takes care to properly restore PSTATE.DAIF
+ * and the GIC PMR if the host is using IRQ priorities.
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
+
+ return ret;
+}
+
+static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+}
+NOKPROBE_SYMBOL(__hyp_call_panic);
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+
+ __hyp_call_panic(spsr, elr, par, host_ctxt);
+ unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
new file mode 100644
index 000000000000..996471e4c138
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
+ */
+
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
+
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
+
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
+
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
+
+/**
+ * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c
new file mode 100644
index 000000000000..4cda674a8be6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+
+void __kvm_timer_set_cntvoff(u64 cntvoff)
+{
+ write_sysreg(cntvoff, cntvoff_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
new file mode 100644
index 000000000000..fd7895945bbc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/irqflags.h>
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ u64 val;
+
+ local_irq_save(cxt->flags);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /*
+ * For CPUs that are affected by ARM errata 1165522 or 1530923,
+ * we cannot trust stage-1 to be in a correct state at that
+ * point. Since we do not want to force a full load of the
+ * vcpu state, we prevent the EL1 page-table walker to
+ * allocate new TLBs. This is done by setting the EPD bits
+ * in the TCR_EL1 register. We also need to prevent it to
+ * allocate IPA->PA walks, so we enable the S1 MMU...
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ }
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ *
+ * ARM erratum 1165522 requires some special handling (again),
+ * as we need to make sure both stages of translation are in
+ * place before clearing TGE. __load_guest_stage2() already
+ * has an ISB in order to deal with this.
+ */
+ __load_guest_stage2(mmu);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ isb();
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Restore the registers to what they were */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ }
+
+ local_irq_restore(cxt->flags);
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index e21fdd93027a..ebfdfc27b2bd 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -64,7 +64,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
case PSR_MODE_EL1h:
vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+ vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
default:
/* Don't do that */
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 4e0366759726..6a2826f1bf5e 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -77,9 +77,8 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
* or in-kernel IO emulation
*
* @vcpu: The VCPU pointer
- * @run: The VCPU run struct containing the mmio data
*/
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
{
unsigned long data;
unsigned int len;
@@ -92,6 +91,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mmio_needed = 0;
if (!kvm_vcpu_dabt_iswrite(vcpu)) {
+ struct kvm_run *run = vcpu->run;
+
len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
@@ -119,9 +120,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa)
+int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
+ struct kvm_run *run = vcpu->run;
unsigned long data;
unsigned long rt;
int ret;
@@ -145,12 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
return -ENOSYS;
}
- /* Page table accesses IO mem: tell guest to fix its TTBR */
- if (kvm_vcpu_dabt_iss1tw(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
/*
* Prepare MMIO operation. First decode the syndrome data we get
* from the CPU. Then try if some in-kernel emulation feels
@@ -188,7 +183,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (!is_write)
memcpy(run->mmio.data, data_buf, len);
vcpu->stat.mmio_exit_kernel++;
- kvm_handle_mmio_return(vcpu, run);
+ kvm_handle_mmio_return(vcpu);
return 1;
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31058e6e7c2a..0121ef2c7c8d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level);
}
/*
@@ -90,106 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn)
/**
* stage2_dissolve_pmd() - clear and flush huge PMD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pmd: pmd pointer for IPA
*
* Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd)
{
if (!pmd_thp_or_huge(*pmd))
return;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
put_page(virt_to_page(pmd));
}
/**
* stage2_dissolve_pud() - clear and flush huge PUD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pud: pud pointer for IPA
*
* Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
+static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp)
{
+ struct kvm *kvm = mmu->kvm;
+
if (!stage2_pud_huge(kvm, *pudp))
return;
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
put_page(virt_to_page(pudp));
}
-static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- int min, int max)
-{
- void *page;
-
- BUG_ON(max > KVM_NR_MEM_OBJS);
- if (cache->nobjs >= min)
- return 0;
- while (cache->nobjs < max) {
- page = (void *)__get_free_page(GFP_PGTABLE_USER);
- if (!page)
- return -ENOMEM;
- cache->objects[cache->nobjs++] = page;
- }
- return 0;
-}
-
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
-{
- while (mc->nobjs)
- free_page((unsigned long)mc->objects[--mc->nobjs]);
-}
-
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
-{
- void *p;
-
- BUG_ON(!mc || !mc->nobjs);
- p = mc->objects[--mc->nobjs];
- return p;
-}
-
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
stage2_pgd_clear(kvm, pgd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_p4d_free(kvm, p4d_table);
put_page(virt_to_page(pgd));
}
-static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr)
+static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0);
stage2_p4d_clear(kvm, p4d);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pud_free(kvm, pud_table);
put_page(virt_to_page(p4d));
}
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
+
VM_BUG_ON(stage2_pud_huge(kvm, *pud));
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pmd_free(kvm, pmd_table);
put_page(virt_to_page(pud));
}
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
VM_BUG_ON(pmd_thp_or_huge(*pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
free_page((unsigned long)pte_table);
put_page(virt_to_page(pmd));
}
@@ -255,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
* we then fully enforce cacheability of RAM, no matter what the guest
* does.
*/
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t start_addr = addr;
@@ -267,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
pte_t old_pte = *pte;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
/* No need to invalidate the cache for device mappings */
if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -277,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (stage2_pte_table_empty(kvm, start_pte))
- clear_stage2_pmd_entry(kvm, pmd, start_addr);
+ if (stage2_pte_table_empty(mmu->kvm, start_pte))
+ clear_stage2_pmd_entry(mmu, pmd, start_addr);
}
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pmd_t *pmd, *start_pmd;
@@ -295,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
pmd_t old_pmd = *pmd;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
kvm_flush_dcache_pmd(old_pmd);
put_page(virt_to_page(pmd));
} else {
- unmap_stage2_ptes(kvm, pmd, addr, next);
+ unmap_stage2_ptes(mmu, pmd, addr, next);
}
}
} while (pmd++, addr = next, addr != end);
if (stage2_pmd_table_empty(kvm, start_pmd))
- clear_stage2_pud_entry(kvm, pud, start_addr);
+ clear_stage2_pud_entry(mmu, pud, start_addr);
}
-static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
+static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pud_t *pud, *start_pud;
@@ -324,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
pud_t old_pud = *pud;
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
kvm_flush_dcache_pud(old_pud);
put_page(virt_to_page(pud));
} else {
- unmap_stage2_pmds(kvm, pud, addr, next);
+ unmap_stage2_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
if (stage2_pud_table_empty(kvm, start_pud))
- clear_stage2_p4d_entry(kvm, p4d, start_addr);
+ clear_stage2_p4d_entry(mmu, p4d, start_addr);
}
-static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
p4d_t *p4d, *start_p4d;
@@ -347,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- unmap_stage2_puds(kvm, p4d, addr, next);
+ unmap_stage2_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
if (stage2_p4d_table_empty(kvm, start_p4d))
- clear_stage2_pgd_entry(kvm, pgd, start_addr);
+ clear_stage2_pgd_entry(mmu, pgd, start_addr);
}
/**
@@ -365,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
@@ -374,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
assert_spin_locked(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Make sure the page table is still active, as another thread
* could have possibly freed the page table, while we released
* the lock.
*/
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- unmap_stage2_p4ds(kvm, pgd, addr, next);
+ unmap_stage2_p4ds(mmu, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
@@ -395,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
} while (pgd++, addr = next, addr != end);
}
-static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
pte_t *pte;
@@ -407,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -420,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
if (pmd_thp_or_huge(*pmd))
kvm_flush_dcache_pmd(*pmd);
else
- stage2_flush_ptes(kvm, pmd, addr, next);
+ stage2_flush_ptes(mmu, pmd, addr, next);
}
} while (pmd++, addr = next, addr != end);
}
-static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -438,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
if (stage2_pud_huge(kvm, *pud))
kvm_flush_dcache_pud(*pud);
else
- stage2_flush_pmds(kvm, pud, addr, next);
+ stage2_flush_pmds(mmu, pud, addr, next);
}
} while (pud++, addr = next, addr != end);
}
-static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -453,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_flush_puds(kvm, p4d, addr, next);
+ stage2_flush_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
static void stage2_flush_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- stage2_flush_p4ds(kvm, pgd, addr, next);
+ stage2_flush_p4ds(mmu, pgd, addr, next);
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
@@ -996,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
}
/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm: The KVM struct pointer for the VM.
+ * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
+ * @kvm: The pointer to the KVM structure
+ * @mmu: The pointer to the s2 MMU structure
*
* Allocates only the stage-2 HW PGD level table(s) of size defined by
- * stage2_pgd_size(kvm).
+ * stage2_pgd_size(mmu->kvm).
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
phys_addr_t pgd_phys;
pgd_t *pgd;
+ int cpu;
- if (kvm->arch.pgd != NULL) {
+ if (mmu->pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
@@ -1024,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
return -EINVAL;
- kvm->arch.pgd = pgd;
- kvm->arch.pgd_phys = pgd_phys;
+ mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
+ if (!mmu->last_vcpu_ran) {
+ free_pages_exact(pgd, stage2_pgd_size(kvm));
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
+
+ mmu->kvm = kvm;
+ mmu->pgd = pgd;
+ mmu->pgd_phys = pgd_phys;
+ mmu->vmid.vmid_gen = 0;
+
return 0;
}
@@ -1064,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
- unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
}
hva = vm_end;
} while (hva < reg_end);
@@ -1096,43 +1093,38 @@ void stage2_unmap_vm(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
}
-/**
- * kvm_free_stage2_pgd - free all stage-2 tables
- * @kvm: The KVM struct pointer for the VM.
- *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
- * underlying level-2 and level-3 tables before freeing the actual level-1 table
- * and setting the struct pointer to NULL.
- */
-void kvm_free_stage2_pgd(struct kvm *kvm)
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
+ struct kvm *kvm = mmu->kvm;
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
- if (kvm->arch.pgd) {
- unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
- pgd = READ_ONCE(kvm->arch.pgd);
- kvm->arch.pgd = NULL;
- kvm->arch.pgd_phys = 0;
+ if (mmu->pgd) {
+ unmap_stage2_range(mmu, 0, kvm_phys_size(kvm));
+ pgd = READ_ONCE(mmu->pgd);
+ mmu->pgd = NULL;
}
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */
- if (pgd)
+ if (pgd) {
free_pages_exact(pgd, stage2_pgd_size(kvm));
+ free_percpu(mmu->last_vcpu_ran);
+ }
}
-static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
p4d_t *p4d;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
- p4d = mmu_memory_cache_alloc(cache);
+ p4d = kvm_mmu_memory_cache_alloc(cache);
stage2_pgd_populate(kvm, pgd, p4d);
get_page(virt_to_page(pgd));
}
@@ -1140,17 +1132,18 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_p4d_offset(kvm, pgd, addr);
}
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
pud_t *pud;
- p4d = stage2_get_p4d(kvm, cache, addr);
+ p4d = stage2_get_p4d(mmu, cache, addr);
if (stage2_p4d_none(kvm, *p4d)) {
if (!cache)
return NULL;
- pud = mmu_memory_cache_alloc(cache);
+ pud = kvm_mmu_memory_cache_alloc(cache);
stage2_p4d_populate(kvm, p4d, pud);
get_page(virt_to_page(p4d));
}
@@ -1158,20 +1151,21 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pud_offset(kvm, p4d, addr);
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud || stage2_pud_huge(kvm, *pud))
return NULL;
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return NULL;
- pmd = mmu_memory_cache_alloc(cache);
+ pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@@ -1179,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pmd_offset(kvm, pud, addr);
}
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
- *cache, phys_addr_t addr, const pmd_t *new_pmd)
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pmd_t *new_pmd)
{
pmd_t *pmd, old_pmd;
retry:
- pmd = stage2_get_pmd(kvm, cache, addr);
+ pmd = stage2_get_pmd(mmu, cache, addr);
VM_BUG_ON(!pmd);
old_pmd = *pmd;
@@ -1218,7 +1213,7 @@ retry:
* get handled accordingly.
*/
if (!pmd_thp_or_huge(old_pmd)) {
- unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE);
goto retry;
}
/*
@@ -1234,7 +1229,7 @@ retry:
*/
WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
} else {
get_page(virt_to_page(pmd));
}
@@ -1243,13 +1238,15 @@ retry:
return 0;
}
-static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pud_t *new_pudp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp, old_pud;
retry:
- pudp = stage2_get_pud(kvm, cache, addr);
+ pudp = stage2_get_pud(mmu, cache, addr);
VM_BUG_ON(!pudp);
old_pud = *pudp;
@@ -1268,13 +1265,13 @@ retry:
* the range for this block and retry.
*/
if (!stage2_pud_huge(kvm, old_pud)) {
- unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE);
goto retry;
}
WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
} else {
get_page(virt_to_page(pudp));
}
@@ -1289,9 +1286,10 @@ retry:
* leaf-entry is returned in the appropriate level variable - pudpp,
* pmdpp, ptepp.
*/
-static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
+static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr,
pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -1300,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
*pmdpp = NULL;
*ptepp = NULL;
- pudp = stage2_get_pud(kvm, NULL, addr);
+ pudp = stage2_get_pud(mmu, NULL, addr);
if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
return false;
@@ -1326,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
return true;
}
-static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
+static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz)
{
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
bool found;
- found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
+ found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep);
if (!found)
return false;
@@ -1345,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
return sz == PAGE_SIZE && kvm_s2pte_exec(ptep);
}
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pte_t *new_pte,
unsigned long flags)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, old_pte;
@@ -1358,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
VM_BUG_ON(logging_active && !cache);
/* Create stage-2 page table mapping - Levels 0 and 1 */
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud) {
/*
* Ignore calls from kvm_set_spte_hva for unallocated
@@ -1372,12 +1372,12 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* on to allocate page.
*/
if (logging_active)
- stage2_dissolve_pud(kvm, addr, pud);
+ stage2_dissolve_pud(mmu, addr, pud);
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
- pmd = mmu_memory_cache_alloc(cache);
+ pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@@ -1396,13 +1396,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* allocate page.
*/
if (logging_active)
- stage2_dissolve_pmd(kvm, addr, pmd);
+ stage2_dissolve_pmd(mmu, addr, pmd);
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
- pte = mmu_memory_cache_alloc(cache);
+ pte = kvm_mmu_memory_cache_alloc(cache);
kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
}
@@ -1420,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
} else {
get_page(virt_to_page(pte));
}
@@ -1469,7 +1469,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t addr, end;
int ret = 0;
unsigned long pfn;
- struct kvm_mmu_memory_cache cache = { 0, };
+ struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(pa);
@@ -1480,14 +1480,13 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (writable)
pte = kvm_s2pte_mkwrite(pte);
- ret = mmu_topup_memory_cache(&cache,
- kvm_mmu_cache_min_pages(kvm),
- KVM_NR_MEM_OBJS);
+ ret = kvm_mmu_topup_memory_cache(&cache,
+ kvm_mmu_cache_min_pages(kvm));
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, &cache, addr, &pte,
- KVM_S2PTE_FLAG_IS_IOMAP);
+ ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
+ KVM_S2PTE_FLAG_IS_IOMAP);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -1496,7 +1495,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
}
out:
- mmu_free_memory_cache(&cache);
+ kvm_mmu_free_memory_cache(&cache);
return ret;
}
@@ -1526,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -1549,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
/**
* stage2_wp_puds - write protect P4D range
- * @pgd: pointer to pgd entry
+ * @p4d: pointer to p4d entry
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -1567,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
if (!kvm_s2pud_readonly(pud))
kvm_set_s2pud_readonly(pud);
} else {
- stage2_wp_pmds(kvm, pud, addr, next);
+ stage2_wp_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
@@ -1579,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -1589,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_wp_puds(kvm, p4d, addr, next);
+ stage2_wp_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
@@ -1599,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
* @addr: Start address of range
* @end: End address of range
*/
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1616,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
* the lock.
*/
cond_resched_lock(&kvm->mmu_lock);
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (stage2_pgd_present(kvm, *pgd))
- stage2_wp_p4ds(kvm, pgd, addr, next);
+ stage2_wp_p4ds(mmu, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -1650,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
@@ -1674,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
}
/*
@@ -1837,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pgprot_t mem_type = PAGE_S2;
bool logging_active = memslot_is_logging(memslot);
unsigned long vma_pagesize, flags = 0;
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
@@ -1882,8 +1886,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mmap_read_unlock(current->mm);
/* We need minimum second+third level pages */
- ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
- KVM_NR_MEM_OBJS);
+ ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm));
if (ret)
return ret;
@@ -1959,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
needs_exec = exec_fault ||
(fault_status == FSC_PERM &&
- stage2_is_exec(kvm, fault_ipa, vma_pagesize));
+ stage2_is_exec(mmu, fault_ipa, vma_pagesize));
if (vma_pagesize == PUD_SIZE) {
pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
@@ -1971,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pud = kvm_s2pud_mkexec(new_pud);
- ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
+ ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud);
} else if (vma_pagesize == PMD_SIZE) {
pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
@@ -1983,7 +1986,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pmd = kvm_s2pmd_mkexec(new_pmd);
- ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
@@ -1995,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pte = kvm_s2pte_mkexec(new_pte);
- ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+ ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
}
out_unlock:
@@ -2024,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
spin_lock(&vcpu->kvm->mmu_lock);
- if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte))
goto out;
if (pud) { /* HugeTLB */
@@ -2050,7 +2053,6 @@ out:
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
- * @run: the kvm_run structure
*
* Any abort that gets to the host is almost guaranteed to be caused by a
* missing second stage translation table entry, which can mean that either the
@@ -2059,7 +2061,7 @@ out:
* space. The distinction is based on the IPA causing the fault and whether this
* memory region has been registered as standard RAM by user space.
*/
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
unsigned long fault_status;
phys_addr_t fault_ipa;
@@ -2075,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
/* Synchronous External Abort? */
- if (kvm_vcpu_dabt_isextabt(vcpu)) {
+ if (kvm_vcpu_abt_issea(vcpu)) {
/*
* For RAS the host kernel may handle this abort.
* There is no need to pass the error into the guest.
*/
- if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
- return 1;
-
- if (unlikely(!is_iabt)) {
+ if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
kvm_inject_vabt(vcpu);
- return 1;
- }
+
+ return 1;
}
- trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+ trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
@@ -2098,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
- (unsigned long)kvm_vcpu_get_hsr(vcpu));
+ (unsigned long)kvm_vcpu_get_esr(vcpu));
return -EFAULT;
}
@@ -2109,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
write_fault = kvm_is_write_fault(vcpu);
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
+ /*
+ * The guest has put either its instructions or its page-tables
+ * somewhere it shouldn't have. Userspace won't be able to do
+ * anything about this (there's no syndrome for a start), so
+ * re-inject the abort back into the guest.
+ */
if (is_iabt) {
- /* Prefetch Abort on I/O address */
ret = -ENOEXEC;
goto out;
}
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ goto out_unlock;
+ }
+
/*
* Check for a cache maintenance operation. Since we
* ended-up here, we know it is outside of any memory
@@ -2125,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
* So let's assume that the guest is just being
* cautious, and skip the instruction.
*/
- if (kvm_vcpu_dabt_is_cm(vcpu)) {
+ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
ret = 1;
goto out_unlock;
@@ -2138,7 +2148,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
* of the page size.
*/
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
- ret = io_mem_abort(vcpu, run, fault_ipa);
+ ret = io_mem_abort(vcpu, fault_ipa);
goto out_unlock;
}
@@ -2198,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_unmap_hva_range(start, end);
@@ -2225,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
* therefore stage2_set_pte() never needs to clear out a huge PMD
* through this calling path.
*/
- stage2_set_pte(kvm, NULL, gpa, pte, 0);
+ stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
return 0;
}
@@ -2236,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
kvm_pfn_t pfn = pte_pfn(pte);
pte_t stage2_pte;
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_set_spte_hva(hva);
@@ -2259,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2277,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2290,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_age_hva(start, end);
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
@@ -2298,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_test_age_hva(hva);
return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
@@ -2307,7 +2317,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
- mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
phys_addr_t kvm_mmu_get_httbr(void)
@@ -2511,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
else
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -2530,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_free_stage2_pgd(kvm);
+ kvm_free_stage2_pgd(&kvm->arch.mmu);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -2540,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
spin_unlock(&kvm->mmu_lock);
}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index a900181e3867..accc1d5fba61 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -100,7 +100,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
*/
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
{
- unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs;
unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
switch (mode) {
@@ -147,8 +147,20 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
{
int spsr_idx = vcpu_spsr32_mode(vcpu);
- if (!vcpu->arch.sysregs_loaded_on_cpu)
- return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return __vcpu_sys_reg(vcpu, SPSR_EL1);
+ case KVM_SPSR_ABT:
+ return vcpu->arch.ctxt.spsr_abt;
+ case KVM_SPSR_UND:
+ return vcpu->arch.ctxt.spsr_und;
+ case KVM_SPSR_IRQ:
+ return vcpu->arch.ctxt.spsr_irq;
+ case KVM_SPSR_FIQ:
+ return vcpu->arch.ctxt.spsr_fiq;
+ }
+ }
switch (spsr_idx) {
case KVM_SPSR_SVC:
@@ -171,7 +183,24 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
int spsr_idx = vcpu_spsr32_mode(vcpu);
if (!vcpu->arch.sysregs_loaded_on_cpu) {
- vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
+ break;
+ case KVM_SPSR_ABT:
+ vcpu->arch.ctxt.spsr_abt = v;
+ break;
+ case KVM_SPSR_UND:
+ vcpu->arch.ctxt.spsr_und = v;
+ break;
+ case KVM_SPSR_IRQ:
+ vcpu->arch.ctxt.spsr_irq = v;
+ break;
+ case KVM_SPSR_FIQ:
+ vcpu->arch.ctxt.spsr_fiq = v;
+ break;
+ }
+
return;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 6ed36be51b4b..ee33875c5c2a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -42,6 +42,11 @@ static u32 kvm_ipa_limit;
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
+static bool system_has_full_ptr_auth(void)
+{
+ return system_supports_address_auth() && system_supports_generic_auth();
+}
+
/**
* kvm_arch_vm_ioctl_check_extension
*
@@ -80,8 +85,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
- r = has_vhe() && system_supports_address_auth() &&
- system_supports_generic_auth();
+ r = system_has_full_ptr_auth();
break;
default:
r = 0;
@@ -205,19 +209,14 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
- /* Support ptrauth only if the system supports these capabilities. */
- if (!has_vhe())
- return -EINVAL;
-
- if (!system_supports_address_auth() ||
- !system_supports_generic_auth())
- return -EINVAL;
/*
* For now make sure that both address/generic pointer authentication
- * features are requested by the userspace together.
+ * features are requested by the userspace together and the system
+ * supports these capabilities.
*/
if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
+ !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
+ !system_has_full_ptr_auth())
return -EINVAL;
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
@@ -292,7 +291,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset core registers */
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
- vcpu_gp_regs(vcpu)->regs.pstate = pstate;
+ vcpu_gp_regs(vcpu)->pstate = pstate;
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d3196671c590..077293b5115f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -94,6 +94,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
@@ -133,6 +134,7 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
@@ -242,6 +244,25 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
+
+ if (p->is_aarch32) {
+ if (r->Op2 & 2)
+ p->regval = upper_32_bits(p->regval);
+ else
+ p->regval = lower_32_bits(p->regval);
+ }
+
+ return true;
+}
+
/*
* Trap handler for the GICv3 SGI generation system register.
* Forward the request to the VGIC emulation.
@@ -615,6 +636,12 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
}
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 actlr = read_sysreg(actlr_el1);
+ vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1);
+}
+
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mpidr;
@@ -1518,6 +1545,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(7,7),
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
+ { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@ -1957,6 +1985,8 @@ static const struct sys_reg_desc cp14_64_regs[] = {
static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr },
{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
@@ -2109,36 +2139,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
return 0;
}
-/* Target specific emulation tables */
-static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
-
-void kvm_register_target_sys_reg_table(unsigned int target,
- struct kvm_sys_reg_target_table *table)
-{
- if (check_sysreg_table(table->table64.table, table->table64.num, false) ||
- check_sysreg_table(table->table32.table, table->table32.num, true))
- return;
-
- target_tables[target] = table;
-}
-
-/* Get specific register table for this target. */
-static const struct sys_reg_desc *get_target_table(unsigned target,
- bool mode_is_64,
- size_t *num)
-{
- struct kvm_sys_reg_target_table *table;
-
- table = target_tables[target];
- if (mode_is_64) {
- *num = table->table64.num;
- return table->table64.table;
- } else {
- *num = table->table32.num;
- return table->table32.table;
- }
-}
-
static int match_sys_reg(const void *key, const void *elt)
{
const unsigned long pval = (unsigned long)key;
@@ -2156,7 +2156,7 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@@ -2220,10 +2220,10 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
static void unhandled_cp_access(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
- u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
int cp = -1;
- switch(hsr_ec) {
+ switch (esr_ec) {
case ESR_ELx_EC_CP15_32:
case ESR_ELx_EC_CP15_64:
cp = 15;
@@ -2249,22 +2249,20 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
*/
static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *global,
- size_t nr_global,
- const struct sys_reg_desc *target_specific,
- size_t nr_specific)
+ size_t nr_global)
{
struct sys_reg_params params;
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- int Rt2 = (hsr >> 10) & 0x1f;
+ int Rt2 = (esr >> 10) & 0x1f;
params.is_aarch32 = true;
params.is_32bit = false;
- params.CRm = (hsr >> 1) & 0xf;
- params.is_write = ((hsr & 1) == 0);
+ params.CRm = (esr >> 1) & 0xf;
+ params.is_write = ((esr & 1) == 0);
params.Op0 = 0;
- params.Op1 = (hsr >> 16) & 0xf;
+ params.Op1 = (esr >> 16) & 0xf;
params.Op2 = 0;
params.CRn = 0;
@@ -2278,14 +2276,11 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
}
/*
- * Try to emulate the coprocessor access using the target
- * specific table first, and using the global table afterwards.
- * If either of the tables contains a handler, handle the
+ * If the table contains a handler, handle the
* potential register operation in the case of a read and return
* with success.
*/
- if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
- !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!emulate_cp(vcpu, &params, global, nr_global)) {
/* Split up the value between registers for the read side */
if (!params.is_write) {
vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
@@ -2306,26 +2301,23 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
*/
static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *global,
- size_t nr_global,
- const struct sys_reg_desc *target_specific,
- size_t nr_specific)
+ size_t nr_global)
{
struct sys_reg_params params;
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
params.is_aarch32 = true;
params.is_32bit = true;
- params.CRm = (hsr >> 1) & 0xf;
+ params.CRm = (esr >> 1) & 0xf;
params.regval = vcpu_get_reg(vcpu, Rt);
- params.is_write = ((hsr & 1) == 0);
- params.CRn = (hsr >> 10) & 0xf;
+ params.is_write = ((esr & 1) == 0);
+ params.CRn = (esr >> 10) & 0xf;
params.Op0 = 0;
- params.Op1 = (hsr >> 14) & 0x7;
- params.Op2 = (hsr >> 17) & 0x7;
+ params.Op1 = (esr >> 14) & 0x7;
+ params.Op2 = (esr >> 17) & 0x7;
- if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
- !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!emulate_cp(vcpu, &params, global, nr_global)) {
if (!params.is_write)
vcpu_set_reg(vcpu, Rt, params.regval);
return 1;
@@ -2335,40 +2327,24 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
return 1;
}
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
{
- const struct sys_reg_desc *target_specific;
- size_t num;
-
- target_specific = get_target_table(vcpu->arch.target, false, &num);
- return kvm_handle_cp_64(vcpu,
- cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
- target_specific, num);
+ return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs));
}
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
- const struct sys_reg_desc *target_specific;
- size_t num;
-
- target_specific = get_target_table(vcpu->arch.target, false, &num);
- return kvm_handle_cp_32(vcpu,
- cp15_regs, ARRAY_SIZE(cp15_regs),
- target_specific, num);
+ return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
}
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_64(vcpu,
- cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
- NULL, 0);
+ return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs));
}
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu,
- cp14_regs, ARRAY_SIZE(cp14_regs),
- NULL, 0);
+ return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
}
static bool is_imp_def_sys_reg(struct sys_reg_params *params)
@@ -2380,15 +2356,9 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
- size_t num;
- const struct sys_reg_desc *table, *r;
+ const struct sys_reg_desc *r;
- table = get_target_table(vcpu->arch.target, true, &num);
-
- /* Search target-specific then generic table. */
- r = find_reg(params, table, num);
- if (!r)
- r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+ r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
if (likely(r)) {
perform_access(vcpu, params, r);
@@ -2403,25 +2373,30 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
return 1;
}
-static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *table, size_t num)
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
unsigned long i;
- for (i = 0; i < num; i++)
- if (table[i].reset)
- table[i].reset(vcpu, &table[i]);
+ for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++)
+ if (sys_reg_descs[i].reset)
+ sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]);
}
/**
* kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
* @vcpu: The VCPU pointer
- * @run: The kvm_run struct
*/
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
{
struct sys_reg_params params;
- unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
int ret;
@@ -2492,8 +2467,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
u64 id)
{
- size_t num;
- const struct sys_reg_desc *table, *r;
+ const struct sys_reg_desc *r;
struct sys_reg_params params;
/* We only do sys_reg for now. */
@@ -2503,10 +2477,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if (!index_to_params(id, &params))
return NULL;
- table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg(&params, table, num);
- if (!r)
- r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+ r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
/* Not saved in the sys_reg array and not otherwise accessible? */
if (r && !(r->reg || r->get_user))
@@ -2806,35 +2777,17 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
/* Assumed ordered tables, see kvm_sys_reg_table_init. */
static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
{
- const struct sys_reg_desc *i1, *i2, *end1, *end2;
+ const struct sys_reg_desc *i2, *end2;
unsigned int total = 0;
- size_t num;
int err;
- /* We check for duplicates here, to allow arch-specific overrides. */
- i1 = get_target_table(vcpu->arch.target, true, &num);
- end1 = i1 + num;
i2 = sys_reg_descs;
end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
- BUG_ON(i1 == end1 || i2 == end2);
-
- /* Walk carefully, as both tables may refer to the same register. */
- while (i1 || i2) {
- int cmp = cmp_sys_reg(i1, i2);
- /* target-specific overrides generic entry. */
- if (cmp <= 0)
- err = walk_one_sys_reg(vcpu, i1, &uind, &total);
- else
- err = walk_one_sys_reg(vcpu, i2, &uind, &total);
-
+ while (i2 != end2) {
+ err = walk_one_sys_reg(vcpu, i2++, &uind, &total);
if (err)
return err;
-
- if (cmp <= 0 && ++i1 == end1)
- i1 = NULL;
- if (cmp >= 0 && ++i2 == end2)
- i2 = NULL;
}
return total;
}
@@ -2901,22 +2854,3 @@ void kvm_sys_reg_table_init(void)
/* Clear all higher bits. */
cache_levels &= (1 << (i*3))-1;
}
-
-/**
- * kvm_reset_sys_regs - sets system registers to reset value
- * @vcpu: The VCPU pointer
- *
- * This function finds the right table above and sets the registers on the
- * virtual CPU struct to their architecturally defined reset values.
- */
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
-{
- size_t num;
- const struct sys_reg_desc *table;
-
- /* Generic chip reset first (so target could override). */
- reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
-
- table = get_target_table(vcpu->arch.target, true, &num);
- reset_sys_reg_descs(vcpu, table, num);
-}
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
deleted file mode 100644
index aa9d356451eb..000000000000
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Based on arch/arm/kvm/coproc_a15.c:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Authors: Rusty Russell <rusty@rustcorp.au>
- * Christoffer Dall <c.dall@virtualopensystems.com>
- */
-#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-#include <asm/sysreg.h>
-#include <linux/init.h>
-
-#include "sys_regs.h"
-
-static bool access_actlr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
-
- if (p->is_aarch32) {
- if (r->Op2 & 2)
- p->regval = upper_32_bits(p->regval);
- else
- p->regval = lower_32_bits(p->regval);
- }
-
- return true;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
-{
- __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
-}
-
-/*
- * Implementation specific sys-reg registers.
- * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
- */
-static const struct sys_reg_desc genericv8_sys_regs[] = {
- { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
-};
-
-static const struct sys_reg_desc genericv8_cp15_regs[] = {
- /* ACTLR */
- { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
- access_actlr },
- { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011),
- access_actlr },
-};
-
-static struct kvm_sys_reg_target_table genericv8_target_table = {
- .table64 = {
- .table = genericv8_sys_regs,
- .num = ARRAY_SIZE(genericv8_sys_regs),
- },
- .table32 = {
- .table = genericv8_cp15_regs,
- .num = ARRAY_SIZE(genericv8_cp15_regs),
- },
-};
-
-static int __init sys_reg_genericv8_init(void)
-{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
- BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
- &genericv8_sys_regs[i]) >= 0);
-
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
- &genericv8_target_table);
-
- return 0;
-}
-late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 4c71270cc097..4691053c5ee4 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -301,8 +301,8 @@ TRACE_EVENT(kvm_timer_save_state,
),
TP_fast_assign(
- __entry->ctl = ctx->cnt_ctl;
- __entry->cval = ctx->cnt_cval;
+ __entry->ctl = timer_get_ctl(ctx);
+ __entry->cval = timer_get_cval(ctx);
__entry->timer_idx = arch_timer_ctx_index(ctx);
),
@@ -323,8 +323,8 @@ TRACE_EVENT(kvm_timer_restore_state,
),
TP_fast_assign(
- __entry->ctl = ctx->cnt_ctl;
- __entry->cval = ctx->cnt_cval;
+ __entry->ctl = timer_get_ctl(ctx);
+ __entry->cval = timer_get_cval(ctx);
__entry->timer_idx = arch_timer_ctx_index(ctx);
),
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index a4f48c1ac28c..e0404bcab019 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -48,7 +48,7 @@ __init void kvm_compute_layout(void)
va_mask = GENMASK_ULL(tag_lsb - 1, 0);
tag_val = hyp_va_msb;
- if (tag_lsb != (vabits_actual - 1)) {
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) {
/* We have some free bits to insert a random tag. */
tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index d8cdfea5cc96..79f8899b234c 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -100,19 +100,33 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
/**
* kvm_arch_set_irq_inatomic: fast-path for irqfd injection
- *
- * Currently only direct MSI injection is supported.
*/
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) {
+ if (!level)
+ return -EWOULDBLOCK;
+
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_MSI: {
struct kvm_msi msi;
+ if (!vgic_has_its(kvm))
+ break;
+
kvm_populate_msi(e, &msi);
- if (!vgic_its_inject_cached_translation(kvm, &msi))
- return 0;
+ return vgic_its_inject_cached_translation(kvm, &msi);
+ }
+
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ /*
+ * Injecting SPIs is always possible in atomic context
+ * as long as the damn vgic is initialized.
+ */
+ if (unlikely(!vgic_initialized(kvm)))
+ break;
+ return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status);
}
return -EWOULDBLOCK;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index c012a52b19f5..40cbaca81333 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -757,9 +757,8 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
db = (u64)msi->address_hi << 32 | msi->address_lo;
irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
-
if (!irq)
- return -1;
+ return -EWOULDBLOCK;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index d2339a2b9fb9..5c786b915cd3 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -389,7 +389,7 @@ u64 vgic_sanitise_outer_cacheability(u64 field)
case GIC_BASER_CACHE_nC:
return field;
default:
- return GIC_BASER_CACHE_nC;
+ return GIC_BASER_CACHE_SameAsInner;
}
}