aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/events/amd/ibs.c18
-rw-r--r--arch/x86/include/asm/acenv.h14
-rw-r--r--arch/x86/include/asm/archrandom.h12
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/intel-family.h11
-rw-r--r--arch/x86/include/asm/kexec.h9
-rw-r--r--arch/x86/include/asm/msr-index.h25
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/suspend_32.h2
-rw-r--r--arch/x86/include/asm/suspend_64.h12
-rw-r--r--arch/x86/include/asm/timex.h9
-rw-r--r--arch/x86/include/asm/tsc.h7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c235
-rw-r--r--arch/x86/kernel/cpu/common.c52
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/step.c3
-rw-r--r--arch/x86/kernel/sys_x86_64.c7
-rw-r--r--arch/x86/kvm/vmx.c77
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/mm/pat.c2
-rw-r--r--arch/x86/pci/xen.c5
-rw-r--r--arch/x86/um/ldt.c6
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_64.h5
26 files changed, 452 insertions, 76 deletions
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 46caca4d9141..a1c31bb23170 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -329,7 +329,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
static __init int vdso_setup(char *s)
{
vdso64_enabled = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("vdso=", vdso_setup);
#endif
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 2d9e1372b070..d157d0adef06 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -324,6 +324,16 @@ static int perf_ibs_init(struct perf_event *event)
hwc->config_base = perf_ibs->msr;
hwc->config = config;
+ /*
+ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
+ * recorded as part of interrupt regs. Thus we need to use rip from
+ * interrupt regs while unwinding call stack. Setting _EARLY flag
+ * makes sure we unwind call-stack before perf sample rip is set to
+ * IbsOpRip.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
+
return 0;
}
@@ -693,6 +703,14 @@ fail:
data.raw = &raw;
}
+ /*
+ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
+ * recorded as part of interrupt regs. Thus we need to use rip from
+ * interrupt regs while unwinding call stack.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ data.callchain = perf_callchain(event, iregs);
+
throttle = perf_event_overflow(event, &data, &regs);
out:
if (throttle) {
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 1b010a859b8b..6de59a4f723c 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -16,7 +16,19 @@
/* Asm macros */
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside virtual machine, the kernel can bypass cache flushing.
+ * Changing sleep state in a virtual machine doesn't affect the host system
+ * sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE() \
+do { \
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \
+ wbinvd(); \
+} while (0)
int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 3ac991d81e74..4d3cac3c9b25 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -86,10 +86,6 @@ static inline bool rdseed_int(unsigned int *v)
return ok;
}
-/* Conditional execution based on CPU type */
-#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
-
/*
* These are the generic interfaces; they must not be declared if the
* stubs in <linux/random.h> are to be invoked,
@@ -99,22 +95,22 @@ static inline bool rdseed_int(unsigned int *v)
static inline bool arch_get_random_long(unsigned long *v)
{
- return arch_has_random() ? rdrand_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false;
}
static inline bool arch_get_random_int(unsigned int *v)
{
- return arch_has_random() ? rdrand_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false;
}
static inline bool arch_get_random_seed_long(unsigned long *v)
{
- return arch_has_random_seed() ? rdseed_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false;
}
static inline bool arch_get_random_seed_int(unsigned int *v)
{
- return arch_has_random_seed() ? rdseed_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false;
}
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8910dde943d1..8025b7da0495 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -394,5 +394,6 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 2a8e5f78e23c..db06f16627b0 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -64,6 +64,14 @@
#define INTEL_FAM6_COMETLAKE 0xA5
#define INTEL_FAM6_COMETLAKE_L 0xA6
+#define INTEL_FAM6_ROCKETLAKE 0xA7
+
+/* Hybrid Core/Atom Processors */
+
+#define INTEL_FAM6_LAKEFIELD 0x8A
+#define INTEL_FAM6_ALDERLAKE 0x97
+#define INTEL_FAM6_ALDERLAKE_L 0x9A
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@@ -83,7 +91,10 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+
#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
+#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
+#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5125fca472bb..0a41c62369f2 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__
#include <linux/string.h>
+#include <linux/module.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -217,6 +218,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+#ifdef CONFIG_KEXEC_FILE
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+#endif
#endif
typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 892af8ab95d7..586be095ed08 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -96,6 +96,30 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -113,6 +137,7 @@
/* SRBDS support */
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0)
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 812976a6d864..d3d68b6763d4 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -323,6 +323,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
#include <asm/segment.h>
/**
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 8be6afb58471..32662cbaa27e 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -21,7 +21,6 @@ struct saved_context {
#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
@@ -30,6 +29,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
#endif /* _ASM_X86_SUSPEND_32_H */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index a7af9f53c0cb..b2861400c6a2 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -14,9 +14,13 @@
* Image of the saved processor state, used by the low level ACPI suspend to
* RAM code and by the low level hibernation code.
*
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members. For
+ * optimisation purposes but also because tools like kmemleak only search for
+ * pointers that are aligned.
*/
struct saved_context {
struct pt_regs regs;
@@ -36,7 +40,6 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
@@ -48,6 +51,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
#define loaddebug(thread,register) \
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index a4a8b1b16c0c..956e4145311b 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -5,6 +5,15 @@
#include <asm/processor.h>
#include <asm/tsc.h>
+static inline unsigned long random_get_entropy(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
+ return random_get_entropy_fallback();
+ return rdtsc();
+}
+#define random_get_entropy random_get_entropy
+
/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE PIT_TICK_RATE
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index eb5bbfeccb66..196cf01f58fd 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -22,13 +22,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
-#ifndef CONFIG_X86_TSC
- if (!boot_cpu_has(X86_FEATURE_TSC))
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
return 0;
-#endif
-
return rdtsc();
}
+#define get_cycles get_cycles
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 9791828f3fcd..926939978c1c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -166,7 +166,7 @@ static __init int setup_apicpmtimer(char *s)
{
apic_calibrate_pmtmr = 1;
notsc_setup(NULL);
- return 0;
+ return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f7b5f1645507..058e92b93262 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -40,8 +40,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
-static void __init mds_print_mitigation(void);
+static void __init md_clear_update_mitigation(void);
+static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
@@ -76,6 +78,10 @@ EXPORT_SYMBOL_GPL(mds_user_clear);
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
+/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
+DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -108,16 +114,9 @@ void __init check_bugs(void)
spectre_v2_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
- mds_select_mitigation();
- taa_select_mitigation();
+ md_clear_select_mitigation();
srbds_select_mitigation();
- /*
- * As MDS and TAA mitigations are inter-related, print MDS
- * mitigation until after TAA mitigation selection is done.
- */
- mds_print_mitigation();
-
arch_smt_update();
#ifdef CONFIG_X86_32
@@ -257,14 +256,6 @@ static void __init mds_select_mitigation(void)
}
}
-static void __init mds_print_mitigation(void)
-{
- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
- return;
-
- pr_info("%s\n", mds_strings[mds_mitigation]);
-}
-
static int __init mds_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_MDS))
@@ -312,7 +303,7 @@ static void __init taa_select_mitigation(void)
/* TSX previously disabled by tsx=off */
if (!boot_cpu_has(X86_FEATURE_RTM)) {
taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
- goto out;
+ return;
}
if (cpu_mitigations_off()) {
@@ -326,7 +317,7 @@ static void __init taa_select_mitigation(void)
*/
if (taa_mitigation == TAA_MITIGATION_OFF &&
mds_mitigation == MDS_MITIGATION_OFF)
- goto out;
+ return;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
taa_mitigation = TAA_MITIGATION_VERW;
@@ -358,18 +349,6 @@ static void __init taa_select_mitigation(void)
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
-
- /*
- * Update MDS mitigation, if necessary, as the mds_user_clear is
- * now enabled for TAA mitigation.
- */
- if (mds_mitigation == MDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MDS)) {
- mds_mitigation = MDS_MITIGATION_FULL;
- mds_select_mitigation();
- }
-out:
- pr_info("%s\n", taa_strings[taa_mitigation]);
}
static int __init tsx_async_abort_parse_cmdline(char *str)
@@ -394,6 +373,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "MMIO Stale Data: " fmt
+
+enum mmio_mitigations {
+ MMIO_MITIGATION_OFF,
+ MMIO_MITIGATION_UCODE_NEEDED,
+ MMIO_MITIGATION_VERW,
+};
+
+/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
+static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
+static bool mmio_nosmt __ro_after_init = false;
+
+static const char * const mmio_strings[] = {
+ [MMIO_MITIGATION_OFF] = "Vulnerable",
+ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+};
+
+static void __init mmio_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+ cpu_mitigations_off()) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ return;
+ }
+
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * Enable CPU buffer clear mitigation for host and VMM, if also affected
+ * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+ static_branch_enable(&mds_user_clear);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+ /*
+ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
+ * be propagated to uncore buffers, clearing the Fill buffers on idle
+ * is required irrespective of SMT state.
+ */
+ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ static_branch_enable(&mds_idle_clear);
+
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(ia32_cap & ARCH_CAP_MDS_NO)))
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ else
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+
+ if (mmio_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+}
+
+static int __init mmio_stale_data_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static void __init md_clear_update_mitigation(void)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!static_key_enabled(&mds_user_clear))
+ goto out;
+
+ /*
+ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+ * mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+ mds_mitigation = MDS_MITIGATION_FULL;
+ mds_select_mitigation();
+ }
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_select_mitigation();
+ }
+ if (mmio_mitigation == MMIO_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_select_mitigation();
+ }
+out:
+ if (boot_cpu_has_bug(X86_BUG_MDS))
+ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init md_clear_select_mitigation(void)
+{
+ mds_select_mitigation();
+ taa_select_mitigation();
+ mmio_select_mitigation();
+
+ /*
+ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
+ * and print their mitigation after MDS, TAA and MMIO Stale Data
+ * mitigation selection is done.
+ */
+ md_clear_update_mitigation();
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
enum srbds_mitigations {
@@ -454,11 +578,13 @@ static void __init srbds_select_mitigation(void)
return;
/*
- * Check to see if this is one of the MDS_NO systems supporting
- * TSX that are only exposed to SRBDS when TSX is enabled.
+ * Check to see if this is one of the MDS_NO systems supporting TSX that
+ * are only exposed to SRBDS when TSX is enabled or when CPU is affected
+ * by Processor MMIO Stale Data vulnerability.
*/
ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
@@ -1066,6 +1192,8 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1077,14 +1205,17 @@ static void update_mds_branch_idle(void)
if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
return;
- if (sched_smt_active())
+ if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
- else
+ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
+ (ia32_cap & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
+ }
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
void arch_smt_update(void)
{
@@ -1129,6 +1260,16 @@ void arch_smt_update(void)
break;
}
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -1680,6 +1821,20 @@ static ssize_t tsx_async_abort_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t mmio_stale_data_show_state(char *buf)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sysfs_emit(buf, "%s; SMT Host state unknown\n",
+ mmio_strings[mmio_mitigation]);
+ }
+
+ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
@@ -1780,6 +1935,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_SRBDS:
return srbds_show_state(buf);
+ case X86_BUG_MMIO_STALE_DATA:
+ return mmio_stale_data_show_state(buf);
+
default:
break;
}
@@ -1831,4 +1989,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7e7400af7797..389d11b2c70e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1019,18 +1019,42 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
X86_FEATURE_ANY, issues)
#define SRBDS BIT(0)
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+#define MMIO BIT(1)
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+#define MMIO_SBDS BIT(2)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_XEON_D,X86_STEPPINGS(0x3, 0x5), MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
+ BIT(7) | BIT(0xB), MMIO),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
{}
};
@@ -1051,6 +1075,13 @@ u64 x86_read_arch_cap_msr(void)
return ia32_cap;
}
+static bool arch_cap_mmio_immune(u64 ia32_cap)
+{
+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
+ ia32_cap & ARCH_CAP_PSDP_NO &&
+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1102,12 +1133,27 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
/*
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
* in the vulnerability blacklist.
+ *
+ * Some of the implications and mitigation of Shared Buffers Data
+ * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
+ * SRBDS.
*/
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
cpu_has(c, X86_FEATURE_RDSEED)) &&
- cpu_matches(cpu_vuln_blacklist, SRBDS))
+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
setup_force_cpu_bug(X86_BUG_SRBDS);
+ /*
+ * Processor MMIO Stale Data bug enumeration
+ *
+ * Affected CPU list is generally enough to enumerate the vulnerability,
+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+ * not want the guest to enumerate the bug.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a5287b18a63f..76692590eff8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -71,7 +71,7 @@ static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
{
ring3mwait_disabled = true;
- return 0;
+ return 1;
}
__setup("ring3mwait=disable", ring3mwait_disable);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 60d2c3798ba2..2f97d1a1032f 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -175,8 +175,7 @@ void set_task_blockstep(struct task_struct *task, bool on)
*
* NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
* task is current or it can't be running, otherwise we can race
- * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
- * PTRACE_KILL is not safe.
+ * with __switch_to_xtra(). We rely on ptrace_freeze_traced().
*/
local_irq_disable();
debugctl = get_debugctlmsr();
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 6a78d4b36a79..9fb266c797fb 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -70,9 +70,6 @@ static int __init control_va_addr_alignment(char *str)
if (*str == 0)
return 1;
- if (*str == '=')
- str++;
-
if (!strcmp(str, "32"))
va_align.flags = ALIGN_VA_32;
else if (!strcmp(str, "64"))
@@ -82,11 +79,11 @@ static int __init control_va_addr_alignment(char *str)
else if (!strcmp(str, "on"))
va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
else
- return 0;
+ pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
return 1;
}
-__setup("align_va_addr", control_va_addr_alignment);
+__setup("align_va_addr=", control_va_addr_alignment);
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 77b9ed5223f3..44cce3e8eb18 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -212,6 +212,9 @@ static const struct {
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
+/* Control for disabling CPU Fill buffer clear */
+static bool __read_mostly vmx_fb_clear_ctrl_available;
+
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
@@ -1045,6 +1048,8 @@ struct vcpu_vmx {
u64 msr_ia32_feature_control;
u64 msr_ia32_feature_control_valid_bits;
u64 ept_pointer;
+ u64 msr_ia32_mcu_opt_ctrl;
+ bool disable_fb_clear;
};
enum segment_cache_field {
@@ -2107,6 +2112,60 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
BUG_ON(error);
}
+static void vmx_setup_fb_clear_ctrl(void)
+{
+ u64 msr;
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA)) {
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+ if (msr & ARCH_CAP_FB_CLEAR_CTRL)
+ vmx_fb_clear_ctrl_available = true;
+ }
+}
+
+static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
+{
+ u64 msr;
+
+ if (!vmx->disable_fb_clear)
+ return;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr |= FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ /* Cache the MSR value to avoid reading it later */
+ vmx->msr_ia32_mcu_opt_ctrl = msr;
+}
+
+static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
+{
+ if (!vmx->disable_fb_clear)
+ return;
+
+ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+}
+
+static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+{
+ vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+
+ /*
+ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
+ * at VMEntry. Skip the MSR read/write when a guest has no use case to
+ * execute VERW.
+ */
+ if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
+ ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
+ vmx->disable_fb_clear = false;
+}
+
static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -4314,9 +4373,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
}
- ret = kvm_set_msr_common(vcpu, msr_info);
+ ret = kvm_set_msr_common(vcpu, msr_info);
}
+ /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
+ if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
+ vmx_update_fb_clear_dis(vcpu, vmx);
+
return ret;
}
@@ -6761,6 +6824,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vpid_sync_context(vmx->vpid);
if (init_event)
vmx_clear_hlt(vcpu);
+
+ vmx_update_fb_clear_dis(vcpu, vmx);
}
/*
@@ -10773,6 +10838,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
+
+ vmx_disable_fb_clear(vmx);
asm volatile (
/* Store host registers */
@@ -10918,6 +10988,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ vmx_enable_fb_clear(vmx);
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
@@ -14623,8 +14695,11 @@ static int __init vmx_init(void)
}
}
+ vmx_setup_fb_clear_ctrl();
+
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9829e0fbdd41..11391b7ace1e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1220,6 +1220,10 @@ u64 kvm_get_arch_capabilities(void)
/* KVM does not emulate MSR_IA32_TSX_CTRL. */
data &= ~ARCH_CAP_TSX_CTRL_MSR;
+
+ /* Guests don't need to know "Fill buffer clear control" exists */
+ data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+
return data;
}
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 614c2c6b1959..68ca883abfdb 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -43,8 +43,8 @@ static void delay_loop(unsigned long loops)
" jnz 2b \n"
"3: dec %0 \n"
- : /* we don't need output */
- :"a" (loops)
+ : "+a" (loops)
+ :
);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 324e26d0607b..b33304c0042a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -74,7 +74,7 @@ int pat_debug_enable;
static int __init pat_debug_setup(char *str)
{
pat_debug_enable = 1;
- return 0;
+ return 1;
}
__setup("debugpat", pat_debug_setup);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 22da9bfd8a45..bacf8d988f65 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -441,6 +441,11 @@ void __init xen_msi_init(void)
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ /*
+ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
+ * controlled by the hypervisor.
+ */
+ pci_msi_ignore_mask = 1;
}
#endif
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 3ee234b6234d..255a44dd415a 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -23,9 +23,11 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func,
{
long res;
void *stub_addr;
+
+ BUILD_BUG_ON(sizeof(*desc) % sizeof(long));
+
res = syscall_stub_data(mm_idp, (unsigned long *)desc,
- (sizeof(*desc) + sizeof(long) - 1) &
- ~(sizeof(long) - 1),
+ sizeof(*desc) / sizeof(long),
addr, &stub_addr);
if (!res) {
unsigned long args[] = { func,
diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h
index 8a7d5e1da98e..1e6875b4ffd8 100644
--- a/arch/x86/um/shared/sysdep/syscalls_64.h
+++ b/arch/x86/um/shared/sysdep/syscalls_64.h
@@ -10,13 +10,12 @@
#include <linux/msg.h>
#include <linux/shm.h>
-typedef long syscall_handler_t(void);
+typedef long syscall_handler_t(long, long, long, long, long, long);
extern syscall_handler_t *sys_call_table[];
#define EXECUTE_SYSCALL(syscall, regs) \
- (((long (*)(long, long, long, long, long, long)) \
- (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
+ (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
UPT_SYSCALL_ARG2(&regs->regs), \
UPT_SYSCALL_ARG3(&regs->regs), \
UPT_SYSCALL_ARG4(&regs->regs), \