aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig20
-rw-r--r--arch/x86/boot/bioscall.S4
-rw-r--r--arch/x86/boot/boot.h36
-rw-r--r--arch/x86/boot/main.c2
-rw-r--r--arch/x86/entry/entry_32.S6
-rw-r--r--arch/x86/entry/entry_64.S8
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/events/intel/uncore_snbep.c1
-rw-r--r--arch/x86/ia32/ia32_aout.c3
-rw-r--r--arch/x86/include/asm/bugs.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/fpu/internal.h2
-rw-r--r--arch/x86/include/asm/i8259.h2
-rw-r--r--arch/x86/include/asm/intel-family.h5
-rw-r--r--arch/x86/include/asm/mem_encrypt.h2
-rw-r--r--arch/x86/include/asm/microcode.h5
-rw-r--r--arch/x86/include/asm/microcode_amd.h6
-rw-r--r--arch/x86/include/asm/msr-index.h21
-rw-r--r--arch/x86/include/asm/numa.h7
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/reboot.h2
-rw-r--r--arch/x86/include/asm/setup.h46
-rw-r--r--arch/x86/include/asm/virtext.h22
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/alternative.c15
-rw-r--r--arch/x86/kernel/apic/apic.c5
-rw-r--r--arch/x86/kernel/apic/io_apic.c14
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c5
-rw-r--r--arch/x86/kernel/apm_32.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c220
-rw-r--r--arch/x86/kernel/cpu/bugs.c246
-rw-r--r--arch/x86/kernel/cpu/common.c119
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c26
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c55
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c6
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c8
-rw-r--r--arch/x86/kernel/cpu/topology.c5
-rw-r--r--arch/x86/kernel/crash.c17
-rw-r--r--arch/x86/kernel/dumpstack.c11
-rw-r--r--arch/x86/kernel/fpu/init.c8
-rw-r--r--arch/x86/kernel/fpu/xstate.c8
-rw-r--r--arch/x86/kernel/i8259.c39
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c6
-rw-r--r--arch/x86/kernel/quirks.c10
-rw-r--r--arch/x86/kernel/reboot.c88
-rw-r--r--arch/x86/kernel/smp.c6
-rw-r--r--arch/x86/kernel/smpboot.c25
-rw-r--r--arch/x86/kernel/sysfb_efi.c16
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c (renamed from arch/x86/kvm/pmu_intel.c)0
-rw-r--r--arch/x86/kvm/vmx/vmx.c (renamed from arch/x86/kvm/vmx.c)62
-rw-r--r--arch/x86/kvm/vmx/vmx_evmcs.h (renamed from arch/x86/kvm/vmx_evmcs.h)0
-rw-r--r--arch/x86/kvm/vmx/vmx_shadow_fields.h (renamed from arch/x86/kvm/vmx_shadow_fields.h)0
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/iomap_copy_64.S2
-rw-r--r--arch/x86/lib/misc.c2
-rw-r--r--arch/x86/mm/init.c25
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c3
-rw-r--r--arch/x86/mm/numa.c7
-rw-r--r--arch/x86/pci/fixup.c21
-rw-r--r--arch/x86/purgatory/Makefile10
-rw-r--r--arch/x86/um/vdso/um_vdso.c12
-rw-r--r--arch/x86/xen/smp.c41
-rw-r--r--arch/x86/xen/smp_pv.c14
-rw-r--r--arch/x86/xen/spinlock.c6
-rw-r--r--arch/x86/xen/xen-ops.h2
71 files changed, 986 insertions, 427 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be4403a8e1b4..9fd2e3c2494a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
@@ -2437,6 +2438,25 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y
depends on X86_64 || X86_PAE
+config GDS_FORCE_MITIGATION
+ bool "Force GDS Mitigation"
+ depends on CPU_SUP_INTEL
+ default n
+ help
+ Gather Data Sampling (GDS) is a hardware vulnerability which allows
+ unprivileged speculative access to data which was previously stored in
+ vector registers.
+
+ This option is equivalent to setting gather_data_sampling=force on the
+ command line. The microcode mitigation is used if present, otherwise
+ AVX is disabled as a mitigation. On affected systems that are missing
+ the microcode any userspace code that unconditionally uses AVX will
+ break with this option set.
+
+ Setting this option on systems not vulnerable to GDS has no effect.
+
+ If in doubt, say N.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on X86_64 && HUGETLB_PAGE && MIGRATION
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index d401b4a262b0..a860390852ff 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -35,7 +35,7 @@ intcall:
movw %dx, %si
movw %sp, %di
movw $11, %cx
- rep; movsd
+ rep; movsl
/* Pop full state from the stack */
popal
@@ -70,7 +70,7 @@ intcall:
jz 4f
movw %sp, %si
movw $11, %cx
- rep; movsd
+ rep; movsl
4: addw $44, %sp
/* Restore state and return */
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef5a9cc66fb8..a4c5fb92b1cb 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -114,66 +114,78 @@ typedef unsigned int addr_t;
static inline u8 rdfs8(addr_t addr)
{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
u8 v;
- asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
return v;
}
static inline u16 rdfs16(addr_t addr)
{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
u16 v;
- asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline u32 rdfs32(addr_t addr)
{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
u32 v;
- asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline void wrfs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
}
static inline void wrfs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline void wrfs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline u8 rdgs8(addr_t addr)
{
+ u8 *ptr = (u8 *)absolute_pointer(addr);
u8 v;
- asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
return v;
}
static inline u16 rdgs16(addr_t addr)
{
+ u16 *ptr = (u16 *)absolute_pointer(addr);
u16 v;
- asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+ asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline u32 rdgs32(addr_t addr)
{
+ u32 *ptr = (u32 *)absolute_pointer(addr);
u32 v;
- asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+ asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
return v;
}
static inline void wrgs8(u8 v, addr_t addr)
{
- asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+ u8 *ptr = (u8 *)absolute_pointer(addr);
+ asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
}
static inline void wrgs16(u16 v, addr_t addr)
{
- asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+ u16 *ptr = (u16 *)absolute_pointer(addr);
+ asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
}
static inline void wrgs32(u32 v, addr_t addr)
{
- asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+ u32 *ptr = (u32 *)absolute_pointer(addr);
+ asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
}
/* Note: these only return true/false, not a signed return value! */
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 9bcea386db65..98b4ff1c51f3 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -34,7 +34,7 @@ static void copy_boot_params(void)
u16 cl_offset;
};
const struct old_cmdline * const oldcmd =
- (const struct old_cmdline *)OLD_CL_ADDRESS;
+ absolute_pointer(OLD_CL_ADDRESS);
BUILD_BUG_ON(sizeof boot_params != 4096);
memcpy(&boot_params.hdr, &hdr, sizeof hdr);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 78b308f2f2ea..e6c258bf9511 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1500,13 +1500,13 @@ ENTRY(async_page_fault)
END(async_page_fault)
#endif
-ENTRY(rewind_stack_do_exit)
+ENTRY(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
- call do_exit
+ call make_task_dead
1: jmp 1b
-END(rewind_stack_do_exit)
+END(rewind_stack_and_make_dead)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bb2e1818408e..bf08de4232e3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1759,10 +1759,10 @@ END(nmi)
ENTRY(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
- sysret
+ sysretl
END(ignore_sysret)
-ENTRY(rewind_stack_do_exit)
+ENTRY(rewind_stack_and_make_dead)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1771,5 +1771,5 @@ ENTRY(rewind_stack_do_exit)
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS
- call do_exit
-END(rewind_stack_do_exit)
+ call make_task_dead
+END(rewind_stack_and_make_dead)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index a1c31bb23170..a3cd828359f8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -228,8 +228,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
/* Round the lowest possible end address up to a PMD boundary. */
end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE_MAX)
- end = TASK_SIZE_MAX;
+ if (end >= DEFAULT_MAP_WINDOW)
+ end = DEFAULT_MAP_WINDOW;
end -= len;
if (end > start) {
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 2bf1170f7afd..34da6d27d839 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2699,6 +2699,7 @@ static bool hswep_has_limit_sbox(unsigned int device)
return false;
pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4);
+ pci_dev_put(dev);
if (!hswep_get_chop(capid4))
return true;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 3ebd77770f98..4cfda3cfae7f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
set_personality_ia32(false);
setup_new_exec(bprm);
+ install_exec_creds(bprm);
regs->cs = __USER32_CS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
@@ -314,8 +315,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
if (retval < 0)
return retval;
- install_exec_creds(bprm);
-
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f..dd8ff1ca2aef 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -4,8 +4,6 @@
#include <asm/processor.h>
-extern void check_bugs(void);
-
#if defined(CONFIG_CPU_SUP_INTEL)
void check_mpx_erratum(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index bd4374f56651..5cf8dca571cf 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -409,5 +409,6 @@
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_GDS X86_BUG(29) /* CPU is affected by Gather Data Sampling */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 4d90f4b64437..49decc8339b3 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -42,7 +42,7 @@ extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
-extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_system(void);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
extern u64 fpu__get_supported_xfeatures_mask(void);
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 89789e8c80f6..e16574c16e93 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -67,6 +67,8 @@ struct legacy_pic {
void (*make_irq)(unsigned int irq);
};
+void legacy_pic_pcat_compat(void);
+
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 1f2f52a34086..ccf07426a84d 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -74,6 +74,11 @@
#define INTEL_FAM6_LAKEFIELD 0x8A
#define INTEL_FAM6_ALDERLAKE 0x97
#define INTEL_FAM6_ALDERLAKE_L 0x9A
+#define INTEL_FAM6_ALDERLAKE_N 0xBE
+
+#define INTEL_FAM6_RAPTORLAKE 0xB7
+#define INTEL_FAM6_RAPTORLAKE_P 0xBA
+#define INTEL_FAM6_RAPTORLAKE_S 0xBF
/* "Small Core" Processors (Atom) */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 616f8e637bc3..b51ac6eed904 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -80,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void mem_encrypt_init(void) { }
+
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 91a06cef50c1..b675db12a8ab 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
#include <linux/initrd.h>
+#include <asm/microcode_amd.h>
struct ucode_patch {
struct list_head plist;
@@ -130,7 +131,7 @@ static inline unsigned int x86_cpuid_family(void)
int __init microcode_init(void);
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
-void reload_early_microcode(void);
+void reload_early_microcode(unsigned int cpu);
extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
extern bool initrd_gone;
void microcode_bsp_resume(void);
@@ -138,7 +139,7 @@ void microcode_bsp_resume(void);
static inline int __init microcode_init(void) { return 0; };
static inline void __init load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
-static inline void reload_early_microcode(void) { }
+static inline void reload_early_microcode(unsigned int cpu) { }
static inline void microcode_bsp_resume(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 5c524d4f71cd..403a8e76b310 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -47,12 +47,14 @@ struct microcode_amd {
extern void __init load_ucode_amd_bsp(unsigned int family);
extern void load_ucode_amd_ap(unsigned int family);
extern int __init save_microcode_in_initrd_amd(unsigned int family);
-void reload_ucode_amd(void);
+void reload_ucode_amd(unsigned int cpu);
+extern void amd_check_microcode(void);
#else
static inline void __init load_ucode_amd_bsp(unsigned int family) {}
static inline void load_ucode_amd_ap(unsigned int family) {}
static inline int __init
save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-void reload_ucode_amd(void) {}
+static inline void reload_ucode_amd(unsigned int cpu) {}
+static inline void amd_check_microcode(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 0bd07699dba3..4be36bfb2477 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -50,6 +50,10 @@
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+/* A mask for bits which the kernel toggles when controlling mitigations */
+#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
+ | SPEC_CTRL_RRSBA_DIS_S)
+
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -134,6 +138,15 @@
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
*/
+#define ARCH_CAP_GDS_CTRL BIT(25) /*
+ * CPU is vulnerable to Gather
+ * Data Sampling (GDS) and
+ * has controls for mitigation.
+ */
+#define ARCH_CAP_GDS_NO BIT(26) /*
+ * CPU is not vulnerable to Gather
+ * Data Sampling (GDS).
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -152,6 +165,8 @@
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0)
#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
@@ -399,10 +414,12 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_TW_CFG 0xc0011023
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
@@ -430,6 +447,10 @@
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bbfde3d2662f..4bcd9d0c7bee 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -11,13 +11,6 @@
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
extern int numa_off;
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc4bb218f1c6..f3049d55c522 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -1001,4 +1001,6 @@ enum taa_mitigations {
TAA_MITIGATION_TSX_DISABLED,
};
+extern bool gds_ucode_mitigated(void);
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index a671a1145906..9177b4354c3f 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
void run_crash_ipi_callback(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ae13bc974416..c5ed79159975 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -91,27 +91,16 @@ extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within
- * the file, and somewhat descriptive. The size is in bytes. Must be
- * used at file scope.
+ * Reserve space in the .brk section, which is a block of memory from which the
+ * caller is allowed to allocate very early (before even memblock is available)
+ * by calling extend_brk(). All allocated memory will be eventually converted
+ * to memblock. Any leftover unallocated memory will be freed.
*
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to. We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The size is in bytes.
*/
-#define RESERVE_BRK(name,sz) \
- static void __section(.discard.text) __used notrace \
- __brk_reservation_fn_##name##__(void) { \
- asm volatile ( \
- ".pushsection .brk_reservation,\"aw\",@nobits;" \
- ".brk." #name ":" \
- " 1:.skip %c0;" \
- " .size .brk." #name ", . - 1b;" \
- " .popsection" \
- : : "i" (sz)); \
- }
+#define RESERVE_BRK(name, size) \
+ __section(.bss..brk) __aligned(1) __used \
+ static char __brk_##name[size]
/* Helper for reserving space for arrays of things */
#define RESERVE_BRK_ARRAY(type, name, entries) \
@@ -129,12 +118,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
-#else
-#define RESERVE_BRK(name,sz) \
- .pushsection .brk_reservation,"aw",@nobits; \
-.brk.name: \
-1: .skip sz; \
- .size .brk.name,.-1b; \
+
+#else /* __ASSEMBLY */
+
+.macro __RESERVE_BRK name, size
+ .pushsection .bss..brk, "aw"
+GLOBAL(__brk_\name)
+ .skip \size
+END(__brk_\name)
.popsection
+.endm
+
+#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
+
#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 0116b2ee9e64..62810550024d 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -89,12 +89,6 @@ static inline int cpu_has_svm(const char **msg)
return 0;
}
- if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
- if (msg)
- *msg = "can't execute cpuid_8000000a";
- return 0;
- }
-
if (!boot_cpu_has(X86_FEATURE_SVM)) {
if (msg)
*msg = "svm not available";
@@ -114,7 +108,21 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ if (efer & EFER_SVME) {
+ /*
+ * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
+ * aren't blocked, e.g. if a fatal error occurred between CLGI
+ * and STGI. Note, STGI may #UD if SVM is disabled from NMI
+ * context between reading EFER and executing STGI. In that
+ * case, GIF must already be set, otherwise the NMI would have
+ * been blocked, so just eat the fault.
+ */
+ asm_volatile_goto("1: stgi\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "memory" : fault);
+fault:
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ }
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8b1aa1206d98..2f0fa294d617 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -156,6 +156,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
madt->address);
}
+ if (madt->flags & ACPI_MADT_PCAT_COMPAT)
+ legacy_pic_pcat_compat();
+
default_acpi_madt_oem_check(madt->header.oem_id,
madt->header.oem_table_id);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 918a23704c0c..33882d5ab25d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -366,6 +366,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("alt table %px, -> %px", start, end);
+
+ /*
+ * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+ * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
+ * During the process, KASAN becomes confused seeing partial LA57
+ * conversion and triggers a false-positive out-of-bound report.
+ *
+ * Disable KASAN until the patching is complete.
+ */
+ kasan_disable_current();
+
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -426,6 +437,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insnbuf, insnbuf_sz);
}
+
+ kasan_enable_current();
}
#ifdef CONFIG_SMP
@@ -677,8 +690,8 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
} else {
local_irq_save(flags);
memcpy(addr, opcode, len);
- local_irq_restore(flags);
sync_core();
+ local_irq_restore(flags);
/*
* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 926939978c1c..9318fe7d850e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -403,10 +403,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
- rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
- } while (rsvd != new);
+ } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
- rsvd &= ~APIC_EILVT_MASKED;
+ rsvd = new & ~APIC_EILVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 677508baf95a..af59aa9c5523 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2449,17 +2449,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
+ unsigned int ret;
+
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- if (!ioapic_initialized)
- return gsi_top;
+ ret = ioapic_dynirq_base ? : gsi_top;
+
/*
- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
- * updated. So simply return @from if ioapic_dynirq_base == 0.
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
*/
- return ioapic_dynirq_base ? : from;
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8e70c2ba21b3..fb17767552ef 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -102,7 +102,10 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+ if (!x2apic_mode)
+ return 0;
+
+ if (x2apic_phys || x2apic_fadt_phys())
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f7151cd03cb0..3d7a8049f637 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -247,12 +247,6 @@ extern int (*console_blank_hook)(int);
#endif
/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define APM_MINOR_DEV 134
-
-/*
* Various options can be changed at boot time as follows:
* (We allow underscores for compatibility with the modules code)
* apm=on/off enable/disable APM
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e017f64e09d6..84667781c41d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -23,11 +23,6 @@
#include "cpu.h"
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static const int amd_erratum_1054[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -35,6 +30,83 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
*/
static u32 nodes_per_socket = 1;
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE().
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+static const int amd_erratum_400[] =
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+static const int amd_erratum_383[] =
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
+static const int amd_zenbleed[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
+ AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+
+static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+{
+ int osvw_id = *erratum++;
+ u32 range;
+ u32 ms;
+
+ if (osvw_id >= 0 && osvw_id < 65536 &&
+ cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+
+ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+ return osvw_bits & (1ULL << (osvw_id & 0x3f));
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+ while ((range = *erratum++))
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range)))
+ return true;
+
+ return false;
+}
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -199,6 +271,15 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
#endif
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static void init_amd_k7(struct cpuinfo_x86 *c)
@@ -900,6 +981,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
}
}
+static bool cpu_has_zenbleed_microcode(void)
+{
+ u32 good_rev = 0;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
+ case 0x60 ... 0x67: good_rev = 0x0860010b; break;
+ case 0x68 ... 0x6f: good_rev = 0x08608105; break;
+ case 0x70 ... 0x7f: good_rev = 0x08701032; break;
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+
+ default:
+ return false;
+ break;
+ }
+
+ if (boot_cpu_data.microcode < good_rev)
+ return false;
+
+ return true;
+}
+
+static void zenbleed_check(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_amd_erratum(c, amd_zenbleed))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (!cpu_has(c, X86_FEATURE_AVX))
+ return;
+
+ if (!cpu_has_zenbleed_microcode()) {
+ pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
+ msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ } else {
+ msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ }
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -1002,6 +1124,12 @@ static void init_amd(struct cpuinfo_x86 *c)
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
check_null_seg_clears_base(c);
+
+ zenbleed_check(c);
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
#ifdef CONFIG_X86_32
@@ -1097,73 +1225,6 @@ static const struct cpu_dev amd_cpu_dev = {
cpu_dev_register(amd_cpu_dev);
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
- int osvw_id = *erratum++;
- u32 range;
- u32 ms;
-
- if (osvw_id >= 0 && osvw_id < 65536 &&
- cpu_has(cpu, X86_FEATURE_OSVW)) {
- u64 osvw_len;
-
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
- if (osvw_id < osvw_len) {
- u64 osvw_bits;
-
- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
- osvw_bits);
- return osvw_bits & (1ULL << (osvw_id & 0x3f));
- }
- }
-
- /* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
- while ((range = *erratum++))
- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
- (ms >= AMD_MODEL_RANGE_START(range)) &&
- (ms <= AMD_MODEL_RANGE_END(range)))
- return true;
-
- return false;
-}
-
void set_dr_addr_mask(unsigned long mask, int dr)
{
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -1182,3 +1243,18 @@ void set_dr_addr_mask(unsigned long mask, int dr)
break;
}
}
+
+static void zenbleed_check_cpu(void *unused)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+ zenbleed_check(c);
+}
+
+void amd_check_microcode(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
+}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f1f57acca139..6e1acbdd27a5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,7 +9,6 @@
* - Andrew D. Balsa (code cleanup).
*/
#include <linux/init.h>
-#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/nospec.h>
@@ -25,9 +24,7 @@
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>
-#include <asm/alternative.h>
#include <asm/pgtable.h>
-#include <asm/set_memory.h>
#include <asm/intel-family.h>
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
@@ -47,6 +44,7 @@ static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init gds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -115,29 +113,24 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
-void __init check_bugs(void)
+void __init cpu_select_mitigations(void)
{
- identify_boot_cpu();
-
- /*
- * identify_boot_cpu() initialized SMT support information, let the
- * core code know.
- */
- cpu_smt_check_topology();
-
- if (!IS_ENABLED(CONFIG_SMP)) {
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
- }
-
/*
* Read the SPEC_CTRL MSR to account for reserved bits which may
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ /*
+ * Previously running kernel (kexec), may have some controls
+ * turned ON. Clear them and let the mitigations setup below
+ * rediscover them based on configuration.
+ */
+ x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
+ }
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -157,39 +150,7 @@ void __init check_bugs(void)
l1tf_select_mitigation();
md_clear_select_mitigation();
srbds_select_mitigation();
-
- arch_smt_update();
-
-#ifdef CONFIG_X86_32
- /*
- * Check whether we are able to run this kernel safely on SMP.
- *
- * - i386 is no longer supported.
- * - In order to run on anything without a TSC, we need to be
- * compiled for a i486.
- */
- if (boot_cpu_data.x86 < 4)
- panic("Kernel requires i486+ for 'invlpg' and other features");
-
- init_utsname()->machine[1] =
- '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
- alternative_instructions();
-
- fpu__init_check_bugs();
-#else /* CONFIG_X86_64 */
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-#endif
+ gds_select_mitigation();
}
/*
@@ -641,6 +602,149 @@ static int __init srbds_parse_cmdline(char *str)
early_param("srbds", srbds_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "GDS: " fmt
+
+enum gds_mitigations {
+ GDS_MITIGATION_OFF,
+ GDS_MITIGATION_UCODE_NEEDED,
+ GDS_MITIGATION_FORCE,
+ GDS_MITIGATION_FULL,
+ GDS_MITIGATION_FULL_LOCKED,
+ GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+ [GDS_MITIGATION_OFF] = "Vulnerable",
+ [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode",
+ [GDS_MITIGATION_FULL] = "Mitigation: Microcode",
+ [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)",
+ [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+ return (gds_mitigation == GDS_MITIGATION_FULL ||
+ gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+ u64 mcu_ctrl_after;
+ u64 mcu_ctrl;
+
+ switch (gds_mitigation) {
+ case GDS_MITIGATION_OFF:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl |= GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FULL_LOCKED:
+ /*
+ * The LOCKED state comes from the boot CPU. APs might not have
+ * the same state. Make sure the mitigation is enabled on all
+ * CPUs.
+ */
+ case GDS_MITIGATION_FULL:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl &= ~GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FORCE:
+ case GDS_MITIGATION_UCODE_NEEDED:
+ case GDS_MITIGATION_HYPERVISOR:
+ return;
+ };
+
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+ /*
+ * Check to make sure that the WRMSR value was not ignored. Writes to
+ * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+ * processor was not.
+ */
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+ WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+ u64 mcu_ctrl;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+ goto out;
+ }
+
+ if (cpu_mitigations_off())
+ gds_mitigation = GDS_MITIGATION_OFF;
+ /* Will verify below that mitigation _can_ be disabled */
+
+ /* No microcode */
+ if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (gds_mitigation == GDS_MITIGATION_FORCE) {
+ /*
+ * This only needs to be done on the boot CPU so do it
+ * here rather than in update_gds_msr()
+ */
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+ } else {
+ gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+ }
+ goto out;
+ }
+
+ /* Microcode has mitigation, use it */
+ if (gds_mitigation == GDS_MITIGATION_FORCE)
+ gds_mitigation = GDS_MITIGATION_FULL;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ if (mcu_ctrl & GDS_MITG_LOCKED) {
+ if (gds_mitigation == GDS_MITIGATION_OFF)
+ pr_warn("Mitigation locked. Disable failed.\n");
+
+ /*
+ * The mitigation is selected from the boot CPU. All other CPUs
+ * _should_ have the same state. If the boot CPU isn't locked
+ * but others are then update_gds_msr() will WARN() of the state
+ * mismatch. If the boot CPU is locked update_gds_msr() will
+ * ensure the other CPUs have the mitigation enabled.
+ */
+ gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+ }
+
+ update_gds_msr();
+out:
+ pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ gds_mitigation = GDS_MITIGATION_OFF;
+ else if (!strcmp(str, "force"))
+ gds_mitigation = GDS_MITIGATION_FORCE;
+
+ return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -975,14 +1079,18 @@ spectre_v2_parse_user_cmdline(void)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
{
- return mode == SPECTRE_V2_IBRS ||
- mode == SPECTRE_V2_EIBRS ||
+ return mode == SPECTRE_V2_EIBRS ||
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_EIBRS_LFENCE;
}
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
+}
+
static void __init
spectre_v2_user_select_mitigation(void)
{
@@ -1045,12 +1153,19 @@ spectre_v2_user_select_mitigation(void)
}
/*
- * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
- * STIBP is not required.
+ * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
+ * is not required.
+ *
+ * Enhanced IBRS also protects against cross-thread branch target
+ * injection in user-mode as the IBRS bit remains always set which
+ * implicitly enables cross-thread protections. However, in legacy IBRS
+ * mode, the IBRS bit is set only on kernel entry and cleared on return
+ * to userspace. This disables the implicit cross-thread protection,
+ * so allow for STIBP to be selected in that case.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return;
/*
@@ -1778,6 +1893,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
task_update_spec_tif(task);
+ if (task == current)
+ indirect_branch_prediction_barrier();
break;
default:
return -ERANGE;
@@ -2100,7 +2217,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -2175,6 +2292,11 @@ static ssize_t retbleed_show_state(char *buf)
return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
+static ssize_t gds_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2224,6 +2346,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RETBLEED:
return retbleed_show_state(buf);
+ case X86_BUG_GDS:
+ return gds_show_state(buf);
+
default:
break;
}
@@ -2288,4 +2413,9 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
{
return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 55293e5dcbff..d315e928b95c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,14 +13,20 @@
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
#include <linux/sched/task.h>
+#include <linux/sched/smt.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
+#include <linux/mem_encrypt.h>
#include <linux/smp.h>
+#include <linux/cpu.h>
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <asm/stackprotector.h>
+#include <linux/utsname.h>
+
+#include <asm/alternative.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/archrandom.h>
@@ -56,6 +62,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
#endif
+#include <asm/set_memory.h>
#include "cpu.h"
@@ -1040,6 +1047,12 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define MMIO_SBDS BIT(2)
/* CPU is affected by RETbleed, speculating where you would not expect it */
#define RETBLEED BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO BIT(5)
+/* CPU is affected by GDS */
+#define GDS BIT(6)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1052,18 +1065,20 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(CANNONLAKE_MOBILE,X86_STEPPING_ANY, RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
@@ -1186,6 +1201,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+ /*
+ * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+ * an affected processor, the VMM may have disabled the use of GATHER by
+ * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+ * which means that AVX will be disabled.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ boot_cpu_has(X86_FEATURE_AVX))
+ setup_force_cpu_bug(X86_BUG_GDS);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1271,8 +1296,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c);
- fpu__init_system(c);
-
#ifdef CONFIG_X86_32
/*
* Regardless of whether PCID is enumerated, the SDM says
@@ -1661,6 +1684,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
+ if (boot_cpu_has_bug(X86_BUG_GDS))
+ update_gds_msr();
}
static __init int setup_noclflush(char *arg)
@@ -1978,8 +2003,6 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu__init_cpu();
-
if (is_uv_system())
uv_cpu_init();
@@ -2043,8 +2066,6 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu__init_cpu();
-
load_fixmap_gdt(cpu);
}
#endif
@@ -2077,6 +2098,8 @@ void microcode_check(void)
perf_check_microcode();
+ amd_check_microcode();
+
/* Reload CPUID max function as it might've changed. */
info.cpuid_level = cpuid_eax(0);
@@ -2095,3 +2118,69 @@ void microcode_check(void)
pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ identify_boot_cpu();
+
+ /*
+ * identify_boot_cpu() initialized SMT support information, let the
+ * core code know.
+ */
+ cpu_smt_check_topology();
+
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ cpu_select_mitigations();
+
+ arch_smt_update();
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ /*
+ * Check whether this is a real i386 which is not longer
+ * supported and fixup the utsname.
+ */
+ if (boot_cpu_data.x86 < 4)
+ panic("Kernel requires i486+ for 'invlpg' and other features");
+
+ init_utsname()->machine[1] =
+ '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ }
+
+ /*
+ * Must be before alternatives because it might set or clear
+ * feature bits.
+ */
+ fpu__init_system();
+ fpu__init_cpu();
+
+ alternative_instructions();
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+ } else {
+ fpu__init_check_bugs();
+ }
+
+ /*
+ * This needs to be called before any devices perform DMA
+ * operations that might use the SWIOTLB bounce buffers. It will
+ * mark the bounce buffers as decrypted so that their usage will
+ * not cause "plain-text" data to be decrypted when accessed. It
+ * must be called after late_time_init() so that Hyper-V x86/x64
+ * hypercalls work when the SWIOTLB bounce buffers are decrypted.
+ */
+ mem_encrypt_init();
+}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4eb9bf68b122..ca1b8bf380a2 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -79,9 +79,11 @@ extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
+void cpu_select_mitigations(void);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
extern u64 x86_read_arch_cap_msr(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f406e3b85bdb..1125f752f126 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -585,8 +585,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_sched_in() during context switch.
*/
- barrier();
+ smp_mb();
/*
* By now, the task's closid and rmid are set. If the task is current
@@ -2140,19 +2142,23 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
t->closid = to->closid;
t->rmid = to->mon.rmid;
-#ifdef CONFIG_SMP
/*
- * This is safe on x86 w/o barriers as the ordering
- * of writing to task_cpu() and t->on_cpu is
- * reverse to the reading here. The detection is
- * inaccurate as tasks might move or schedule
- * before the smp function call takes place. In
- * such a case the function call is pointless, but
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and resctrl_sched_in()
+ * during context switch.
+ */
+ smp_mb();
+
+ /*
+ * If the task is on a CPU, set the CPU in the mask.
+ * The detection is inaccurate as tasks might move or
+ * schedule before the smp function call takes place.
+ * In such a case the function call is pointless, but
* there is no other side effect.
*/
- if (mask && t->on_cpu)
+ if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
cpumask_set_cpu(task_cpu(t), mask);
-#endif
}
}
read_unlock(&tasklist_lock);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 8396c77e9323..5698e04803b5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -54,7 +54,9 @@ struct cont_desc {
};
static u32 ucode_new_rev;
-static u8 amd_ucode_patch[PATCH_MAX_SIZE];
+
+/* One blob per node. */
+static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
@@ -210,7 +212,7 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
#else
new_rev = &ucode_new_rev;
- patch = &amd_ucode_patch;
+ patch = &amd_ucode_patch[0];
#endif
desc.cpuid_1_eax = cpuid_1_eax;
@@ -329,8 +331,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
}
-static enum ucode_state
-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
{
@@ -348,19 +349,19 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
+ ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret > UCODE_UPDATED)
return -EINVAL;
return 0;
}
-void reload_ucode_amd(void)
+void reload_ucode_amd(unsigned int cpu)
{
- struct microcode_amd *mc;
u32 rev, dummy;
+ struct microcode_amd *mc;
- mc = (struct microcode_amd *)amd_ucode_patch;
+ mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
@@ -531,7 +532,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
/* need to apply patch? */
- if (rev >= mc_amd->hdr.patch_id) {
+ if (rev > mc_amd->hdr.patch_id) {
ret = UCODE_OK;
goto out;
}
@@ -698,9 +699,10 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-static enum ucode_state
-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
{
+ struct cpuinfo_x86 *c;
+ unsigned int nid, cpu;
struct ucode_patch *p;
enum ucode_state ret;
@@ -713,22 +715,22 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
return ret;
}
- p = find_patch(0);
- if (!p) {
- return ret;
- } else {
- if (boot_cpu_data.microcode >= p->patch_id)
- return ret;
+ for_each_node(nid) {
+ cpu = cpumask_first(cpumask_of_node(nid));
+ c = &cpu_data(cpu);
- ret = UCODE_NEW;
- }
+ p = find_patch(cpu);
+ if (!p)
+ continue;
- /* save BSP's matching patch for early load */
- if (!save)
- return ret;
+ if (c->microcode >= p->patch_id)
+ continue;
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+ ret = UCODE_NEW;
+
+ memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
+ memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+ }
return ret;
}
@@ -754,12 +756,11 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct cpuinfo_x86 *c = &cpu_data(cpu);
- bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
- if (!refresh_fw || !bsp)
+ if (!refresh_fw)
return UCODE_OK;
if (c->x86 >= 0x15)
@@ -776,7 +777,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
+ ret = load_microcode_amd(c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 985ef98c8ba2..963b989710f9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -326,7 +326,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
#endif
}
-void reload_early_microcode(void)
+void reload_early_microcode(unsigned int cpu)
{
int vendor, family;
@@ -340,7 +340,7 @@ void reload_early_microcode(void)
break;
case X86_VENDOR_AMD:
if (family >= 0x10)
- reload_ucode_amd();
+ reload_ucode_amd(cpu);
break;
default:
break;
@@ -783,7 +783,7 @@ void microcode_bsp_resume(void)
if (uci->valid && uci->mc)
microcode_ops->apply_microcode(cpu);
else if (!uci->mc)
- reload_early_microcode();
+ reload_early_microcode(cpu);
}
static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3aa0e5a45303..31ad79a0c6f3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -662,7 +662,6 @@ void load_ucode_intel_ap(void)
else
iup = &intel_ucode_patch;
-reget:
if (!*iup) {
patch = __load_ucode_intel(&uci);
if (!patch)
@@ -673,12 +672,7 @@ reget:
uci.mc = *iup;
- if (apply_microcode_early(&uci, true)) {
- /* Mixed-silicon system? Try to refetch the proper patch: */
- *iup = NULL;
-
- goto reget;
- }
+ apply_microcode_early(&uci, true);
}
static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 71ca064e3794..31fe56a90cbf 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -44,7 +44,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
* initial apic id, which also represents 32-bit extended x2apic id.
*/
c->initial_apicid = edx;
- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
#endif
return 0;
}
@@ -68,7 +68,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
* Populate HT related information from sub-leaf level 0.
*/
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 91b3483e5085..e40bb50c5c4f 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,7 +35,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
/* Used while preparing memory map entries for second kernel */
@@ -86,15 +85,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -153,12 +143,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..7e698c45760c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -171,7 +171,6 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
- stack = stack ? : get_stack_pointer(task, regs);
regs = unwind_get_entry_regs(&state, &partial);
/*
@@ -190,9 +189,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - hardirq stack
* - entry stack
*/
- for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (stack = stack ?: get_stack_pointer(task, regs);
+ stack;
+ stack = stack_info.next_sp) {
const char *stack_name;
+ stack = PTR_ALIGN(stack, sizeof(long));
+
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
/*
* We weren't on a valid stack. It's possible that
@@ -326,7 +329,7 @@ unsigned long oops_begin(void)
}
NOKPROBE_SYMBOL(oops_begin);
-void __noreturn rewind_stack_do_exit(int signr);
+void __noreturn rewind_stack_and_make_dead(int signr);
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
@@ -361,7 +364,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* reuse the task stack and that existing poisons are invalid.
*/
kasan_unpoison_task_stack(current);
- rewind_stack_do_exit(signr);
+ rewind_stack_and_make_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 9692ccc583bb..644372a10c89 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -49,7 +49,7 @@ void fpu__init_cpu(void)
fpu__init_cpu_xstate();
}
-static bool fpu__probe_without_cpuid(void)
+static bool __init fpu__probe_without_cpuid(void)
{
unsigned long cr0;
u16 fsw, fcw;
@@ -67,7 +67,7 @@ static bool fpu__probe_without_cpuid(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
-static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+static void __init fpu__init_system_early_generic(void)
{
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
@@ -297,10 +297,10 @@ static void __init fpu__init_parse_early_param(void)
* Called on the boot CPU once per system bootup, to set up the initial
* FPU state that is later cloned into all processes:
*/
-void __init fpu__init_system(struct cpuinfo_x86 *c)
+void __init fpu__init_system(void)
{
fpu__init_parse_early_param();
- fpu__init_system_early_generic(c);
+ fpu__init_system_early_generic();
/*
* The FPU has to be operational for some of the
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 7d372db8bee1..e33b732ad337 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -811,6 +811,14 @@ void __init fpu__init_system_xstate(void)
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp();
+
+ /*
+ * CPU capabilities initialization runs before FPU init. So
+ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+ * functional, set the feature bit so depending code works.
+ */
+ setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index fe522691ac71..82753622f489 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,6 +32,7 @@
*/
static void init_8259A(int auto_eoi);
+static bool pcat_compat __ro_after_init;
static int i8259A_auto_eoi;
DEFINE_RAW_SPINLOCK(i8259A_lock);
@@ -114,6 +115,7 @@ static void make_8259A_irq(unsigned int irq)
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
+ irq_set_status_flags(irq, IRQ_LEVEL);
enable_irq(irq);
lapic_assign_legacy_vector(irq, true);
}
@@ -300,15 +302,32 @@ static void unmask_8259A(void)
static int probe_8259A(void)
{
+ unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
unsigned long flags;
- unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
- unsigned char new_val;
+
+ /*
+ * If MADT has the PCAT_COMPAT flag set, then do not bother probing
+ * for the PIC. Some BIOSes leave the PIC uninitialized and probing
+ * fails.
+ *
+ * Right now this causes problems as quite some code depends on
+ * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
+ * when the system has an IO/APIC because then PIC is not required
+ * at all, except for really old machines where the timer interrupt
+ * must be routed through the PIC. So just pretend that the PIC is
+ * there and let legacy_pic->init() initialize it for nothing.
+ *
+ * Alternatively this could just try to initialize the PIC and
+ * repeat the probe, but for cases where there is no PIC that's
+ * just pointless.
+ */
+ if (pcat_compat)
+ return nr_legacy_irqs();
+
/*
- * Check to see if we have a PIC.
- * Mask all except the cascade and read
- * back the value we just wrote. If we don't
- * have a PIC, we will read 0xff as opposed to the
- * value we wrote.
+ * Check to see if we have a PIC. Mask all except the cascade and
+ * read back the value we just wrote. If we don't have a PIC, we
+ * will read 0xff as opposed to the value we wrote.
*/
raw_spin_lock_irqsave(&i8259A_lock, flags);
@@ -430,5 +449,9 @@ static int __init i8259A_init_ops(void)
return 0;
}
-
device_initcall(i8259A_init_ops);
+
+void __init legacy_pic_pcat_compat(void)
+{
+ pcat_compat = true;
+}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a0693b71cfc1..f2c215e1f64c 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -72,8 +72,10 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
- for (i = 0; i < nr_legacy_irqs(); i++)
+ for (i = 0; i < nr_legacy_irqs(); i++) {
irq_set_chip_and_handler(i, chip, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
+ }
}
void __init init_IRQ(void)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 544bd41a514c..36b5a493e5b4 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -56,8 +56,8 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
/* This function only handles jump-optimized kprobe */
if (kp && kprobe_optimized(kp)) {
op = container_of(kp, struct optimized_kprobe, kp);
- /* If op->list is not empty, op is under optimizing */
- if (list_empty(&op->list))
+ /* If op is optimized or under unoptimizing */
+ if (list_empty(&op->list) || optprobe_queued_unopt(op))
goto found;
}
}
@@ -328,7 +328,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
for (i = 1; i < op->optinsn.size; i++) {
p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
+ if (p && !kprobe_disarmed(p))
return -EEXIST;
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 736348ead421..2ecf1dcc86b2 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -96,7 +96,7 @@ static void ich_force_hpet_resume(void)
static void ich_force_enable_hpet(struct pci_dev *dev)
{
u32 val;
- u32 uninitialized_var(rcba);
+ u32 rcba;
int err = 0;
if (hpet_address || force_hpet_address)
@@ -186,7 +186,7 @@ static void hpet_print_force_info(void)
static void old_ich_force_hpet_resume(void)
{
u32 val;
- u32 uninitialized_var(gen_cntl);
+ u32 gen_cntl;
if (!force_hpet_address || !cached_dev)
return;
@@ -208,7 +208,7 @@ static void old_ich_force_hpet_resume(void)
static void old_ich_force_enable_hpet(struct pci_dev *dev)
{
u32 val;
- u32 uninitialized_var(gen_cntl);
+ u32 gen_cntl;
if (hpet_address || force_hpet_address)
return;
@@ -299,7 +299,7 @@ static void vt8237_force_hpet_resume(void)
static void vt8237_force_enable_hpet(struct pci_dev *dev)
{
- u32 uninitialized_var(val);
+ u32 val;
if (hpet_address || force_hpet_address)
return;
@@ -430,7 +430,7 @@ static void nvidia_force_hpet_resume(void)
static void nvidia_force_enable_hpet(struct pci_dev *dev)
{
- u32 uninitialized_var(val);
+ u32 val;
if (hpet_address || force_hpet_address)
return;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b0f3a996df15..444b8a691c14 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -536,33 +536,29 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
-static void emergency_vmx_disable_all(void)
+static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
- * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
- * the machine, because the CPU blocks INIT when it's in VMX root.
+ * Disable virtualization on all CPUs before rebooting to avoid hanging
+ * the system, as VMX and SVM block INIT when running in the host.
*
* We can't take any locks and we may be on an inconsistent state, so
- * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
+ * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
- * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
- * doesn't prevent a different CPU from being in VMX root operation.
+ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
+ * other CPUs may have virtualization enabled.
*/
- if (cpu_has_vmx()) {
- /* Safely force _this_ CPU out of VMX root operation. */
- __cpu_emergency_vmxoff();
+ if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+ /* Safely force _this_ CPU out of VMX/SVM operation. */
+ cpu_emergency_disable_virtualization();
- /* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ /* Disable VMX/SVM and halt on other CPUs. */
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -599,7 +595,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
- emergency_vmx_disable_all();
+ emergency_reboot_disable_virtualization();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@@ -804,6 +800,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -826,7 +833,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -842,18 +856,32 @@ static void smp_send_nmi_allbutself(void)
apic->send_IPI_allbutself(NMI_VECTOR);
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -881,7 +909,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -911,6 +949,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index b2b87b91f336..c94ed0b37bcd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -33,7 +33,7 @@
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
-#include <asm/virtext.h>
+#include <asm/reboot.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -163,7 +163,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -176,7 +176,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
asmlinkage __visible void smp_reboot_interrupt(void)
{
ipi_entering_ack_irq();
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
irq_exit();
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8783d065f927..2e4f6a1ebca5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -96,6 +96,17 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+struct mwait_cpu_dead {
+ unsigned int control;
+ unsigned int status;
+};
+
+/*
+ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+ * that it's unlikely to be touched by other CPUs.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -231,6 +242,7 @@ static void notrace start_secondary(void *unused)
#endif
load_current_idt();
cpu_init();
+ fpu__init_cpu();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
smp_callin();
@@ -1594,10 +1606,10 @@ static bool wakeup_cpu0(void)
*/
static inline void mwait_play_dead(void)
{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
@@ -1631,13 +1643,6 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = &current_thread_info()->flags;
-
wbinvd();
while (1) {
@@ -1649,9 +1654,9 @@ static inline void mwait_play_dead(void)
* case where we return around the loop.
*/
mb();
- clflush(mwait_ptr);
+ clflush(md);
mb();
- __monitor(mwait_ptr, 0, 0);
+ __monitor(md, 0, 0);
mb();
__mwait(eax, 0);
/*
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 897da526e40e..5bc0fedb3342 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -265,6 +265,22 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
"Lenovo ideapad D330-10IGM"),
},
},
+ {
+ /* Lenovo IdeaPad Duet 3 10IGL5 with 1200x1920 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "IdeaPad Duet 3 10IGL5"),
+ },
+ },
+ {
+ /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ /* Non exact match to match F + L versions */
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
+ },
+ },
{},
};
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ae9e806a11de..3b87bca027b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -735,8 +735,9 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
break;
+ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
+ goto setup;
case 0xe8: /* call relative */
branch_clear_offset(auprobe, insn);
@@ -765,6 +766,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
return -ENOTSUPP;
}
+setup:
auprobe->branch.opc1 = opc1;
auprobe->branch.ilen = insn->length;
auprobe->branch.offs = insn->immediate.value;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 34c0652ca8b4..20d09355c9e0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -383,7 +383,7 @@ SECTIONS
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
. += 64 * 1024; /* 64k alignment slop space */
- *(.brk_reservation) /* areas brk users have reserved */
+ *(.bss..brk) /* areas brk users have reserved */
__brk_limit = .;
}
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index dc4f2fdf5e57..13fd54de5449 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o page_track.o debugfs.o
-kvm-intel-y += vmx.o pmu_intel.o
+kvm-intel-y += vmx/vmx.o vmx/pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 027941e3df68..256b00f456e6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2201,13 +2201,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
+ int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
- NULL);
+
+ r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+ if (r && lvt_type == APIC_LVTPC)
+ kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
+ return r;
}
return 0;
}
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 611f9e60f815..611f9e60f815 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bb696d7300c..55e52064c4ec 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2062,6 +2062,12 @@ static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
}
+static inline bool nested_cpu_has_save_preemption_timer(struct vmcs12 *vmcs12)
+{
+ return vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -4734,9 +4740,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
}
}
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, host_xss);
-
return 0;
}
@@ -5518,18 +5521,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
- if (var->unusable || !var->present)
- ar = 1 << 16;
- else {
- ar = var->type & 15;
- ar |= (var->s & 1) << 4;
- ar |= (var->dpl & 3) << 5;
- ar |= (var->present & 1) << 7;
- ar |= (var->avl & 1) << 12;
- ar |= (var->l & 1) << 13;
- ar |= (var->db & 1) << 14;
- ar |= (var->g & 1) << 15;
- }
+ ar = var->type & 15;
+ ar |= (var->s & 1) << 4;
+ ar |= (var->dpl & 3) << 5;
+ ar |= (var->present & 1) << 7;
+ ar |= (var->avl & 1) << 12;
+ ar |= (var->l & 1) << 13;
+ ar |= (var->db & 1) << 14;
+ ar |= (var->g & 1) << 15;
+ ar |= (var->unusable || !var->present) << 16;
return ar;
}
@@ -7951,6 +7951,9 @@ static __init int hardware_setup(void)
WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
}
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ rdmsrl(MSR_IA32_XSS, host_xss);
+
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
@@ -12609,6 +12612,10 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (!nested_cpu_has_preemption_timer(vmcs12) &&
+ nested_cpu_has_save_preemption_timer(vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_pml_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -12745,7 +12752,7 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 *exit_qual)
{
- bool ia32e;
+ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
*exit_qual = ENTRY_FAIL_DEFAULT;
@@ -12758,6 +12765,13 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 1;
}
+ if ((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)
+ return 1;
+
+ if ((ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
+ (ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
+ return 1;
+
/*
* If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR:
@@ -12769,7 +12783,6 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
*/
if (to_vmx(vcpu)->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
((vmcs12->guest_cr0 & X86_CR0_PG) &&
@@ -13865,6 +13878,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
break;
+ case x86_intercept_pause:
+ /*
+ * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides
+ * with vanilla NOPs in the emulator. Apply the interception
+ * check only to actual PAUSE instructions. Don't check
+ * PAUSE-loop-exiting, software can't expect a given PAUSE to
+ * exit, i.e. KVM is within its rights to allow L2 to execute
+ * the PAUSE.
+ */
+ if ((info->rep_prefix != REPE_PREFIX) ||
+ !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
+ return X86EMUL_CONTINUE;
+
+ break;
+
/* TODO: check more intercepts... */
default:
break;
diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx/vmx_evmcs.h
index 210a884090ad..210a884090ad 100644
--- a/arch/x86/kvm/vmx_evmcs.h
+++ b/arch/x86/kvm/vmx/vmx_evmcs.h
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx/vmx_shadow_fields.h
index cd0c75f6d037..cd0c75f6d037 100644
--- a/arch/x86/kvm/vmx_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmx_shadow_fields.h
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8fe615df8e5b..0548ae57826f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1224,6 +1224,9 @@ u64 kvm_get_arch_capabilities(void)
/* Guests don't need to know "Fill buffer clear control" exists */
data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+ if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+ data |= ARCH_CAP_GDS_NO;
+
return data;
}
@@ -2457,6 +2460,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_PATCH_LOADER:
case MSR_AMD64_BU_CFG2:
case MSR_AMD64_DC_CFG:
+ case MSR_AMD64_TW_CFG:
case MSR_F15H_EX_CFG:
break;
@@ -2760,6 +2764,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_BU_CFG2:
case MSR_IA32_PERF_CTL:
case MSR_AMD64_DC_CFG:
+ case MSR_AMD64_TW_CFG:
case MSR_F15H_EX_CFG:
msr_info->data = 0;
break;
@@ -3637,12 +3642,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
{
unsigned long val;
+ memset(dbgregs, 0, sizeof(*dbgregs));
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
kvm_get_dr(vcpu, 6, &val);
dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
- dbgregs->flags = 0;
- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index 33147fef3452..f1024b51cfee 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -22,6 +22,6 @@
*/
ENTRY(__iowrite32_copy)
movl %edx,%ecx
- rep movsd
+ rep movsl
ret
ENDPROC(__iowrite32_copy)
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c
index a018ec4fba53..c97be9a1430a 100644
--- a/arch/x86/lib/misc.c
+++ b/arch/x86/lib/misc.c
@@ -6,7 +6,7 @@
*/
int num_digits(int val)
{
- int m = 10;
+ long long m = 10;
int d = 1;
if (val < 0) {
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index b1dba0987565..2c84c5595cf4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -9,6 +9,7 @@
#include <linux/kmemleak.h>
#include <asm/set_memory.h>
+#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
@@ -207,6 +208,24 @@ static void __init probe_page_size_mask(void)
}
}
+#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \
+ .family = 6, \
+ .model = _model, \
+ }
+/*
+ * INVLPG may not properly flush Global entries
+ * on these CPUs when PCIDs are enabled.
+ */
+static const struct x86_cpu_id invlpg_miss_ids[] = {
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+ {}
+};
+
static void setup_pcid(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
@@ -215,6 +234,12 @@ static void setup_pcid(void)
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
+ if (x86_match_cpu(invlpg_miss_ids)) {
+ pr_info("Incomplete global flushes, disabling PCID");
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ return;
+ }
+
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() -- the
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 650d5a6cafc7..832c899b7b73 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -563,7 +563,8 @@ void __init sme_enable(struct boot_params *bp)
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+ return;
if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
sme_me_mask = me_mask;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index b4ff063a4371..a830d49341ec 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -585,13 +585,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
if (start >= end)
continue;
- /*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- continue;
-
alloc_node_data(nid);
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 76959a7d88c8..94291e0ddcb7 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -7,6 +7,7 @@
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/vgaarb.h>
+#include <asm/amd_nb.h>
#include <asm/hpet.h>
#include <asm/pci_x86.h>
@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
#endif
+
+#ifdef CONFIG_AMD_NB
+
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L
+
+static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
+{
+ u32 data;
+
+ if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
+ data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
+ if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
+ pci_err(dev, "Failed to write data 0x%x\n", data);
+ } else {
+ pci_err(dev, "Failed to read data\n");
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
+#endif
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2cfa0caef133..00f104e341e5 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -12,6 +12,11 @@ $(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
@@ -25,7 +30,7 @@ KCOV_INSTRUMENT := n
# make up the standalone purgatory.ro
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
-PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0
PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
@@ -56,6 +61,9 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
+AFLAGS_REMOVE_setup-x86_$(BITS).o += -g -Wa,-gdwarf-2
+AFLAGS_REMOVE_entry64.o += -g -Wa,-gdwarf-2
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
index 7c441b59d375..be99ff25c503 100644
--- a/arch/x86/um/vdso/um_vdso.c
+++ b/arch/x86/um/vdso/um_vdso.c
@@ -20,8 +20,10 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm("syscall"
+ : "=a" (ret)
+ : "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
+ : "rcx", "r11", "memory");
return ret;
}
@@ -32,8 +34,10 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm("syscall"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
+ : "rcx", "r11", "memory");
return ret;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7a43b2ae19f1..a1cc855c539c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,30 +32,30 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
void xen_smp_intr_free(unsigned int cpu)
{
+ kfree(per_cpu(xen_resched_irq, cpu).name);
+ per_cpu(xen_resched_irq, cpu).name = NULL;
if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
per_cpu(xen_resched_irq, cpu).irq = -1;
- kfree(per_cpu(xen_resched_irq, cpu).name);
- per_cpu(xen_resched_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_callfunc_irq, cpu).name);
+ per_cpu(xen_callfunc_irq, cpu).name = NULL;
if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
per_cpu(xen_callfunc_irq, cpu).irq = -1;
- kfree(per_cpu(xen_callfunc_irq, cpu).name);
- per_cpu(xen_callfunc_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_debug_irq, cpu).name);
+ per_cpu(xen_debug_irq, cpu).name = NULL;
if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
per_cpu(xen_debug_irq, cpu).irq = -1;
- kfree(per_cpu(xen_debug_irq, cpu).name);
- per_cpu(xen_debug_irq, cpu).name = NULL;
}
+ kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
+ per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
NULL);
per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
- kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
- per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
}
}
@@ -65,6 +65,7 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
xen_reschedule_interrupt,
@@ -74,9 +75,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_resched_irq, cpu).irq = rc;
- per_cpu(xen_resched_irq, cpu).name = resched_name;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
xen_call_function_interrupt,
@@ -86,18 +87,21 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_callfunc_irq, cpu).irq = rc;
- per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
- debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
- rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
- IRQF_PERCPU | IRQF_NOBALANCING,
- debug_name, NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_debug_irq, cpu).irq = rc;
- per_cpu(xen_debug_irq, cpu).name = debug_name;
+ if (!xen_fifo_events) {
+ debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ per_cpu(xen_debug_irq, cpu).name = debug_name;
+ rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
+ xen_debug_interrupt,
+ IRQF_PERCPU | IRQF_NOBALANCING,
+ debug_name, NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_debug_irq, cpu).irq = rc;
+ }
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
xen_call_function_single_interrupt,
@@ -107,7 +111,6 @@ int xen_smp_intr_init(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
- per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
return 0;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index e8248e8a04ca..66f83562d329 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -27,6 +27,7 @@
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>
+#include <asm/fpu/internal.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
@@ -58,6 +59,7 @@ static void cpu_bringup(void)
int cpu;
cpu_init();
+ fpu__init_cpu();
touch_softlockup_watchdog();
preempt_disable();
@@ -94,18 +96,18 @@ asmlinkage __visible void cpu_bringup_and_idle(void)
void xen_smp_intr_free_pv(unsigned int cpu)
{
+ kfree(per_cpu(xen_irq_work, cpu).name);
+ per_cpu(xen_irq_work, cpu).name = NULL;
if (per_cpu(xen_irq_work, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
per_cpu(xen_irq_work, cpu).irq = -1;
- kfree(per_cpu(xen_irq_work, cpu).name);
- per_cpu(xen_irq_work, cpu).name = NULL;
}
+ kfree(per_cpu(xen_pmu_irq, cpu).name);
+ per_cpu(xen_pmu_irq, cpu).name = NULL;
if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
per_cpu(xen_pmu_irq, cpu).irq = -1;
- kfree(per_cpu(xen_pmu_irq, cpu).name);
- per_cpu(xen_pmu_irq, cpu).name = NULL;
}
}
@@ -115,6 +117,7 @@ int xen_smp_intr_init_pv(unsigned int cpu)
char *callfunc_name, *pmu_name;
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
+ per_cpu(xen_irq_work, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
xen_irq_work_interrupt,
@@ -124,10 +127,10 @@ int xen_smp_intr_init_pv(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_irq_work, cpu).irq = rc;
- per_cpu(xen_irq_work, cpu).name = callfunc_name;
if (is_xen_pmu) {
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
xen_pmu_irq_handler,
IRQF_PERCPU|IRQF_NOBALANCING,
@@ -135,7 +138,6 @@ int xen_smp_intr_init_pv(unsigned int cpu)
if (rc < 0)
goto fail;
per_cpu(xen_pmu_irq, cpu).irq = rc;
- per_cpu(xen_pmu_irq, cpu).name = pmu_name;
}
return 0;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 6fffb86a32ad..e6cf1b430fd0 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -83,6 +83,7 @@ void xen_init_lock_cpu(int cpu)
cpu, per_cpu(lock_kicker_irq, cpu));
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+ per_cpu(irq_name, cpu) = name;
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
dummy_handler,
@@ -93,7 +94,6 @@ void xen_init_lock_cpu(int cpu)
if (irq >= 0) {
disable_irq(irq); /* make sure it's never delivered */
per_cpu(lock_kicker_irq, cpu) = irq;
- per_cpu(irq_name, cpu) = name;
}
printk("cpu %d spinlock event irq %d\n", cpu, irq);
@@ -106,6 +106,8 @@ void xen_uninit_lock_cpu(int cpu)
if (!xen_pvspin)
return;
+ kfree(per_cpu(irq_name, cpu));
+ per_cpu(irq_name, cpu) = NULL;
/*
* When booting the kernel with 'mitigations=auto,nosmt', the secondary
* CPUs are not activated, and lock_kicker_irq is not initialized.
@@ -116,8 +118,6 @@ void xen_uninit_lock_cpu(int cpu)
unbind_from_irqhandler(irq, NULL);
per_cpu(lock_kicker_irq, cpu) = -1;
- kfree(per_cpu(irq_name, cpu));
- per_cpu(irq_name, cpu) = NULL;
}
PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..9faec8543237 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,6 +30,8 @@ extern struct start_info *xen_start_info;
extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;
+extern bool xen_fifo_events;
+
void xen_setup_mfn_list_list(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);