aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/alternative.c15
-rw-r--r--arch/x86/kernel/apic/apic.c5
-rw-r--r--arch/x86/kernel/apic/io_apic.c14
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c5
-rw-r--r--arch/x86/kernel/apm_32.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c220
-rw-r--r--arch/x86/kernel/cpu/bugs.c246
-rw-r--r--arch/x86/kernel/cpu/common.c119
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c26
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c55
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c6
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c8
-rw-r--r--arch/x86/kernel/cpu/topology.c5
-rw-r--r--arch/x86/kernel/crash.c17
-rw-r--r--arch/x86/kernel/dumpstack.c11
-rw-r--r--arch/x86/kernel/fpu/init.c8
-rw-r--r--arch/x86/kernel/fpu/xstate.c8
-rw-r--r--arch/x86/kernel/i8259.c39
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c6
-rw-r--r--arch/x86/kernel/quirks.c10
-rw-r--r--arch/x86/kernel/reboot.c88
-rw-r--r--arch/x86/kernel/smp.c6
-rw-r--r--arch/x86/kernel/smpboot.c25
-rw-r--r--arch/x86/kernel/sysfb_efi.c16
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
29 files changed, 689 insertions, 290 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8b1aa1206d98..2f0fa294d617 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -156,6 +156,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
madt->address);
}
+ if (madt->flags & ACPI_MADT_PCAT_COMPAT)
+ legacy_pic_pcat_compat();
+
default_acpi_madt_oem_check(madt->header.oem_id,
madt->header.oem_table_id);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 918a23704c0c..33882d5ab25d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -366,6 +366,17 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("alt table %px, -> %px", start, end);
+
+ /*
+ * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+ * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
+ * During the process, KASAN becomes confused seeing partial LA57
+ * conversion and triggers a false-positive out-of-bound report.
+ *
+ * Disable KASAN until the patching is complete.
+ */
+ kasan_disable_current();
+
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -426,6 +437,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insnbuf, insnbuf_sz);
}
+
+ kasan_enable_current();
}
#ifdef CONFIG_SMP
@@ -677,8 +690,8 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
} else {
local_irq_save(flags);
memcpy(addr, opcode, len);
- local_irq_restore(flags);
sync_core();
+ local_irq_restore(flags);
/*
* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 926939978c1c..9318fe7d850e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -403,10 +403,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
- rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
- } while (rsvd != new);
+ } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
- rsvd &= ~APIC_EILVT_MASKED;
+ rsvd = new & ~APIC_EILVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 677508baf95a..af59aa9c5523 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2449,17 +2449,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
+ unsigned int ret;
+
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- if (!ioapic_initialized)
- return gsi_top;
+ ret = ioapic_dynirq_base ? : gsi_top;
+
/*
- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
- * updated. So simply return @from if ioapic_dynirq_base == 0.
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
*/
- return ioapic_dynirq_base ? : from;
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8e70c2ba21b3..fb17767552ef 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -102,7 +102,10 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+ if (!x2apic_mode)
+ return 0;
+
+ if (x2apic_phys || x2apic_fadt_phys())
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f7151cd03cb0..3d7a8049f637 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -247,12 +247,6 @@ extern int (*console_blank_hook)(int);
#endif
/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define APM_MINOR_DEV 134
-
-/*
* Various options can be changed at boot time as follows:
* (We allow underscores for compatibility with the modules code)
* apm=on/off enable/disable APM
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e017f64e09d6..84667781c41d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -23,11 +23,6 @@
#include "cpu.h"
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static const int amd_erratum_1054[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -35,6 +30,83 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
*/
static u32 nodes_per_socket = 1;
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE().
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+static const int amd_erratum_400[] =
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+static const int amd_erratum_383[] =
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
+static const int amd_zenbleed[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
+ AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+
+static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+{
+ int osvw_id = *erratum++;
+ u32 range;
+ u32 ms;
+
+ if (osvw_id >= 0 && osvw_id < 65536 &&
+ cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+
+ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+ return osvw_bits & (1ULL << (osvw_id & 0x3f));
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+ while ((range = *erratum++))
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range)))
+ return true;
+
+ return false;
+}
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -199,6 +271,15 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
#endif
+ /*
+ * Work around Erratum 1386. The XSAVES instruction malfunctions in
+ * certain circumstances on Zen1/2 uarch, and not all parts have had
+ * updated microcode at the time of writing (March 2023).
+ *
+ * Affected parts all have no supervisor XSAVE states, meaning that
+ * the XSAVEC instruction (which works fine) is equivalent.
+ */
+ clear_cpu_cap(c, X86_FEATURE_XSAVES);
}
static void init_amd_k7(struct cpuinfo_x86 *c)
@@ -900,6 +981,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
}
}
+static bool cpu_has_zenbleed_microcode(void)
+{
+ u32 good_rev = 0;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
+ case 0x60 ... 0x67: good_rev = 0x0860010b; break;
+ case 0x68 ... 0x6f: good_rev = 0x08608105; break;
+ case 0x70 ... 0x7f: good_rev = 0x08701032; break;
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+
+ default:
+ return false;
+ break;
+ }
+
+ if (boot_cpu_data.microcode < good_rev)
+ return false;
+
+ return true;
+}
+
+static void zenbleed_check(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_amd_erratum(c, amd_zenbleed))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (!cpu_has(c, X86_FEATURE_AVX))
+ return;
+
+ if (!cpu_has_zenbleed_microcode()) {
+ pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
+ msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ } else {
+ msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ }
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -1002,6 +1124,12 @@ static void init_amd(struct cpuinfo_x86 *c)
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
check_null_seg_clears_base(c);
+
+ zenbleed_check(c);
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
#ifdef CONFIG_X86_32
@@ -1097,73 +1225,6 @@ static const struct cpu_dev amd_cpu_dev = {
cpu_dev_register(amd_cpu_dev);
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
- int osvw_id = *erratum++;
- u32 range;
- u32 ms;
-
- if (osvw_id >= 0 && osvw_id < 65536 &&
- cpu_has(cpu, X86_FEATURE_OSVW)) {
- u64 osvw_len;
-
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
- if (osvw_id < osvw_len) {
- u64 osvw_bits;
-
- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
- osvw_bits);
- return osvw_bits & (1ULL << (osvw_id & 0x3f));
- }
- }
-
- /* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
- while ((range = *erratum++))
- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
- (ms >= AMD_MODEL_RANGE_START(range)) &&
- (ms <= AMD_MODEL_RANGE_END(range)))
- return true;
-
- return false;
-}
-
void set_dr_addr_mask(unsigned long mask, int dr)
{
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -1182,3 +1243,18 @@ void set_dr_addr_mask(unsigned long mask, int dr)
break;
}
}
+
+static void zenbleed_check_cpu(void *unused)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+ zenbleed_check(c);
+}
+
+void amd_check_microcode(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
+}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f1f57acca139..6e1acbdd27a5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,7 +9,6 @@
* - Andrew D. Balsa (code cleanup).
*/
#include <linux/init.h>
-#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/nospec.h>
@@ -25,9 +24,7 @@
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>
-#include <asm/alternative.h>
#include <asm/pgtable.h>
-#include <asm/set_memory.h>
#include <asm/intel-family.h>
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
@@ -47,6 +44,7 @@ static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init gds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -115,29 +113,24 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
-void __init check_bugs(void)
+void __init cpu_select_mitigations(void)
{
- identify_boot_cpu();
-
- /*
- * identify_boot_cpu() initialized SMT support information, let the
- * core code know.
- */
- cpu_smt_check_topology();
-
- if (!IS_ENABLED(CONFIG_SMP)) {
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
- }
-
/*
* Read the SPEC_CTRL MSR to account for reserved bits which may
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ /*
+ * Previously running kernel (kexec), may have some controls
+ * turned ON. Clear them and let the mitigations setup below
+ * rediscover them based on configuration.
+ */
+ x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
+ }
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -157,39 +150,7 @@ void __init check_bugs(void)
l1tf_select_mitigation();
md_clear_select_mitigation();
srbds_select_mitigation();
-
- arch_smt_update();
-
-#ifdef CONFIG_X86_32
- /*
- * Check whether we are able to run this kernel safely on SMP.
- *
- * - i386 is no longer supported.
- * - In order to run on anything without a TSC, we need to be
- * compiled for a i486.
- */
- if (boot_cpu_data.x86 < 4)
- panic("Kernel requires i486+ for 'invlpg' and other features");
-
- init_utsname()->machine[1] =
- '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
- alternative_instructions();
-
- fpu__init_check_bugs();
-#else /* CONFIG_X86_64 */
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-#endif
+ gds_select_mitigation();
}
/*
@@ -641,6 +602,149 @@ static int __init srbds_parse_cmdline(char *str)
early_param("srbds", srbds_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "GDS: " fmt
+
+enum gds_mitigations {
+ GDS_MITIGATION_OFF,
+ GDS_MITIGATION_UCODE_NEEDED,
+ GDS_MITIGATION_FORCE,
+ GDS_MITIGATION_FULL,
+ GDS_MITIGATION_FULL_LOCKED,
+ GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+ [GDS_MITIGATION_OFF] = "Vulnerable",
+ [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode",
+ [GDS_MITIGATION_FULL] = "Mitigation: Microcode",
+ [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)",
+ [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+ return (gds_mitigation == GDS_MITIGATION_FULL ||
+ gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+ u64 mcu_ctrl_after;
+ u64 mcu_ctrl;
+
+ switch (gds_mitigation) {
+ case GDS_MITIGATION_OFF:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl |= GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FULL_LOCKED:
+ /*
+ * The LOCKED state comes from the boot CPU. APs might not have
+ * the same state. Make sure the mitigation is enabled on all
+ * CPUs.
+ */
+ case GDS_MITIGATION_FULL:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl &= ~GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FORCE:
+ case GDS_MITIGATION_UCODE_NEEDED:
+ case GDS_MITIGATION_HYPERVISOR:
+ return;
+ };
+
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+ /*
+ * Check to make sure that the WRMSR value was not ignored. Writes to
+ * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+ * processor was not.
+ */
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+ WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+ u64 mcu_ctrl;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+ goto out;
+ }
+
+ if (cpu_mitigations_off())
+ gds_mitigation = GDS_MITIGATION_OFF;
+ /* Will verify below that mitigation _can_ be disabled */
+
+ /* No microcode */
+ if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (gds_mitigation == GDS_MITIGATION_FORCE) {
+ /*
+ * This only needs to be done on the boot CPU so do it
+ * here rather than in update_gds_msr()
+ */
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+ } else {
+ gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+ }
+ goto out;
+ }
+
+ /* Microcode has mitigation, use it */
+ if (gds_mitigation == GDS_MITIGATION_FORCE)
+ gds_mitigation = GDS_MITIGATION_FULL;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ if (mcu_ctrl & GDS_MITG_LOCKED) {
+ if (gds_mitigation == GDS_MITIGATION_OFF)
+ pr_warn("Mitigation locked. Disable failed.\n");
+
+ /*
+ * The mitigation is selected from the boot CPU. All other CPUs
+ * _should_ have the same state. If the boot CPU isn't locked
+ * but others are then update_gds_msr() will WARN() of the state
+ * mismatch. If the boot CPU is locked update_gds_msr() will
+ * ensure the other CPUs have the mitigation enabled.
+ */
+ gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+ }
+
+ update_gds_msr();
+out:
+ pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ gds_mitigation = GDS_MITIGATION_OFF;
+ else if (!strcmp(str, "force"))
+ gds_mitigation = GDS_MITIGATION_FORCE;
+
+ return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -975,14 +1079,18 @@ spectre_v2_parse_user_cmdline(void)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
{
- return mode == SPECTRE_V2_IBRS ||
- mode == SPECTRE_V2_EIBRS ||
+ return mode == SPECTRE_V2_EIBRS ||
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_EIBRS_LFENCE;
}
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
+}
+
static void __init
spectre_v2_user_select_mitigation(void)
{
@@ -1045,12 +1153,19 @@ spectre_v2_user_select_mitigation(void)
}
/*
- * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
- * STIBP is not required.
+ * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
+ * is not required.
+ *
+ * Enhanced IBRS also protects against cross-thread branch target
+ * injection in user-mode as the IBRS bit remains always set which
+ * implicitly enables cross-thread protections. However, in legacy IBRS
+ * mode, the IBRS bit is set only on kernel entry and cleared on return
+ * to userspace. This disables the implicit cross-thread protection,
+ * so allow for STIBP to be selected in that case.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return;
/*
@@ -1778,6 +1893,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
task_update_spec_tif(task);
+ if (task == current)
+ indirect_branch_prediction_barrier();
break;
default:
return -ERANGE;
@@ -2100,7 +2217,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -2175,6 +2292,11 @@ static ssize_t retbleed_show_state(char *buf)
return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
+static ssize_t gds_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2224,6 +2346,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RETBLEED:
return retbleed_show_state(buf);
+ case X86_BUG_GDS:
+ return gds_show_state(buf);
+
default:
break;
}
@@ -2288,4 +2413,9 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
{
return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 55293e5dcbff..d315e928b95c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,14 +13,20 @@
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
#include <linux/sched/task.h>
+#include <linux/sched/smt.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
+#include <linux/mem_encrypt.h>
#include <linux/smp.h>
+#include <linux/cpu.h>
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <asm/stackprotector.h>
+#include <linux/utsname.h>
+
+#include <asm/alternative.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/archrandom.h>
@@ -56,6 +62,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
#endif
+#include <asm/set_memory.h>
#include "cpu.h"
@@ -1040,6 +1047,12 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define MMIO_SBDS BIT(2)
/* CPU is affected by RETbleed, speculating where you would not expect it */
#define RETBLEED BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO BIT(5)
+/* CPU is affected by GDS */
+#define GDS BIT(6)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1052,18 +1065,20 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(CANNONLAKE_MOBILE,X86_STEPPING_ANY, RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
@@ -1186,6 +1201,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+ /*
+ * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+ * an affected processor, the VMM may have disabled the use of GATHER by
+ * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+ * which means that AVX will be disabled.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ boot_cpu_has(X86_FEATURE_AVX))
+ setup_force_cpu_bug(X86_BUG_GDS);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1271,8 +1296,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c);
- fpu__init_system(c);
-
#ifdef CONFIG_X86_32
/*
* Regardless of whether PCID is enumerated, the SDM says
@@ -1661,6 +1684,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
+ if (boot_cpu_has_bug(X86_BUG_GDS))
+ update_gds_msr();
}
static __init int setup_noclflush(char *arg)
@@ -1978,8 +2003,6 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu__init_cpu();
-
if (is_uv_system())
uv_cpu_init();
@@ -2043,8 +2066,6 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu__init_cpu();
-
load_fixmap_gdt(cpu);
}
#endif
@@ -2077,6 +2098,8 @@ void microcode_check(void)
perf_check_microcode();
+ amd_check_microcode();
+
/* Reload CPUID max function as it might've changed. */
info.cpuid_level = cpuid_eax(0);
@@ -2095,3 +2118,69 @@ void microcode_check(void)
pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ identify_boot_cpu();
+
+ /*
+ * identify_boot_cpu() initialized SMT support information, let the
+ * core code know.
+ */
+ cpu_smt_check_topology();
+
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ cpu_select_mitigations();
+
+ arch_smt_update();
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ /*
+ * Check whether this is a real i386 which is not longer
+ * supported and fixup the utsname.
+ */
+ if (boot_cpu_data.x86 < 4)
+ panic("Kernel requires i486+ for 'invlpg' and other features");
+
+ init_utsname()->machine[1] =
+ '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ }
+
+ /*
+ * Must be before alternatives because it might set or clear
+ * feature bits.
+ */
+ fpu__init_system();
+ fpu__init_cpu();
+
+ alternative_instructions();
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+ } else {
+ fpu__init_check_bugs();
+ }
+
+ /*
+ * This needs to be called before any devices perform DMA
+ * operations that might use the SWIOTLB bounce buffers. It will
+ * mark the bounce buffers as decrypted so that their usage will
+ * not cause "plain-text" data to be decrypted when accessed. It
+ * must be called after late_time_init() so that Hyper-V x86/x64
+ * hypercalls work when the SWIOTLB bounce buffers are decrypted.
+ */
+ mem_encrypt_init();
+}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4eb9bf68b122..ca1b8bf380a2 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -79,9 +79,11 @@ extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
+void cpu_select_mitigations(void);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
extern u64 x86_read_arch_cap_msr(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f406e3b85bdb..1125f752f126 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -585,8 +585,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_sched_in() during context switch.
*/
- barrier();
+ smp_mb();
/*
* By now, the task's closid and rmid are set. If the task is current
@@ -2140,19 +2142,23 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
t->closid = to->closid;
t->rmid = to->mon.rmid;
-#ifdef CONFIG_SMP
/*
- * This is safe on x86 w/o barriers as the ordering
- * of writing to task_cpu() and t->on_cpu is
- * reverse to the reading here. The detection is
- * inaccurate as tasks might move or schedule
- * before the smp function call takes place. In
- * such a case the function call is pointless, but
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and resctrl_sched_in()
+ * during context switch.
+ */
+ smp_mb();
+
+ /*
+ * If the task is on a CPU, set the CPU in the mask.
+ * The detection is inaccurate as tasks might move or
+ * schedule before the smp function call takes place.
+ * In such a case the function call is pointless, but
* there is no other side effect.
*/
- if (mask && t->on_cpu)
+ if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
cpumask_set_cpu(task_cpu(t), mask);
-#endif
}
}
read_unlock(&tasklist_lock);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 8396c77e9323..5698e04803b5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -54,7 +54,9 @@ struct cont_desc {
};
static u32 ucode_new_rev;
-static u8 amd_ucode_patch[PATCH_MAX_SIZE];
+
+/* One blob per node. */
+static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
@@ -210,7 +212,7 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
#else
new_rev = &ucode_new_rev;
- patch = &amd_ucode_patch;
+ patch = &amd_ucode_patch[0];
#endif
desc.cpuid_1_eax = cpuid_1_eax;
@@ -329,8 +331,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
}
-static enum ucode_state
-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
{
@@ -348,19 +349,19 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
+ ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret > UCODE_UPDATED)
return -EINVAL;
return 0;
}
-void reload_ucode_amd(void)
+void reload_ucode_amd(unsigned int cpu)
{
- struct microcode_amd *mc;
u32 rev, dummy;
+ struct microcode_amd *mc;
- mc = (struct microcode_amd *)amd_ucode_patch;
+ mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
@@ -531,7 +532,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
/* need to apply patch? */
- if (rev >= mc_amd->hdr.patch_id) {
+ if (rev > mc_amd->hdr.patch_id) {
ret = UCODE_OK;
goto out;
}
@@ -698,9 +699,10 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-static enum ucode_state
-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
{
+ struct cpuinfo_x86 *c;
+ unsigned int nid, cpu;
struct ucode_patch *p;
enum ucode_state ret;
@@ -713,22 +715,22 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
return ret;
}
- p = find_patch(0);
- if (!p) {
- return ret;
- } else {
- if (boot_cpu_data.microcode >= p->patch_id)
- return ret;
+ for_each_node(nid) {
+ cpu = cpumask_first(cpumask_of_node(nid));
+ c = &cpu_data(cpu);
- ret = UCODE_NEW;
- }
+ p = find_patch(cpu);
+ if (!p)
+ continue;
- /* save BSP's matching patch for early load */
- if (!save)
- return ret;
+ if (c->microcode >= p->patch_id)
+ continue;
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+ ret = UCODE_NEW;
+
+ memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
+ memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+ }
return ret;
}
@@ -754,12 +756,11 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct cpuinfo_x86 *c = &cpu_data(cpu);
- bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
- if (!refresh_fw || !bsp)
+ if (!refresh_fw)
return UCODE_OK;
if (c->x86 >= 0x15)
@@ -776,7 +777,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
+ ret = load_microcode_amd(c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 985ef98c8ba2..963b989710f9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -326,7 +326,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
#endif
}
-void reload_early_microcode(void)
+void reload_early_microcode(unsigned int cpu)
{
int vendor, family;
@@ -340,7 +340,7 @@ void reload_early_microcode(void)
break;
case X86_VENDOR_AMD:
if (family >= 0x10)
- reload_ucode_amd();
+ reload_ucode_amd(cpu);
break;
default:
break;
@@ -783,7 +783,7 @@ void microcode_bsp_resume(void)
if (uci->valid && uci->mc)
microcode_ops->apply_microcode(cpu);
else if (!uci->mc)
- reload_early_microcode();
+ reload_early_microcode(cpu);
}
static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3aa0e5a45303..31ad79a0c6f3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -662,7 +662,6 @@ void load_ucode_intel_ap(void)
else
iup = &intel_ucode_patch;
-reget:
if (!*iup) {
patch = __load_ucode_intel(&uci);
if (!patch)
@@ -673,12 +672,7 @@ reget:
uci.mc = *iup;
- if (apply_microcode_early(&uci, true)) {
- /* Mixed-silicon system? Try to refetch the proper patch: */
- *iup = NULL;
-
- goto reget;
- }
+ apply_microcode_early(&uci, true);
}
static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 71ca064e3794..31fe56a90cbf 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -44,7 +44,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
* initial apic id, which also represents 32-bit extended x2apic id.
*/
c->initial_apicid = edx;
- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
#endif
return 0;
}
@@ -68,7 +68,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
* Populate HT related information from sub-leaf level 0.
*/
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 91b3483e5085..e40bb50c5c4f 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,7 +35,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
/* Used while preparing memory map entries for second kernel */
@@ -86,15 +85,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -153,12 +143,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..7e698c45760c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -171,7 +171,6 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
- stack = stack ? : get_stack_pointer(task, regs);
regs = unwind_get_entry_regs(&state, &partial);
/*
@@ -190,9 +189,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - hardirq stack
* - entry stack
*/
- for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (stack = stack ?: get_stack_pointer(task, regs);
+ stack;
+ stack = stack_info.next_sp) {
const char *stack_name;
+ stack = PTR_ALIGN(stack, sizeof(long));
+
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
/*
* We weren't on a valid stack. It's possible that
@@ -326,7 +329,7 @@ unsigned long oops_begin(void)
}
NOKPROBE_SYMBOL(oops_begin);
-void __noreturn rewind_stack_do_exit(int signr);
+void __noreturn rewind_stack_and_make_dead(int signr);
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
@@ -361,7 +364,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* reuse the task stack and that existing poisons are invalid.
*/
kasan_unpoison_task_stack(current);
- rewind_stack_do_exit(signr);
+ rewind_stack_and_make_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 9692ccc583bb..644372a10c89 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -49,7 +49,7 @@ void fpu__init_cpu(void)
fpu__init_cpu_xstate();
}
-static bool fpu__probe_without_cpuid(void)
+static bool __init fpu__probe_without_cpuid(void)
{
unsigned long cr0;
u16 fsw, fcw;
@@ -67,7 +67,7 @@ static bool fpu__probe_without_cpuid(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
-static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+static void __init fpu__init_system_early_generic(void)
{
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
@@ -297,10 +297,10 @@ static void __init fpu__init_parse_early_param(void)
* Called on the boot CPU once per system bootup, to set up the initial
* FPU state that is later cloned into all processes:
*/
-void __init fpu__init_system(struct cpuinfo_x86 *c)
+void __init fpu__init_system(void)
{
fpu__init_parse_early_param();
- fpu__init_system_early_generic(c);
+ fpu__init_system_early_generic();
/*
* The FPU has to be operational for some of the
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 7d372db8bee1..e33b732ad337 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -811,6 +811,14 @@ void __init fpu__init_system_xstate(void)
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp();
+
+ /*
+ * CPU capabilities initialization runs before FPU init. So
+ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+ * functional, set the feature bit so depending code works.
+ */
+ setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index fe522691ac71..82753622f489 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,6 +32,7 @@
*/
static void init_8259A(int auto_eoi);
+static bool pcat_compat __ro_after_init;
static int i8259A_auto_eoi;
DEFINE_RAW_SPINLOCK(i8259A_lock);
@@ -114,6 +115,7 @@ static void make_8259A_irq(unsigned int irq)
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
+ irq_set_status_flags(irq, IRQ_LEVEL);
enable_irq(irq);
lapic_assign_legacy_vector(irq, true);
}
@@ -300,15 +302,32 @@ static void unmask_8259A(void)
static int probe_8259A(void)
{
+ unsigned char new_val, probe_val = ~(1 << PIC_CASCADE_IR);
unsigned long flags;
- unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
- unsigned char new_val;
+
+ /*
+ * If MADT has the PCAT_COMPAT flag set, then do not bother probing
+ * for the PIC. Some BIOSes leave the PIC uninitialized and probing
+ * fails.
+ *
+ * Right now this causes problems as quite some code depends on
+ * nr_legacy_irqs() > 0 or has_legacy_pic() == true. This is silly
+ * when the system has an IO/APIC because then PIC is not required
+ * at all, except for really old machines where the timer interrupt
+ * must be routed through the PIC. So just pretend that the PIC is
+ * there and let legacy_pic->init() initialize it for nothing.
+ *
+ * Alternatively this could just try to initialize the PIC and
+ * repeat the probe, but for cases where there is no PIC that's
+ * just pointless.
+ */
+ if (pcat_compat)
+ return nr_legacy_irqs();
+
/*
- * Check to see if we have a PIC.
- * Mask all except the cascade and read
- * back the value we just wrote. If we don't
- * have a PIC, we will read 0xff as opposed to the
- * value we wrote.
+ * Check to see if we have a PIC. Mask all except the cascade and
+ * read back the value we just wrote. If we don't have a PIC, we
+ * will read 0xff as opposed to the value we wrote.
*/
raw_spin_lock_irqsave(&i8259A_lock, flags);
@@ -430,5 +449,9 @@ static int __init i8259A_init_ops(void)
return 0;
}
-
device_initcall(i8259A_init_ops);
+
+void __init legacy_pic_pcat_compat(void)
+{
+ pcat_compat = true;
+}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a0693b71cfc1..f2c215e1f64c 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -72,8 +72,10 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
- for (i = 0; i < nr_legacy_irqs(); i++)
+ for (i = 0; i < nr_legacy_irqs(); i++) {
irq_set_chip_and_handler(i, chip, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
+ }
}
void __init init_IRQ(void)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 544bd41a514c..36b5a493e5b4 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -56,8 +56,8 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
/* This function only handles jump-optimized kprobe */
if (kp && kprobe_optimized(kp)) {
op = container_of(kp, struct optimized_kprobe, kp);
- /* If op->list is not empty, op is under optimizing */
- if (list_empty(&op->list))
+ /* If op is optimized or under unoptimizing */
+ if (list_empty(&op->list) || optprobe_queued_unopt(op))
goto found;
}
}
@@ -328,7 +328,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
for (i = 1; i < op->optinsn.size; i++) {
p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
+ if (p && !kprobe_disarmed(p))
return -EEXIST;
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 736348ead421..2ecf1dcc86b2 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -96,7 +96,7 @@ static void ich_force_hpet_resume(void)
static void ich_force_enable_hpet(struct pci_dev *dev)
{
u32 val;
- u32 uninitialized_var(rcba);
+ u32 rcba;
int err = 0;
if (hpet_address || force_hpet_address)
@@ -186,7 +186,7 @@ static void hpet_print_force_info(void)
static void old_ich_force_hpet_resume(void)
{
u32 val;
- u32 uninitialized_var(gen_cntl);
+ u32 gen_cntl;
if (!force_hpet_address || !cached_dev)
return;
@@ -208,7 +208,7 @@ static void old_ich_force_hpet_resume(void)
static void old_ich_force_enable_hpet(struct pci_dev *dev)
{
u32 val;
- u32 uninitialized_var(gen_cntl);
+ u32 gen_cntl;
if (hpet_address || force_hpet_address)
return;
@@ -299,7 +299,7 @@ static void vt8237_force_hpet_resume(void)
static void vt8237_force_enable_hpet(struct pci_dev *dev)
{
- u32 uninitialized_var(val);
+ u32 val;
if (hpet_address || force_hpet_address)
return;
@@ -430,7 +430,7 @@ static void nvidia_force_hpet_resume(void)
static void nvidia_force_enable_hpet(struct pci_dev *dev)
{
- u32 uninitialized_var(val);
+ u32 val;
if (hpet_address || force_hpet_address)
return;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b0f3a996df15..444b8a691c14 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -536,33 +536,29 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
-static void emergency_vmx_disable_all(void)
+static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
- * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
- * the machine, because the CPU blocks INIT when it's in VMX root.
+ * Disable virtualization on all CPUs before rebooting to avoid hanging
+ * the system, as VMX and SVM block INIT when running in the host.
*
* We can't take any locks and we may be on an inconsistent state, so
- * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
+ * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
- * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
- * doesn't prevent a different CPU from being in VMX root operation.
+ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
+ * other CPUs may have virtualization enabled.
*/
- if (cpu_has_vmx()) {
- /* Safely force _this_ CPU out of VMX root operation. */
- __cpu_emergency_vmxoff();
+ if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+ /* Safely force _this_ CPU out of VMX/SVM operation. */
+ cpu_emergency_disable_virtualization();
- /* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ /* Disable VMX/SVM and halt on other CPUs. */
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -599,7 +595,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
- emergency_vmx_disable_all();
+ emergency_reboot_disable_virtualization();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@@ -804,6 +800,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -826,7 +833,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -842,18 +856,32 @@ static void smp_send_nmi_allbutself(void)
apic->send_IPI_allbutself(NMI_VECTOR);
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -881,7 +909,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -911,6 +949,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index b2b87b91f336..c94ed0b37bcd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -33,7 +33,7 @@
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
-#include <asm/virtext.h>
+#include <asm/reboot.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -163,7 +163,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -176,7 +176,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
asmlinkage __visible void smp_reboot_interrupt(void)
{
ipi_entering_ack_irq();
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
irq_exit();
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8783d065f927..2e4f6a1ebca5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -96,6 +96,17 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+struct mwait_cpu_dead {
+ unsigned int control;
+ unsigned int status;
+};
+
+/*
+ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+ * that it's unlikely to be touched by other CPUs.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -231,6 +242,7 @@ static void notrace start_secondary(void *unused)
#endif
load_current_idt();
cpu_init();
+ fpu__init_cpu();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
smp_callin();
@@ -1594,10 +1606,10 @@ static bool wakeup_cpu0(void)
*/
static inline void mwait_play_dead(void)
{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
@@ -1631,13 +1643,6 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = &current_thread_info()->flags;
-
wbinvd();
while (1) {
@@ -1649,9 +1654,9 @@ static inline void mwait_play_dead(void)
* case where we return around the loop.
*/
mb();
- clflush(mwait_ptr);
+ clflush(md);
mb();
- __monitor(mwait_ptr, 0, 0);
+ __monitor(md, 0, 0);
mb();
__mwait(eax, 0);
/*
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 897da526e40e..5bc0fedb3342 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -265,6 +265,22 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
"Lenovo ideapad D330-10IGM"),
},
},
+ {
+ /* Lenovo IdeaPad Duet 3 10IGL5 with 1200x1920 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "IdeaPad Duet 3 10IGL5"),
+ },
+ },
+ {
+ /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ /* Non exact match to match F + L versions */
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
+ },
+ },
{},
};
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ae9e806a11de..3b87bca027b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -735,8 +735,9 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
break;
+ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
+ goto setup;
case 0xe8: /* call relative */
branch_clear_offset(auprobe, insn);
@@ -765,6 +766,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
return -ENOTSUPP;
}
+setup:
auprobe->branch.opc1 = opc1;
auprobe->branch.ilen = insn->length;
auprobe->branch.offs = insn->immediate.value;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 34c0652ca8b4..20d09355c9e0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -383,7 +383,7 @@ SECTIONS
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
. += 64 * 1024; /* 64k alignment slop space */
- *(.brk_reservation) /* areas brk users have reserved */
+ *(.bss..brk) /* areas brk users have reserved */
__brk_limit = .;
}