Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch  424
1 file changed, 0 insertions, 424 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
deleted file mode 100644
index 1cff10af..00000000
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
+++ /dev/null
@@ -1,424 +0,0 @@
-From d26480ad859d58897cd409ed66ff4bc5e3ba079d Mon Sep 17 00:00:00 2001
-From: Hugh Dickins <hughd@google.com>
-Date: Wed, 30 Aug 2017 16:23:00 -0700
-Subject: [PATCH 020/103] kaiser: enhanced by kernel and user PCIDs
-
-Merged performance improvements to Kaiser, using distinct kernel
-and user Process Context Identifiers to minimize the TLB flushing.
-
-[This work actually all from Dave Hansen 2017-08-30:
-still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Hugh Dickins <hughd@google.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/entry/entry_64.S | 10 ++++--
- arch/x86/entry/entry_64_compat.S | 1 +
- arch/x86/include/asm/cpufeatures.h | 1 +
- arch/x86/include/asm/kaiser.h | 15 +++++++--
- arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++++
- arch/x86/include/asm/tlbflush.h | 52 ++++++++++++++++++++++++-----
- arch/x86/include/uapi/asm/processor-flags.h | 3 +-
- arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++++
- arch/x86/kvm/x86.c | 3 +-
- arch/x86/mm/kaiser.c | 7 ++++
- arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++--
- 11 files changed, 181 insertions(+), 17 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index df33f10..4a0ebf4 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1315,7 +1315,10 @@ ENTRY(nmi)
- /* %rax is saved above, so OK to clobber here */
- movq %cr3, %rax
- pushq %rax
-- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
-+ /* mask off "user" bit of pgd address and 12 PCID bits: */
-+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
-+ /* Add back kernel PCID and "no flush" bit */
-+ orq X86_CR3_PCID_KERN_VAR, %rax
- movq %rax, %cr3
- #endif
- call do_nmi
-@@ -1556,7 +1559,10 @@ end_repeat_nmi:
- /* %rax is saved above, so OK to clobber here */
- movq %cr3, %rax
- pushq %rax
-- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
-+ /* mask off "user" bit of pgd address and 12 PCID bits: */
-+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
-+ /* Add back kernel PCID and "no flush" bit */
-+ orq X86_CR3_PCID_KERN_VAR, %rax
- movq %rax, %cr3
- #endif
-
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index f0e384e..0eb5801 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -13,6 +13,7 @@
- #include <asm/irqflags.h>
- #include <asm/asm.h>
- #include <asm/smap.h>
-+#include <asm/pgtable_types.h>
- #include <asm/kaiser.h>
- #include <linux/linkage.h>
- #include <linux/err.h>
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index ed10b5b..dc50883 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -189,6 +189,7 @@
-
- #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
- #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
-
- #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
- #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
-index e0fc45e..360ff3b 100644
---- a/arch/x86/include/asm/kaiser.h
-+++ b/arch/x86/include/asm/kaiser.h
-@@ -1,5 +1,8 @@
- #ifndef _ASM_X86_KAISER_H
- #define _ASM_X86_KAISER_H
-+
-+#include <uapi/asm/processor-flags.h> /* For PCID constants */
-+
- /*
- * This file includes the definitions for the KAISER feature.
- * KAISER is a counter measure against x86_64 side channel attacks on
-@@ -21,13 +24,21 @@
-
- .macro _SWITCH_TO_KERNEL_CR3 reg
- movq %cr3, \reg
--andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
-+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
-+orq X86_CR3_PCID_KERN_VAR, \reg
- movq \reg, %cr3
- .endm
-
- .macro _SWITCH_TO_USER_CR3 reg
- movq %cr3, \reg
--orq $(KAISER_SHADOW_PGD_OFFSET), \reg
-+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
-+/*
-+ * This can obviously be one instruction by putting the
-+ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
-+ * But, just leave it this way for now, for simplicity.
-+ */
-+orq X86_CR3_PCID_USER_VAR, \reg
-+orq $(KAISER_SHADOW_PGD_OFFSET), \reg
- movq \reg, %cr3
- .endm
-
-diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
-index 8bc8d02..ada77fd 100644
---- a/arch/x86/include/asm/pgtable_types.h
-+++ b/arch/x86/include/asm/pgtable_types.h
-@@ -141,6 +141,32 @@
- _PAGE_SOFT_DIRTY)
- #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
-
-+/* The ASID is the lower 12 bits of CR3 */
-+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
-+
-+/* Mask for all the PCID-related bits in CR3: */
-+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
-+#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
-+#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
-+#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
-+
-+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
-+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
-+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
-+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
-+#else
-+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
-+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
-+/*
-+ * PCIDs are unsupported on 32-bit and none of these bits can be
-+ * set in CR3:
-+ */
-+#define X86_CR3_PCID_KERN_FLUSH (0)
-+#define X86_CR3_PCID_USER_FLUSH (0)
-+#define X86_CR3_PCID_KERN_NOFLUSH (0)
-+#define X86_CR3_PCID_USER_NOFLUSH (0)
-+#endif
-+
- /*
- * The cache modes defined here are used to translate between pure SW usage
- * and the HW defined cache mode bits and/or PAT entries.
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index c13041e..28b4182 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
- unsigned long type)
- {
- struct { u64 d[2]; } desc = { { pcid, addr } };
--
- /*
- * The memory clobber is because the whole point is to invalidate
- * stale TLB entries and, especially if we're flushing global
-@@ -135,14 +134,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
-
- static inline void __native_flush_tlb(void)
- {
-+ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
-+ /*
-+ * If current->mm == NULL then we borrow a mm which may change during a
-+ * task switch and therefore we must not be preempted while we write CR3
-+ * back:
-+ */
-+ preempt_disable();
-+ native_write_cr3(native_read_cr3());
-+ preempt_enable();
-+ return;
-+ }
- /*
-- * If current->mm == NULL then we borrow a mm which may change during a
-- * task switch and therefore we must not be preempted while we write CR3
-- * back:
-+ * We are no longer using globals with KAISER, so a
-+ * "nonglobals" flush would work too. But, this is more
-+ * conservative.
-+ *
-+ * Note, this works with CR4.PCIDE=0 or 1.
- */
-- preempt_disable();
-- native_write_cr3(native_read_cr3());
-- preempt_enable();
-+ invpcid_flush_all();
- }
-
- static inline void __native_flush_tlb_global_irq_disabled(void)
-@@ -164,6 +174,8 @@ static inline void __native_flush_tlb_global(void)
- /*
- * Using INVPCID is considerably faster than a pair of writes
- * to CR4 sandwiched inside an IRQ flag save/restore.
-+ *
-+ * Note, this works with CR4.PCIDE=0 or 1.
- */
- invpcid_flush_all();
- return;
-@@ -183,7 +195,31 @@ static inline void __native_flush_tlb_global(void)
-
- static inline void __native_flush_tlb_single(unsigned long addr)
- {
-- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
-+ /*
-+ * SIMICS #GP's if you run INVPCID with type 2/3
-+ * and X86_CR4_PCIDE clear. Shame!
-+ *
-+ * The ASIDs used below are hard-coded. But, we must not
-+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
-+	 * invlpg in the case we are called early.
-+ */
-+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
-+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
-+ return;
-+ }
-+ /* Flush the address out of both PCIDs. */
-+ /*
-+ * An optimization here might be to determine addresses
-+ * that are only kernel-mapped and only flush the kernel
-+ * ASID. But, userspace flushes are probably much more
-+ * important performance-wise.
-+ *
-+ * Make sure to do only a single invpcid when KAISER is
-+ * disabled and we have only a single ASID.
-+ */
-+ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
-+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
-+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
- }
-
- static inline void __flush_tlb_all(void)
-diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
-index 567de50..6768d13 100644
---- a/arch/x86/include/uapi/asm/processor-flags.h
-+++ b/arch/x86/include/uapi/asm/processor-flags.h
-@@ -77,7 +77,8 @@
- #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
- #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
- #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
--#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
-+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
-+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
-
- /*
- * Intel CPU features in CR4
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 3efde13..b4c0ae5 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
- }
- }
-
-+/*
-+ * These can have bit 63 set, so we can not just use a plain "or"
-+ * instruction to get their value or'd into CR3. It would take
-+ * another register. So, we use a memory reference to these
-+ * instead.
-+ *
-+ * This is also handy because systems that do not support
-+ * PCIDs just end up or'ing a 0 into their CR3, which does
-+ * no harm.
-+ */
-+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
-+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
-+
- static void setup_pcid(struct cpuinfo_x86 *c)
- {
- if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE)) {
- cr4_set_bits(X86_CR4_PCIDE);
-+ /*
-+ * These variables are used by the entry/exit
-+ * code to change PCIDs.
-+ */
-+#ifdef CONFIG_KAISER
-+ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
-+ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
-+#endif
-+ /*
-+ * INVPCID has two "groups" of types:
-+ * 1/2: Invalidate an individual address
-+ * 3/4: Invalidate all contexts
-+ *
-+ * 1/2 take a PCID, but 3/4 do not. So, 3/4
-+ * ignore the PCID argument in the descriptor.
-+ * But, we have to be careful not to call 1/2
-+ * with an actual non-zero PCID in them before
-+ * we do the above cr4_set_bits().
-+ */
-+ if (cpu_has(c, X86_FEATURE_INVPCID))
-+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index e5bc139..51a700a 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
- return 1;
-
- /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
-- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
-+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
-+ !is_long_mode(vcpu))
- return 1;
- }
-
-diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
-index bd22ef5..f5c75f7 100644
---- a/arch/x86/mm/kaiser.c
-+++ b/arch/x86/mm/kaiser.c
-@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void)
- } while (0)
-
- extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-+extern unsigned long X86_CR3_PCID_KERN_VAR;
-+extern unsigned long X86_CR3_PCID_USER_VAR;
- /*
- * If anything in here fails, we will likely die on one of the
- * first kernel->user transitions and init will die. But, we
-@@ -289,6 +291,11 @@ void __init kaiser_init(void)
- kaiser_add_user_map_early(&debug_idt_table,
- sizeof(gate_desc) * NR_VECTORS,
- __PAGE_KERNEL);
-+
-+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
-+ __PAGE_KERNEL);
-+ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
-+ __PAGE_KERNEL);
- }
-
- /* Add a mapping to the shadow mapping, and synchronize the mappings */
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index a7655f6..a376246 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -36,6 +36,46 @@ struct flush_tlb_info {
- unsigned long flush_end;
- };
-
-+static void load_new_mm_cr3(pgd_t *pgdir)
-+{
-+ unsigned long new_mm_cr3 = __pa(pgdir);
-+
-+ /*
-+	 * KAISER plus PCIDs needs some extra work here. But,
-+	 * if either of these features is not present, we need
-+	 * no PCIDs here and can just do a normal, full TLB
-+	 * flush with the write_cr3().
-+ */
-+ if (!IS_ENABLED(CONFIG_KAISER) ||
-+ !cpu_feature_enabled(X86_FEATURE_PCID))
-+ goto out_set_cr3;
-+ /*
-+ * We reuse the same PCID for different tasks, so we must
-+	 * flush all the entries for the PCID out when we change
-+ * tasks.
-+ */
-+ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
-+
-+ /*
-+ * The flush from load_cr3() may leave old TLB entries
-+ * for userspace in place. We must flush that context
-+ * separately. We can theoretically delay doing this
-+ * until we actually load up the userspace CR3, but
-+ * that's a bit tricky. We have to have the "need to
-+ * flush userspace PCID" bit per-cpu and check it in the
-+ * exit-to-userspace paths.
-+ */
-+ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
-+
-+out_set_cr3:
-+ /*
-+ * Caution: many callers of this function expect
-+ * that load_cr3() is serializing and orders TLB
-+ * fills with respect to the mm_cpumask writes.
-+ */
-+ write_cr3(new_mm_cr3);
-+}
-+
- /*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
-@@ -47,7 +87,7 @@ void leave_mm(int cpu)
- BUG();
- if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
- cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
-- load_cr3(swapper_pg_dir);
-+ load_new_mm_cr3(swapper_pg_dir);
- /*
- * This gets called in the idle path where RCU
- * functions differently. Tracing normally
-@@ -126,7 +166,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- * ordering guarantee we need.
- *
- */
-- load_cr3(next->pgd);
-+ load_new_mm_cr3(next->pgd);
-
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
-@@ -175,7 +215,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- * As above, load_cr3() is serializing and orders TLB
- * fills with respect to the mm_cpumask write.
- */
-- load_cr3(next->pgd);
-+ load_new_mm_cr3(next->pgd);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- load_mm_cr4(next);
- load_mm_ldt(next);
---
-2.7.4
-
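
For reference, here is a minimal standalone sketch (plain user-space C, not kernel code) of the CR3 composition performed by the patch's _SWITCH_TO_KERNEL_CR3 and _SWITCH_TO_USER_CR3 macros: clear the low 12 ASID bits plus the shadow-PGD bit, then OR in the per-mode PCID and the bit-63 "NOFLUSH" flag so the hardware keeps the old PCID's TLB entries. The value of KAISER_SHADOW_PGD_OFFSET (PAGE_SIZE, 4096) is an assumption here; it is defined elsewhere in the KAISER series, not in this patch.

#include <stdint.h>
#include <stdio.h>

/* Constants mirrored from the patch (pgtable_types.h / processor-flags.h). */
#define X86_CR3_PCID_ASID_MASK   ((1ULL << 12) - 1)  /* ASID: low 12 bits of CR3 */
#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)        /* preserve old PCID's TLB entries */
#define X86_CR3_PCID_ASID_KERN   0x4ULL
#define X86_CR3_PCID_ASID_USER   0x6ULL
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL           /* assumed: PAGE_SIZE, from the wider series */

/* Mirrors _SWITCH_TO_KERNEL_CR3: strip ASID + shadow bit, OR in kernel PCID, no flush. */
static uint64_t switch_to_kernel_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN;
}

/* Mirrors _SWITCH_TO_USER_CR3: strip ASID, OR in user PCID, shadow-PGD bit, no flush. */
static uint64_t switch_to_user_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER |
	       KAISER_SHADOW_PGD_OFFSET;
}

int main(void)
{
	uint64_t pgd  = 0x1234000ULL;  /* pretend page-aligned physical PGD address */
	uint64_t kern = switch_to_kernel_cr3(pgd);
	uint64_t user = switch_to_user_cr3(kern);

	printf("kernel CR3: 0x%016llx\n", (unsigned long long)kern);
	printf("user   CR3: 0x%016llx\n", (unsigned long long)user);
	return 0;
}

With a PGD at 0x1234000 this prints 0x8000000001234004 for the kernel side and 0x8000000001235006 for the user side, showing the kernel-ASID-4/user-ASID-6 split the patch establishes: changing only the ASID and the shadow-PGD bit while keeping NOFLUSH set is what lets the entry/exit code swap page tables without a full TLB flush.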