diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch | 424 |
1 files changed, 0 insertions, 424 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch deleted file mode 100644 index 1cff10af..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch +++ /dev/null @@ -1,424 +0,0 @@ -From d26480ad859d58897cd409ed66ff4bc5e3ba079d Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Wed, 30 Aug 2017 16:23:00 -0700 -Subject: [PATCH 020/103] kaiser: enhanced by kernel and user PCIDs - -Merged performance improvements to Kaiser, using distinct kernel -and user Process Context Identifiers to minimize the TLB flushing. - -[This work actually all from Dave Hansen 2017-08-30: -still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.] - -Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> -Signed-off-by: Hugh Dickins <hughd@google.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - arch/x86/entry/entry_64.S | 10 ++++-- - arch/x86/entry/entry_64_compat.S | 1 + - arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/kaiser.h | 15 +++++++-- - arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++++ - arch/x86/include/asm/tlbflush.h | 52 ++++++++++++++++++++++++----- - arch/x86/include/uapi/asm/processor-flags.h | 3 +- - arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++++ - arch/x86/kvm/x86.c | 3 +- - arch/x86/mm/kaiser.c | 7 ++++ - arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++-- - 11 files changed, 181 insertions(+), 17 deletions(-) - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index df33f10..4a0ebf4 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1315,7 +1315,10 @@ ENTRY(nmi) - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax -- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -+ /* mask off "user" bit of pgd address 
and 12 PCID bits: */ -+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -+ /* Add back kernel PCID and "no flush" bit */ -+ orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - call do_nmi -@@ -1556,7 +1559,10 @@ end_repeat_nmi: - /* %rax is saved above, so OK to clobber here */ - movq %cr3, %rax - pushq %rax -- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax -+ /* mask off "user" bit of pgd address and 12 PCID bits: */ -+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax -+ /* Add back kernel PCID and "no flush" bit */ -+ orq X86_CR3_PCID_KERN_VAR, %rax - movq %rax, %cr3 - #endif - -diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S -index f0e384e..0eb5801 100644 ---- a/arch/x86/entry/entry_64_compat.S -+++ b/arch/x86/entry/entry_64_compat.S -@@ -13,6 +13,7 @@ - #include <asm/irqflags.h> - #include <asm/asm.h> - #include <asm/smap.h> -+#include <asm/pgtable_types.h> - #include <asm/kaiser.h> - #include <linux/linkage.h> - #include <linux/err.h> -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ed10b5b..dc50883 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -189,6 +189,7 @@ - - #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ - #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */ - - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ - #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index e0fc45e..360ff3b 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -1,5 +1,8 @@ - #ifndef _ASM_X86_KAISER_H - #define _ASM_X86_KAISER_H -+ -+#include <uapi/asm/processor-flags.h> /* For PCID constants */ -+ - /* - * This file includes the definitions for 
the KAISER feature. - * KAISER is a counter measure against x86_64 side channel attacks on -@@ -21,13 +24,21 @@ - - .macro _SWITCH_TO_KERNEL_CR3 reg - movq %cr3, \reg --andq $(~KAISER_SHADOW_PGD_OFFSET), \reg -+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -+orq X86_CR3_PCID_KERN_VAR, \reg - movq \reg, %cr3 - .endm - - .macro _SWITCH_TO_USER_CR3 reg - movq %cr3, \reg --orq $(KAISER_SHADOW_PGD_OFFSET), \reg -+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg -+/* -+ * This can obviously be one instruction by putting the -+ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. -+ * But, just leave it now for simplicity. -+ */ -+orq X86_CR3_PCID_USER_VAR, \reg -+orq $(KAISER_SHADOW_PGD_OFFSET), \reg - movq \reg, %cr3 - .endm - -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 8bc8d02..ada77fd 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -141,6 +141,32 @@ - _PAGE_SOFT_DIRTY) - #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) - -+/* The ASID is the lower 12 bits of CR3 */ -+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL)) -+ -+/* Mask for all the PCID-related bits in CR3: */ -+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK) -+#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64) -+#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL)) -+#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL)) -+ -+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN) -+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER) -+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN) -+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER) -+#else -+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL)) -+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL)) -+/* -+ * PCIDs are unsupported on 32-bit and none of these bits can be -+ * set in CR3: -+ */ -+#define X86_CR3_PCID_KERN_FLUSH (0) -+#define 
X86_CR3_PCID_USER_FLUSH (0) -+#define X86_CR3_PCID_KERN_NOFLUSH (0) -+#define X86_CR3_PCID_USER_NOFLUSH (0) -+#endif -+ - /* - * The cache modes defined here are used to translate between pure SW usage - * and the HW defined cache mode bits and/or PAT entries. -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index c13041e..28b4182 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) - { - struct { u64 d[2]; } desc = { { pcid, addr } }; -- - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global -@@ -135,14 +134,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - - static inline void __native_flush_tlb(void) - { -+ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { -+ /* -+ * If current->mm == NULL then we borrow a mm which may change during a -+ * task switch and therefore we must not be preempted while we write CR3 -+ * back: -+ */ -+ preempt_disable(); -+ native_write_cr3(native_read_cr3()); -+ preempt_enable(); -+ return; -+ } - /* -- * If current->mm == NULL then we borrow a mm which may change during a -- * task switch and therefore we must not be preempted while we write CR3 -- * back: -+ * We are no longer using globals with KAISER, so a -+ * "nonglobals" flush would work too. But, this is more -+ * conservative. -+ * -+ * Note, this works with CR4.PCIDE=0 or 1. - */ -- preempt_disable(); -- native_write_cr3(native_read_cr3()); -- preempt_enable(); -+ invpcid_flush_all(); - } - - static inline void __native_flush_tlb_global_irq_disabled(void) -@@ -164,6 +174,8 @@ static inline void __native_flush_tlb_global(void) - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. -+ * -+ * Note, this works with CR4.PCIDE=0 or 1. 
- */ - invpcid_flush_all(); - return; -@@ -183,7 +195,31 @@ static inline void __native_flush_tlb_global(void) - - static inline void __native_flush_tlb_single(unsigned long addr) - { -- asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); -+ /* -+ * SIMICS #GP's if you run INVPCID with type 2/3 -+ * and X86_CR4_PCIDE clear. Shame! -+ * -+ * The ASIDs used below are hard-coded. But, we must not -+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call -+ * invlpg in the case we are called early. -+ */ -+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); -+ return; -+ } -+ /* Flush the address out of both PCIDs. */ -+ /* -+ * An optimization here might be to determine addresses -+ * that are only kernel-mapped and only flush the kernel -+ * ASID. But, userspace flushes are probably much more -+ * important performance-wise. -+ * -+ * Make sure to do only a single invpcid when KAISER is -+ * disabled and we have only a single ASID. -+ */ -+ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) -+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); -+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); - } - - static inline void __flush_tlb_all(void) -diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h -index 567de50..6768d13 100644 ---- a/arch/x86/include/uapi/asm/processor-flags.h -+++ b/arch/x86/include/uapi/asm/processor-flags.h -@@ -77,7 +77,8 @@ - #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) - #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ - #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) --#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ -+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ -+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) - - /* - * Intel CPU features in CR4 -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 3efde13..b4c0ae5 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ 
b/arch/x86/kernel/cpu/common.c -@@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - } - } - -+/* -+ * These can have bit 63 set, so we can not just use a plain "or" -+ * instruction to get their value or'd into CR3. It would take -+ * another register. So, we use a memory reference to these -+ * instead. -+ * -+ * This is also handy because systems that do not support -+ * PCIDs just end up or'ing a 0 into their CR3, which does -+ * no harm. -+ */ -+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; -+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; -+ - static void setup_pcid(struct cpuinfo_x86 *c) - { - if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE)) { - cr4_set_bits(X86_CR4_PCIDE); -+ /* -+ * These variables are used by the entry/exit -+ * code to change PCIDs. -+ */ -+#ifdef CONFIG_KAISER -+ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; -+ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; -+#endif -+ /* -+ * INVPCID has two "groups" of types: -+ * 1/2: Invalidate an individual address -+ * 3/4: Invalidate all contexts -+ * -+ * 1/2 take a PCID, but 3/4 do not. So, 3/4 -+ * ignore the PCID argument in the descriptor. -+ * But, we have to be careful not to call 1/2 -+ * with an actual non-zero PCID in them before -+ * we do the above cr4_set_bits(). 
-+ */ -+ if (cpu_has(c, X86_FEATURE_INVPCID)) -+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index e5bc139..51a700a 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) - return 1; - - /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ -- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) -+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) || -+ !is_long_mode(vcpu)) - return 1; - } - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index bd22ef5..f5c75f7 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void) - } while (0) - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; -+extern unsigned long X86_CR3_PCID_KERN_VAR; -+extern unsigned long X86_CR3_PCID_USER_VAR; - /* - * If anything in here fails, we will likely die on one of the - * first kernel->user transitions and init will die. But, we -@@ -289,6 +291,11 @@ void __init kaiser_init(void) - kaiser_add_user_map_early(&debug_idt_table, - sizeof(gate_desc) * NR_VECTORS, - __PAGE_KERNEL); -+ -+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, -+ __PAGE_KERNEL); -+ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, -+ __PAGE_KERNEL); - } - - /* Add a mapping to the shadow mapping, and synchronize the mappings */ -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index a7655f6..a376246 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -36,6 +36,46 @@ struct flush_tlb_info { - unsigned long flush_end; - }; - -+static void load_new_mm_cr3(pgd_t *pgdir) -+{ -+ unsigned long new_mm_cr3 = __pa(pgdir); -+ -+ /* -+ * KAISER, plus PCIDs needs some extra work here. 
But, -+ * if either of the features is not present, we need no -+ * PCIDs here and just do a normal, full TLB flush with -+ * the write_cr3() -+ */ -+ if (!IS_ENABLED(CONFIG_KAISER) || -+ !cpu_feature_enabled(X86_FEATURE_PCID)) -+ goto out_set_cr3; -+ /* -+ * We reuse the same PCID for different tasks, so we must -+ * flush all the entries for the PCID out when we change -+ * tasks. -+ */ -+ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); -+ -+ /* -+ * The flush from load_cr3() may leave old TLB entries -+ * for userspace in place. We must flush that context -+ * separately. We can theoretically delay doing this -+ * until we actually load up the userspace CR3, but -+ * that's a bit tricky. We have to have the "need to -+ * flush userspace PCID" bit per-cpu and check it in the -+ * exit-to-userspace paths. -+ */ -+ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); -+ -+out_set_cr3: -+ /* -+ * Caution: many callers of this function expect -+ * that load_cr3() is serializing and orders TLB -+ * fills with respect to the mm_cpumask writes. -+ */ -+ write_cr3(new_mm_cr3); -+} -+ - /* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. -@@ -47,7 +87,7 @@ void leave_mm(int cpu) - BUG(); - if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { - cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); -- load_cr3(swapper_pg_dir); -+ load_new_mm_cr3(swapper_pg_dir); - /* - * This gets called in the idle path where RCU - * functions differently. Tracing normally -@@ -126,7 +166,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - * ordering guarantee we need. - * - */ -- load_cr3(next->pgd); -+ load_new_mm_cr3(next->pgd); - - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - -@@ -175,7 +215,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, - * As above, load_cr3() is serializing and orders TLB - * fills with respect to the mm_cpumask write. 
- */ -- load_cr3(next->pgd); -+ load_new_mm_cr3(next->pgd); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - load_mm_cr4(next); - load_mm_ldt(next); --- -2.7.4 - |