Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch | 424 |
1 file changed, 424 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
new file mode 100644
index 00000000..85bdc307
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0020-kaiser-enhanced-by-kernel-and-user-PCIDs.patch
@@ -0,0 +1,424 @@
+From 9bc1089baa5051f750a246af746e81bf1bb1fe09 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: [PATCH 020/102] kaiser: enhanced by kernel and user PCIDs
+
+Merged performance improvements to Kaiser, using distinct kernel
+and user Process Context Identifiers to minimize the TLB flushing.
+
+[This work actually all from Dave Hansen 2017-08-30:
+still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S                   | 10 ++++--
+ arch/x86/entry/entry_64_compat.S            |  1 +
+ arch/x86/include/asm/cpufeatures.h          |  1 +
+ arch/x86/include/asm/kaiser.h               | 15 +++++++--
+ arch/x86/include/asm/pgtable_types.h        | 26 +++++++++++++++
+ arch/x86/include/asm/tlbflush.h             | 52 ++++++++++++++++++++++++-----
+ arch/x86/include/uapi/asm/processor-flags.h |  3 +-
+ arch/x86/kernel/cpu/common.c                | 34 +++++++++++++++++++
+ arch/x86/kvm/x86.c                          |  3 +-
+ arch/x86/mm/kaiser.c                        |  7 ++++
+ arch/x86/mm/tlb.c                           | 46 +++++++++++++++++++++++--
+ 11 files changed, 181 insertions(+), 17 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index df33f10..4a0ebf4 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1315,7 +1315,10 @@ ENTRY(nmi)
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
++	/* mask off "user" bit of pgd address and 12 PCID bits: */
++	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++	/* Add back kernel PCID and "no flush" bit */
++	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+ 	call	do_nmi
+@@ -1556,7 +1559,10 @@ end_repeat_nmi:
+ 	/* %rax is saved above, so OK to clobber here */
+ 	movq	%cr3, %rax
+ 	pushq	%rax
+-	andq	$(~KAISER_SHADOW_PGD_OFFSET), %rax
++	/* mask off "user" bit of pgd address and 12 PCID bits: */
++	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++	/* Add back kernel PCID and "no flush" bit */
++	orq	X86_CR3_PCID_KERN_VAR, %rax
+ 	movq	%rax, %cr3
+ #endif
+
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index f0e384e..0eb5801 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index ed10b5b..dc50883 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -189,6 +189,7 @@
+
+ #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
+
+ #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
+index e0fc45e..360ff3b 100644
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,5 +1,8 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
++
++#include <uapi/asm/processor-flags.h> /* For PCID constants */
++
+ /*
+  * This file includes the definitions for the KAISER feature.
+  * KAISER is a counter measure against x86_64 side channel attacks on
+@@ -21,13 +24,21 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++orq X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++/*
++ * This can obviously be one instruction by putting the
++ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
++ * But, just leave it now for simplicity.
++ */
++orq X86_CR3_PCID_USER_VAR, \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ movq \reg, %cr3
+ .endm
+
+diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
+index 8bc8d02..ada77fd 100644
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -141,6 +141,32 @@
+ 			 _PAGE_SOFT_DIRTY)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+
++/* The ASID is the lower 12 bits of CR3 */
++#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
++
++/* Mask for all the PCID-related bits in CR3: */
++#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
++
++#define X86_CR3_PCID_KERN_FLUSH	(X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_FLUSH	(X86_CR3_PCID_ASID_USER)
++#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
++#else
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
++/*
++ * PCIDs are unsupported on 32-bit and none of these bits can be
++ * set in CR3:
++ */
++#define X86_CR3_PCID_KERN_FLUSH	(0)
++#define X86_CR3_PCID_USER_FLUSH	(0)
++#define X86_CR3_PCID_KERN_NOFLUSH (0)
++#define X86_CR3_PCID_USER_NOFLUSH (0)
++#endif
++
+ /*
+  * The cache modes defined here are used to translate between pure SW usage
+  * and the HW defined cache mode bits and/or PAT entries.
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index c13041e..28b4182 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -12,7 +12,6 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ 			     unsigned long type)
+ {
+ 	struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+ 	/*
+ 	 * The memory clobber is because the whole point is to invalidate
+ 	 * stale TLB entries and, especially if we're flushing global
+@@ -135,14 +134,25 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+
+ static inline void __native_flush_tlb(void)
+ {
++	if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++		/*
++		 * If current->mm == NULL then we borrow a mm which may change during a
++		 * task switch and therefore we must not be preempted while we write CR3
++		 * back:
++		 */
++		preempt_disable();
++		native_write_cr3(native_read_cr3());
++		preempt_enable();
++		return;
++	}
+ 	/*
+-	 * If current->mm == NULL then we borrow a mm which may change during a
+-	 * task switch and therefore we must not be preempted while we write CR3
+-	 * back:
++	 * We are no longer using globals with KAISER, so a
++	 * "nonglobals" flush would work too. But, this is more
++	 * conservative.
++	 *
++	 * Note, this works with CR4.PCIDE=0 or 1.
+ 	 */
+-	preempt_disable();
+-	native_write_cr3(native_read_cr3());
+-	preempt_enable();
++	invpcid_flush_all();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -164,6 +174,8 @@ static inline void __native_flush_tlb_global(void)
+ 		/*
+ 		 * Using INVPCID is considerably faster than a pair of writes
+ 		 * to CR4 sandwiched inside an IRQ flag save/restore.
++		 *
++		 * Note, this works with CR4.PCIDE=0 or 1.
+ 		 */
+ 		invpcid_flush_all();
+ 		return;
+@@ -183,7 +195,31 @@ static inline void __native_flush_tlb_global(void)
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+-	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++	/*
++	 * SIMICS #GP's if you run INVPCID with type 2/3
++	 * and X86_CR4_PCIDE clear. Shame!
++	 *
++	 * The ASIDs used below are hard-coded. But, we must not
++	 * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
++	 * invpcid in the case we are called early.
++	 */
++	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++		return;
++	}
++	/* Flush the address out of both PCIDs. */
++	/*
++	 * An optimization here might be to determine addresses
++	 * that are only kernel-mapped and only flush the kernel
++	 * ASID. But, userspace flushes are probably much more
++	 * important performance-wise.
++	 *
++	 * Make sure to do only a single invpcid when KAISER is
++	 * disabled and we have only a single ASID.
++	 */
++	if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
++		invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
++	invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
+index 567de50..6768d13 100644
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -77,7 +77,8 @@
+ #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
+ #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+
+ /*
+  * Intel CPU features in CR4
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 3efde13..b4c0ae5 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,11 +324,45 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+ 	}
+ }
+
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these
++ * instead.
++ *
++ * This is also handy because systems that do not support
++ * PCIDs just end up or'ing a 0 into their CR3, which does
++ * no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
++
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ 	if (cpu_has(c, X86_FEATURE_PCID)) {
+ 		if (cpu_has(c, X86_FEATURE_PGE)) {
+ 			cr4_set_bits(X86_CR4_PCIDE);
++			/*
++			 * These variables are used by the entry/exit
++			 * code to change PCIDs.
++			 */
++#ifdef CONFIG_KAISER
++			X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
++			X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
++#endif
++			/*
++			 * INVPCID has two "groups" of types:
++			 * 1/2: Invalidate an individual address
++			 * 3/4: Invalidate all contexts
++			 *
++			 * 1/2 take a PCID, but 3/4 do not. So, 3/4
++			 * ignore the PCID argument in the descriptor.
++			 * But, we have to be careful not to call 1/2
++			 * with an actual non-zero PCID in them before
++			 * we do the above cr4_set_bits().
++			 */
++			if (cpu_has(c, X86_FEATURE_INVPCID))
++				set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ 		} else {
+ 			/*
+ 			 * flush_tlb_all(), as currently implemented, won't
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index e5bc139..51a700a 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ 			return 1;
+
+ 		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+-		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
++		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
++		    !is_long_mode(vcpu))
+ 			return 1;
+ 	}
+
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index bd22ef5..f5c75f7 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(void)
+ } while (0)
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+  * If anything in here fails, we will likely die on one of the
+  * first kernel->user transitions and init will die. But, we
+@@ -289,6 +291,11 @@ void __init kaiser_init(void)
+ 	kaiser_add_user_map_early(&debug_idt_table,
+ 				  sizeof(gate_desc) * NR_VECTORS,
+ 				  __PAGE_KERNEL);
++
++	kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++				  __PAGE_KERNEL);
++	kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
++				  __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index a7655f6..a376246 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -36,6 +36,46 @@ struct flush_tlb_info {
+ 	unsigned long flush_end;
+ };
+
++static void load_new_mm_cr3(pgd_t *pgdir)
++{
++	unsigned long new_mm_cr3 = __pa(pgdir);
++
++	/*
++	 * KAISER, plus PCIDs needs some extra work here. But,
++	 * if either of features is not present, we need no
++	 * PCIDs here and just do a normal, full TLB flush with
++	 * the write_cr3()
++	 */
++	if (!IS_ENABLED(CONFIG_KAISER) ||
++	    !cpu_feature_enabled(X86_FEATURE_PCID))
++		goto out_set_cr3;
++	/*
++	 * We reuse the same PCID for different tasks, so we must
++	 * flush all the entires for the PCID out when we change
++	 * tasks.
++	 */
++	new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
++
++	/*
++	 * The flush from load_cr3() may leave old TLB entries
++	 * for userspace in place. We must flush that context
++	 * separately. We can theoretically delay doing this
++	 * until we actually load up the userspace CR3, but
++	 * that's a bit tricky. We have to have the "need to
++	 * flush userspace PCID" bit per-cpu and check it in the
++	 * exit-to-userspace paths.
++	 */
++	invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++
++out_set_cr3:
++	/*
++	 * Caution: many callers of this function expect
++	 * that load_cr3() is serializing and orders TLB
++	 * fills with respect to the mm_cpumask writes.
++	 */
++	write_cr3(new_mm_cr3);
++}
++
+ /*
+  * We cannot call mmdrop() because we are in interrupt context,
+  * instead update mm->cpu_vm_mask.
+@@ -47,7 +87,7 @@ void leave_mm(int cpu)
+ 		BUG();
+ 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+-		load_cr3(swapper_pg_dir);
++		load_new_mm_cr3(swapper_pg_dir);
+ 		/*
+ 		 * This gets called in the idle path where RCU
+ 		 * functions differently. Tracing normally
+@@ -126,7 +166,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 		 * ordering guarantee we need.
+ 		 *
+ 		 */
+-		load_cr3(next->pgd);
++		load_new_mm_cr3(next->pgd);
+
+ 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+@@ -175,7 +215,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 			 * As above, load_cr3() is serializing and orders TLB
+ 			 * fills with respect to the mm_cpumask write.
+ 			 */
+-			load_cr3(next->pgd);
++			load_new_mm_cr3(next->pgd);
+ 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ 			load_mm_cr4(next);
+ 			load_mm_ldt(next);
+-- 
+2.7.4
+
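Taken together, the patch reduces a KAISER kernel/user transition to a handful of bit operations on CR3: the low 12 bits of CR3 select the ASID (PCID), bit 63 (X86_CR3_PCID_NOFLUSH) asks the CPU to keep that PCID's TLB entries across the CR3 write, and the shadow pgd sits one page above the kernel pgd. The standalone C sketch below is an illustration only, not part of the patch: the constants mirror the definitions added above, while the KAISER_SHADOW_PGD_OFFSET value (0x1000) and the demo harness are assumptions made here for the example.

#include <stdio.h>
#include <stdint.h>

/* Constants mirroring the patch's pgtable_types.h / processor-flags.h. */
#define X86_CR3_PCID_NOFLUSH     (1ULL << 63)       /* bit 63: keep this PCID's TLB entries */
#define X86_CR3_PCID_ASID_MASK   ((1ULL << 12) - 1) /* ASID lives in CR3 bits 11:0 */
#define X86_CR3_PCID_ASID_KERN   0x4ULL
#define X86_CR3_PCID_ASID_USER   0x6ULL
#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL          /* shadow pgd one page up (assumed) */

/* Mirrors _SWITCH_TO_KERNEL_CR3: clear the shadow-pgd bit and all 12 ASID
 * bits, then install the kernel ASID with NOFLUSH set, so the CR3 write
 * does not throw away the kernel PCID's TLB entries. */
static uint64_t switch_to_kernel_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN;
}

/* Mirrors _SWITCH_TO_USER_CR3: same mask, then the user ASID plus the
 * shadow-pgd offset (the patch's two orq's, foldable into one as its
 * comment notes). */
static uint64_t switch_to_user_cr3(uint64_t cr3)
{
	cr3 &= ~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET);
	return cr3 | X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER
		   | KAISER_SHADOW_PGD_OFFSET;
}

/* Mirrors what load_new_mm_cr3() builds on a task switch: the kernel ASID
 * with bit 63 clear (X86_CR3_PCID_KERN_FLUSH), because the PCID is recycled
 * across tasks and the old task's TLB entries must be flushed. */
static uint64_t task_switch_cr3(uint64_t pgd_pa)
{
	return pgd_pa | X86_CR3_PCID_ASID_KERN;
}

int main(void)
{
	uint64_t pgd = 0x1234000ULL;	/* pretend physical address of a pgd */
	uint64_t cr3 = task_switch_cr3(pgd);

	printf("task-switch CR3: %#018llx\n", (unsigned long long)cr3);
	cr3 = switch_to_user_cr3(cr3);
	printf("user CR3:        %#018llx\n", (unsigned long long)cr3);
	cr3 = switch_to_kernel_cr3(cr3);
	printf("kernel CR3:      %#018llx\n", (unsigned long long)cr3);
	return 0;
}

Run, this prints 0x0000000001234004, 0x8000000001235006 and 0x8000000001234004 in turn: the pgd address never changes, only the ASID, offset and NOFLUSH bits do, which is why the entry/exit paths above get away with a single andq/orq pair instead of a full TLB flush.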