diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch new file mode 100644 index 00000000..e949fb58 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch @@ -0,0 +1,403 @@ +From efc1ec625e63752ab337e0b151068400535aa861 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Thu, 17 Aug 2017 15:00:37 -0700 +Subject: [PATCH 021/102] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush + user + +We have many machines (Westmere, Sandybridge, Ivybridge) supporting +PCID but not INVPCID: on these load_new_mm_cr3() simply crashed. + +Flushing user context inside load_new_mm_cr3() without the use of +invpcid is difficult: momentarily switch from kernel to user context +and back to do so? I'm not sure whether that can be safely done at +all, and would risk polluting user context with kernel internals, +and kernel context with stale user externals. + +Instead, follow the hint in the comment that was there: change +X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3() +can leave a note in it, for SWITCH_USER_CR3 on return to userspace to +flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH. + +Which works well enough that there's no need to do it this way only +when invpcid is unsupported: it's a good alternative to invpcid here. +But there's a couple of inlines in asm/tlbflush.h that need to do the +same trick, so it's best to localize all this per-cpu business in +mm/kaiser.c: moving that part of the initialization from setup_pcid() +to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the +function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a +KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit. + +I did try to make the feature tests in asm/tlbflush.h more consistent +with each other: there seem to be far too many ways of performing such +tests, and I don't have a good grasp of their differences. At first +I converted them all to be static_cpu_has(): but that proved to be a +mistake, as the comment in __native_flush_tlb_single() hints; so then +I reversed and made them all this_cpu_has(). Probably all gratuitous +change, but that's the way it's working at present. + +I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR +gets re-initialized by each cpu (before and after these changes): +no problem when (as usual) all cpus on a machine have the same +features, but in principle incorrect. However, my experiment +to per-cpu-ify that one did not end well... + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/kaiser.h | 18 +++++++------ + arch/x86/include/asm/tlbflush.h | 56 ++++++++++++++++++++++++++++------------- + arch/x86/kernel/cpu/common.c | 22 +--------------- + arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++----- + arch/x86/mm/tlb.c | 46 +++++++++++++-------------------- + 5 files changed, 113 insertions(+), 79 deletions(-) + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +index 360ff3b..009bca5 100644 +--- a/arch/x86/include/asm/kaiser.h ++++ b/arch/x86/include/asm/kaiser.h +@@ -32,13 +32,12 @@ movq \reg, %cr3 + .macro _SWITCH_TO_USER_CR3 reg + movq %cr3, \reg + andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg +-/* +- * This can obviously be one instruction by putting the +- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR. +- * But, just leave it now for simplicity. +- */ +-orq X86_CR3_PCID_USER_VAR, \reg +-orq $(KAISER_SHADOW_PGD_OFFSET), \reg ++orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg ++js 9f ++// FLUSH this time, reset to NOFLUSH for next time ++// But if nopcid? Consider using 0x80 for user pcid? ++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7) ++9: + movq \reg, %cr3 + .endm + +@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax + */ + DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + ++extern unsigned long X86_CR3_PCID_KERN_VAR; ++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); ++ ++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; ++ + /** + * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping + * @addr: the start address of the range +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index 28b4182..4fff696 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -12,6 +12,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) + { + struct { u64 d[2]; } desc = { { pcid, addr } }; ++ + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global +@@ -132,27 +133,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) + cr4_set_bits(mask); + } + ++/* ++ * Declare a couple of kaiser interfaces here for convenience, ++ * to avoid the need for asm/kaiser.h in unexpected places. ++ */ ++#ifdef CONFIG_KAISER ++extern void kaiser_setup_pcid(void); ++extern void kaiser_flush_tlb_on_return_to_user(void); ++#else ++static inline void kaiser_setup_pcid(void) ++{ ++} ++static inline void kaiser_flush_tlb_on_return_to_user(void) ++{ ++} ++#endif ++ + static inline void __native_flush_tlb(void) + { +- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) { ++ if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* +- * If current->mm == NULL then we borrow a mm which may change during a +- * task switch and therefore we must not be preempted while we write CR3 +- * back: ++ * Note, this works with CR4.PCIDE=0 or 1. + */ +- preempt_disable(); +- native_write_cr3(native_read_cr3()); +- preempt_enable(); ++ invpcid_flush_all_nonglobals(); + return; + } ++ + /* +- * We are no longer using globals with KAISER, so a +- * "nonglobals" flush would work too. But, this is more +- * conservative. +- * +- * Note, this works with CR4.PCIDE=0 or 1. ++ * If current->mm == NULL then we borrow a mm which may change during a ++ * task switch and therefore we must not be preempted while we write CR3 ++ * back: + */ +- invpcid_flush_all(); ++ preempt_disable(); ++ if (this_cpu_has(X86_FEATURE_PCID)) ++ kaiser_flush_tlb_on_return_to_user(); ++ native_write_cr3(native_read_cr3()); ++ preempt_enable(); + } + + static inline void __native_flush_tlb_global_irq_disabled(void) +@@ -168,9 +184,13 @@ static inline void __native_flush_tlb_global_irq_disabled(void) + + static inline void __native_flush_tlb_global(void) + { ++#ifdef CONFIG_KAISER ++ /* Globals are not used at all */ ++ __native_flush_tlb(); ++#else + unsigned long flags; + +- if (static_cpu_has(X86_FEATURE_INVPCID)) { ++ if (this_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. +@@ -187,10 +207,9 @@ static inline void __native_flush_tlb_global(void) + * be called from deep inside debugging code.) + */ + raw_local_irq_save(flags); +- + __native_flush_tlb_global_irq_disabled(); +- + raw_local_irq_restore(flags); ++#endif + } + + static inline void __native_flush_tlb_single(unsigned long addr) +@@ -201,9 +220,12 @@ static inline void __native_flush_tlb_single(unsigned long addr) + * + * The ASIDs used below are hard-coded. But, we must not + * call invpcid(type=1/2) before CR4.PCIDE=1. Just call +- * invpcid in the case we are called early. ++ * invlpg in the case we are called early. + */ ++ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { ++ if (this_cpu_has(X86_FEATURE_PCID)) ++ kaiser_flush_tlb_on_return_to_user(); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + return; + } +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index b4c0ae5..e6be5f3 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -324,33 +324,12 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) + } + } + +-/* +- * These can have bit 63 set, so we can not just use a plain "or" +- * instruction to get their value or'd into CR3. It would take +- * another register. So, we use a memory reference to these +- * instead. +- * +- * This is also handy because systems that do not support +- * PCIDs just end up or'ing a 0 into their CR3, which does +- * no harm. +- */ +-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0; +-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0; +- + static void setup_pcid(struct cpuinfo_x86 *c) + { + if (cpu_has(c, X86_FEATURE_PCID)) { + if (cpu_has(c, X86_FEATURE_PGE)) { + cr4_set_bits(X86_CR4_PCIDE); + /* +- * These variables are used by the entry/exit +- * code to change PCIDs. +- */ +-#ifdef CONFIG_KAISER +- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH; +- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH; +-#endif +- /* + * INVPCID has two "groups" of types: + * 1/2: Invalidate an individual address + * 3/4: Invalidate all contexts +@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x86 *c) + clear_cpu_cap(c, X86_FEATURE_PCID); + } + } ++ kaiser_setup_pcid(); + } + + /* +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +index f5c75f7..7056840 100644 +--- a/arch/x86/mm/kaiser.c ++++ b/arch/x86/mm/kaiser.c +@@ -11,12 +11,26 @@ + #include <linux/uaccess.h> + + #include <asm/kaiser.h> ++#include <asm/tlbflush.h> /* to verify its kaiser declarations */ + #include <asm/pgtable.h> + #include <asm/pgalloc.h> + #include <asm/desc.h> ++ + #ifdef CONFIG_KAISER ++__visible ++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++/* ++ * These can have bit 63 set, so we can not just use a plain "or" ++ * instruction to get their value or'd into CR3. It would take ++ * another register. So, we use a memory reference to these instead. ++ * ++ * This is also handy because systems that do not support PCIDs ++ * just end up or'ing a 0 into their CR3, which does no harm. ++ */ ++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR; ++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR); + +-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); + /* + * At runtime, the only things we map are some things for CPU + * hotplug, and stacks for new processes. No two CPUs will ever +@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(void) + WARN_ON(__ret); \ + } while (0) + +-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +-extern unsigned long X86_CR3_PCID_KERN_VAR; +-extern unsigned long X86_CR3_PCID_USER_VAR; + /* + * If anything in here fails, we will likely die on one of the + * first kernel->user transitions and init will die. But, we +@@ -294,8 +305,6 @@ void __init kaiser_init(void) + + kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + __PAGE_KERNEL); +- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, +- __PAGE_KERNEL); + } + + /* Add a mapping to the shadow mapping, and synchronize the mappings */ +@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) + } + return pgd; + } ++ ++void kaiser_setup_pcid(void) ++{ ++ unsigned long kern_cr3 = 0; ++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET; ++ ++ if (this_cpu_has(X86_FEATURE_PCID)) { ++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH; ++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH; ++ } ++ /* ++ * These variables are used by the entry/exit ++ * code to change PCID and pgd and TLB flushing. ++ */ ++ X86_CR3_PCID_KERN_VAR = kern_cr3; ++ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3); ++} ++ ++/* ++ * Make a note that this cpu will need to flush USER tlb on return to user. ++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling: ++ * if cpu does not, then the NOFLUSH bit will never have been set. ++ */ ++void kaiser_flush_tlb_on_return_to_user(void) ++{ ++ this_cpu_write(X86_CR3_PCID_USER_VAR, ++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); ++} ++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); + #endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index a376246..a2532d4 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -6,13 +6,14 @@ + #include <linux/interrupt.h> + #include <linux/export.h> + #include <linux/cpu.h> ++#include <linux/debugfs.h> + + #include <asm/tlbflush.h> + #include <asm/mmu_context.h> + #include <asm/cache.h> + #include <asm/apic.h> + #include <asm/uv/uv.h> +-#include <linux/debugfs.h> ++#include <asm/kaiser.h> + + /* + * Smarter SMP flushing macros. +@@ -40,34 +41,23 @@ static void load_new_mm_cr3(pgd_t *pgdir) + { + unsigned long new_mm_cr3 = __pa(pgdir); + +- /* +- * KAISER, plus PCIDs needs some extra work here. But, +- * if either of features is not present, we need no +- * PCIDs here and just do a normal, full TLB flush with +- * the write_cr3() +- */ +- if (!IS_ENABLED(CONFIG_KAISER) || +- !cpu_feature_enabled(X86_FEATURE_PCID)) +- goto out_set_cr3; +- /* +- * We reuse the same PCID for different tasks, so we must +- * flush all the entires for the PCID out when we change +- * tasks. +- */ +- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); +- +- /* +- * The flush from load_cr3() may leave old TLB entries +- * for userspace in place. We must flush that context +- * separately. We can theoretically delay doing this +- * until we actually load up the userspace CR3, but +- * that's a bit tricky. We have to have the "need to +- * flush userspace PCID" bit per-cpu and check it in the +- * exit-to-userspace paths. +- */ +- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); ++#ifdef CONFIG_KAISER ++ if (this_cpu_has(X86_FEATURE_PCID)) { ++ /* ++ * We reuse the same PCID for different tasks, so we must ++ * flush all the entries for the PCID out when we change tasks. ++ * Flush KERN below, flush USER when returning to userspace in ++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro. ++ * ++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could ++ * do it here, but can only be used if X86_FEATURE_INVPCID is ++ * available - and many machines support pcid without invpcid. ++ */ ++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; ++ kaiser_flush_tlb_on_return_to_user(); ++ } ++#endif /* CONFIG_KAISER */ + +-out_set_cr3: + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB +-- +2.7.4 + |