about summary refs log tree commit diff stats
path: root/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch 403
1 files changed, 403 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
new file mode 100644
index 00000000..e949fb58
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0021-kaiser-load_new_mm_cr3-let-SWITCH_USER_CR3-flush-use.patch
@@ -0,0 +1,403 @@
+From efc1ec625e63752ab337e0b151068400535aa861 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 17 Aug 2017 15:00:37 -0700
+Subject: [PATCH 021/102] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush
+ user
+
+We have many machines (Westmere, Sandybridge, Ivybridge) supporting
+PCID but not INVPCID: on these load_new_mm_cr3() simply crashed.
+
+Flushing user context inside load_new_mm_cr3() without the use of
+invpcid is difficult: momentarily switch from kernel to user context
+and back to do so? I'm not sure whether that can be safely done at
+all, and would risk polluting user context with kernel internals,
+and kernel context with stale user externals.
+
+Instead, follow the hint in the comment that was there: change
+X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3()
+can leave a note in it, for SWITCH_USER_CR3 on return to userspace to
+flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH.
+
+Which works well enough that there's no need to do it this way only
+when invpcid is unsupported: it's a good alternative to invpcid here.
+But there's a couple of inlines in asm/tlbflush.h that need to do the
+same trick, so it's best to localize all this per-cpu business in
+mm/kaiser.c: moving that part of the initialization from setup_pcid()
+to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the
+function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a
+KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit.
+
+I did try to make the feature tests in asm/tlbflush.h more consistent
+with each other: there seem to be far too many ways of performing such
+tests, and I don't have a good grasp of their differences. At first
+I converted them all to be static_cpu_has(): but that proved to be a
+mistake, as the comment in __native_flush_tlb_single() hints; so then
+I reversed and made them all this_cpu_has(). Probably all gratuitous
+change, but that's the way it's working at present.
+
+I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR
+gets re-initialized by each cpu (before and after these changes):
+no problem when (as usual) all cpus on a machine have the same
+features, but in principle incorrect. However, my experiment
+to per-cpu-ify that one did not end well...
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h | 18 +++++++------
+ arch/x86/include/asm/tlbflush.h | 56 ++++++++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c | 22 +---------------
+ arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++-----
+ arch/x86/mm/tlb.c | 46 +++++++++++++--------------------
+ 5 files changed, 113 insertions(+), 79 deletions(-)
+
+diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
+index 360ff3b..009bca5 100644
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -32,13 +32,12 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-/*
+- * This can obviously be one instruction by putting the
+- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
+- * But, just leave it now for simplicity.
+- */
+-orq X86_CR3_PCID_USER_VAR, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++js 9f
++// FLUSH this time, reset to NOFLUSH for next time
++// But if nopcid? Consider using 0x80 for user pcid?
++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++9:
+ movq \reg, %cr3
+ .endm
+
+@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 28b4182..4fff696 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -12,6 +12,7 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+ {
+ struct { u64 d[2]; } desc = { { pcid, addr } };
++
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+@@ -132,27 +133,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+ cr4_set_bits(mask);
+ }
+
++/*
++ * Declare a couple of kaiser interfaces here for convenience,
++ * to avoid the need for asm/kaiser.h in unexpected places.
++ */
++#ifdef CONFIG_KAISER
++extern void kaiser_setup_pcid(void);
++extern void kaiser_flush_tlb_on_return_to_user(void);
++#else
++static inline void kaiser_setup_pcid(void)
++{
++}
++static inline void kaiser_flush_tlb_on_return_to_user(void)
++{
++}
++#endif
++
+ static inline void __native_flush_tlb(void)
+ {
+- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+- * If current->mm == NULL then we borrow a mm which may change during a
+- * task switch and therefore we must not be preempted while we write CR3
+- * back:
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+- preempt_disable();
+- native_write_cr3(native_read_cr3());
+- preempt_enable();
++ invpcid_flush_all_nonglobals();
+ return;
+ }
++
+ /*
+- * We are no longer using globals with KAISER, so a
+- * "nonglobals" flush would work too. But, this is more
+- * conservative.
+- *
+- * Note, this works with CR4.PCIDE=0 or 1.
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
+ */
+- invpcid_flush_all();
++ preempt_disable();
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
++ native_write_cr3(native_read_cr3());
++ preempt_enable();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -168,9 +184,13 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
+
+ static inline void __native_flush_tlb_global(void)
+ {
++#ifdef CONFIG_KAISER
++ /* Globals are not used at all */
++ __native_flush_tlb();
++#else
+ unsigned long flags;
+
+- if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+@@ -187,10 +207,9 @@ static inline void __native_flush_tlb_global(void)
+ * be called from deep inside debugging code.)
+ */
+ raw_local_irq_save(flags);
+-
+ __native_flush_tlb_global_irq_disabled();
+-
+ raw_local_irq_restore(flags);
++#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -201,9 +220,12 @@ static inline void __native_flush_tlb_single(unsigned long addr)
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+- * invpcid in the case we are called early.
++ * invlpg in the case we are called early.
+ */
++
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index b4c0ae5..e6be5f3 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,33 +324,12 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+ }
+ }
+
+-/*
+- * These can have bit 63 set, so we can not just use a plain "or"
+- * instruction to get their value or'd into CR3. It would take
+- * another register. So, we use a memory reference to these
+- * instead.
+- *
+- * This is also handy because systems that do not support
+- * PCIDs just end up or'ing a 0 into their CR3, which does
+- * no harm.
+- */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
+-
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+- * These variables are used by the entry/exit
+- * code to change PCIDs.
+- */
+-#ifdef CONFIG_KAISER
+- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
+- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
+-#endif
+- /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x86 *c)
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
++ kaiser_setup_pcid();
+ }
+
+ /*
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index f5c75f7..7056840 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -11,12 +11,26 @@
+ #include <linux/uaccess.h>
+
+ #include <asm/kaiser.h>
++#include <asm/tlbflush.h> /* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++
+ #ifdef CONFIG_KAISER
++__visible
++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these instead.
++ *
++ * This is also handy because systems that do not support PCIDs
++ * just end up or'ing a 0 into their CR3, which does no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
+
+-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ /*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes. No two CPUs will ever
+@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(void)
+ WARN_ON(__ret); \
+ } while (0)
+
+-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -294,8 +305,6 @@ void __init kaiser_init(void)
+
+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
+- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+- __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ }
+ return pgd;
+ }
++
++void kaiser_setup_pcid(void)
++{
++ unsigned long kern_cr3 = 0;
++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
++
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
++ }
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCID and pgd and TLB flushing.
++ */
++ X86_CR3_PCID_KERN_VAR = kern_cr3;
++ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++}
++
++/*
++ * Make a note that this cpu will need to flush USER tlb on return to user.
++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
++ * if cpu does not, then the NOFLUSH bit will never have been set.
++ */
++void kaiser_flush_tlb_on_return_to_user(void)
++{
++ this_cpu_write(X86_CR3_PCID_USER_VAR,
++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
++}
++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+ #endif /* CONFIG_KAISER */
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index a376246..a2532d4 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/export.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
++#include <asm/kaiser.h>
+
+ /*
+ * Smarter SMP flushing macros.
+@@ -40,34 +41,23 @@ static void load_new_mm_cr3(pgd_t *pgdir)
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- /*
+- * KAISER, plus PCIDs needs some extra work here. But,
+- * if either of features is not present, we need no
+- * PCIDs here and just do a normal, full TLB flush with
+- * the write_cr3()
+- */
+- if (!IS_ENABLED(CONFIG_KAISER) ||
+- !cpu_feature_enabled(X86_FEATURE_PCID))
+- goto out_set_cr3;
+- /*
+- * We reuse the same PCID for different tasks, so we must
+- * flush all the entires for the PCID out when we change
+- * tasks.
+- */
+- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+-
+- /*
+- * The flush from load_cr3() may leave old TLB entries
+- * for userspace in place. We must flush that context
+- * separately. We can theoretically delay doing this
+- * until we actually load up the userspace CR3, but
+- * that's a bit tricky. We have to have the "need to
+- * flush userspace PCID" bit per-cpu and check it in the
+- * exit-to-userspace paths.
+- */
+- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++#ifdef CONFIG_KAISER
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entries for the PCID out when we change tasks.
++ * Flush KERN below, flush USER when returning to userspace in
++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
++ *
++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
++ * do it here, but can only be used if X86_FEATURE_INVPCID is
++ * available - and many machines support pcid without invpcid.
++ */
++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ kaiser_flush_tlb_on_return_to_user();
++ }
++#endif /* CONFIG_KAISER */
+
+-out_set_cr3:
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+--
+2.7.4
+