diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch | 686 |
1 files changed, 0 insertions, 686 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch deleted file mode 100644 index 64e5f55e..00000000 --- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch +++ /dev/null @@ -1,686 +0,0 @@ -From 6ceca45ce264990a8831d3e5f7ff6e8c0d10df3a Mon Sep 17 00:00:00 2001 -From: Hugh Dickins <hughd@google.com> -Date: Sun, 24 Sep 2017 16:59:49 -0700 -Subject: [PATCH 027/103] kaiser: add "nokaiser" boot option, using ALTERNATIVE - -Added "nokaiser" boot option: an early param like "noinvpcid". -Most places now check int kaiser_enabled (#defined 0 when not -CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S -and entry_64_compat.S are using the ALTERNATIVE technique, which -patches in the preferred instructions at runtime. That technique -is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated. - -Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that, -but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when -nokaiser like when !CONFIG_KAISER, but not setting either when kaiser - -neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL -won't get set in some obscure corner, or something add PGE into CR4. -By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled, -all page table setup which uses pte_pfn() masks it out of the ptes. - -It's slightly shameful that the same declaration versus definition of -kaiser_enabled appears in not one, not two, but in three header files -(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way, -than with #including any of those in any of the others; and did not -feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes -them all, so we shall hear about it if they get out of synch. - -Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER -from kaiser.c; removed the unused native_get_normal_pgd(); removed -the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some -comments. But more interestingly, set CR4.PSE in secondary_startup_64: -the manual is clear that it does not matter whether it's 0 or 1 when -4-level-pts are enabled, but I was distracted to find cr4 different on -BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask(). - -Signed-off-by: Hugh Dickins <hughd@google.com> -Acked-by: Jiri Kosina <jkosina@suse.cz> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - Documentation/kernel-parameters.txt | 2 ++ - arch/x86/entry/entry_64.S | 15 ++++++------ - arch/x86/include/asm/cpufeatures.h | 3 +++ - arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++------ - arch/x86/include/asm/pgtable.h | 20 +++++++++++----- - arch/x86/include/asm/pgtable_64.h | 13 ++++------- - arch/x86/include/asm/pgtable_types.h | 4 ---- - arch/x86/include/asm/tlbflush.h | 39 ++++++++++++++++++++------------ - arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++- - arch/x86/kernel/espfix_64.c | 3 ++- - arch/x86/kernel/head_64.S | 4 ++-- - arch/x86/mm/init.c | 2 +- - arch/x86/mm/init_64.c | 10 ++++++++ - arch/x86/mm/kaiser.c | 26 +++++++++++++++++---- - arch/x86/mm/pgtable.c | 8 ++----- - arch/x86/mm/tlb.c | 4 +--- - tools/arch/x86/include/asm/cpufeatures.h | 3 +++ - 17 files changed, 146 insertions(+), 65 deletions(-) - -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index a303387..e2642ec 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -2753,6 +2753,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - - nojitter [IA-64] Disables jitter checking for ITC timers. - -+ nokaiser [X86-64] Disable KAISER isolation of kernel from user. -+ - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver - - no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 41bf650..bbb38ac 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry) - * unconditionally, but we need to find out whether the reverse - * should be done on return (conveyed to paranoid_exit in %ebx). - */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - testl $KAISER_SHADOW_PGD_OFFSET, %eax - jz 2f - orl $2, %ebx -@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit) - TRACE_IRQS_OFF_DEBUG - TRACE_IRQS_IRETQ_DEBUG - #ifdef CONFIG_KAISER -+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */ - testl $2, %ebx /* SWITCH_USER_CR3 needed? */ - jz paranoid_exit_no_switch - SWITCH_USER_CR3 -@@ -1339,13 +1340,14 @@ ENTRY(nmi) - #ifdef CONFIG_KAISER - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - movq %rax, %cr3 -+2: - #endif - call do_nmi - -@@ -1355,8 +1357,7 @@ ENTRY(nmi) - * kernel code that needs user CR3, but do we ever return - * to "user mode" where we need the kernel CR3? - */ -- popq %rax -- mov %rax, %cr3 -+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER - #endif - - /* -@@ -1583,13 +1584,14 @@ end_repeat_nmi: - #ifdef CONFIG_KAISER - /* Unconditionally use kernel CR3 for do_nmi() */ - /* %rax is saved above, so OK to clobber here */ -- movq %cr3, %rax -+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER - /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ - orq x86_cr3_pcid_noflush, %rax - pushq %rax - /* mask off "user" bit of pgd address and 12 PCID bits: */ - andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax - movq %rax, %cr3 -+2: - #endif - - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ -@@ -1601,8 +1603,7 @@ end_repeat_nmi: - * kernel code that needs user CR3, like just just before - * a sysret. - */ -- popq %rax -- mov %rax, %cr3 -+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER - #endif - - testl %ebx, %ebx /* swapgs needed? */ -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index dc50883..20271d6 100644 ---- a/arch/x86/include/asm/cpufeatures.h -+++ b/arch/x86/include/asm/cpufeatures.h -@@ -198,6 +198,9 @@ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - -+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+ - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ - #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h -index 3dc5f4c..96643a9 100644 ---- a/arch/x86/include/asm/kaiser.h -+++ b/arch/x86/include/asm/kaiser.h -@@ -46,28 +46,33 @@ movq \reg, %cr3 - .endm - - .macro SWITCH_KERNEL_CR3 --pushq %rax -+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER - _SWITCH_TO_KERNEL_CR3 %rax - popq %rax -+8: - .endm - - .macro SWITCH_USER_CR3 --pushq %rax -+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER - _SWITCH_TO_USER_CR3 %rax %al - popq %rax -+8: - .endm - - .macro SWITCH_KERNEL_CR3_NO_STACK --movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) -+ALTERNATIVE "jmp 8f", \ -+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ -+ X86_FEATURE_KAISER - _SWITCH_TO_KERNEL_CR3 %rax - movq PER_CPU_VAR(unsafe_stack_register_backup), %rax -+8: - .endm - - #else /* CONFIG_KAISER */ - --.macro SWITCH_KERNEL_CR3 reg -+.macro SWITCH_KERNEL_CR3 - .endm --.macro SWITCH_USER_CR3 reg regb -+.macro SWITCH_USER_CR3 - .endm - .macro SWITCH_KERNEL_CR3_NO_STACK - .endm -@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); - - extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; - -+extern int kaiser_enabled; -+#else -+#define kaiser_enabled 0 -+#endif /* CONFIG_KAISER */ -+ -+/* -+ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, -+ * so as to build with tests on kaiser_enabled instead of #ifdefs. -+ */ -+ - /** - * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping - * @addr: the start address of the range -@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); - */ - extern void kaiser_init(void); - --#endif /* CONFIG_KAISER */ -- - #endif /* __ASSEMBLY */ - - #endif /* _ASM_X86_KAISER_H */ -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 1cee98e..217e83a 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -18,6 +18,12 @@ - #ifndef __ASSEMBLY__ - #include <asm/x86_init.h> - -+#ifdef CONFIG_KAISER -+extern int kaiser_enabled; -+#else -+#define kaiser_enabled 0 -+#endif -+ - void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); - void ptdump_walk_pgd_level_checkwx(void); - -@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd) - * page table by accident; it will fault on the first - * instruction it tries to run. See native_set_pgd(). - */ -- if (IS_ENABLED(CONFIG_KAISER)) -+ if (kaiser_enabled) - ignore_flags |= _PAGE_NX; - - return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; -@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, - */ - static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - { -- memcpy(dst, src, count * sizeof(pgd_t)); -+ memcpy(dst, src, count * sizeof(pgd_t)); - #ifdef CONFIG_KAISER -- /* Clone the shadow pgd part as well */ -- memcpy(native_get_shadow_pgd(dst), -- native_get_shadow_pgd(src), -- count * sizeof(pgd_t)); -+ if (kaiser_enabled) { -+ /* Clone the shadow pgd part as well */ -+ memcpy(native_get_shadow_pgd(dst), -+ native_get_shadow_pgd(src), -+ count * sizeof(pgd_t)); -+ } - #endif - } - -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index 177caf3..cf68b5c 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); - - static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - { -+#ifdef CONFIG_DEBUG_VM -+ /* linux/mmdebug.h may not have been included at this point */ -+ BUG_ON(!kaiser_enabled); -+#endif - return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); - } -- --static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) --{ -- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); --} - #else - static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - { -@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) - BUILD_BUG_ON(1); - return NULL; - } --static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) --{ -- return pgdp; --} - #endif /* CONFIG_KAISER */ - - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 7cf2883..f0d9a1a 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -45,11 +45,7 @@ - #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) - #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) - #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) --#ifdef CONFIG_KAISER --#define _PAGE_GLOBAL (_AT(pteval_t, 0)) --#else - #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) --#endif - #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) - #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) - #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 4fff696..13a74f6 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -138,9 +138,11 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) - * to avoid the need for asm/kaiser.h in unexpected places. - */ - #ifdef CONFIG_KAISER -+extern int kaiser_enabled; - extern void kaiser_setup_pcid(void); - extern void kaiser_flush_tlb_on_return_to_user(void); - #else -+#define kaiser_enabled 0 - static inline void kaiser_setup_pcid(void) - { - } -@@ -165,7 +167,7 @@ static inline void __native_flush_tlb(void) - * back: - */ - preempt_disable(); -- if (this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) - kaiser_flush_tlb_on_return_to_user(); - native_write_cr3(native_read_cr3()); - preempt_enable(); -@@ -176,20 +178,30 @@ static inline void __native_flush_tlb_global_irq_disabled(void) - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); -- /* clear PGE */ -- native_write_cr4(cr4 & ~X86_CR4_PGE); -- /* write old PGE again and flush TLBs */ -- native_write_cr4(cr4); -+ if (cr4 & X86_CR4_PGE) { -+ /* clear PGE and flush TLB of all entries */ -+ native_write_cr4(cr4 & ~X86_CR4_PGE); -+ /* restore PGE as it was before */ -+ native_write_cr4(cr4); -+ } else { -+ /* -+ * x86_64 microcode update comes this way when CR4.PGE is not -+ * enabled, and it's safer for all callers to allow this case. -+ */ -+ native_write_cr3(native_read_cr3()); -+ } - } - - static inline void __native_flush_tlb_global(void) - { --#ifdef CONFIG_KAISER -- /* Globals are not used at all */ -- __native_flush_tlb(); --#else - unsigned long flags; - -+ if (kaiser_enabled) { -+ /* Globals are not used at all */ -+ __native_flush_tlb(); -+ return; -+ } -+ - if (this_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes -@@ -209,7 +221,6 @@ static inline void __native_flush_tlb_global(void) - raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); - raw_local_irq_restore(flags); --#endif - } - - static inline void __native_flush_tlb_single(unsigned long addr) -@@ -224,7 +235,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) - */ - - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { -- if (this_cpu_has(X86_FEATURE_PCID)) -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) - kaiser_flush_tlb_on_return_to_user(); - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - return; -@@ -239,9 +250,9 @@ static inline void __native_flush_tlb_single(unsigned long addr) - * Make sure to do only a single invpcid when KAISER is - * disabled and we have only a single ASID. - */ -- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) -- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); -- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); -+ if (kaiser_enabled) -+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); -+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); - } - - static inline void __flush_tlb_all(void) -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index e6be5f3..8b03874 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s) - return 1; - } - __setup("nopcid", x86_pcid_setup); -+ -+static int __init x86_nokaiser_setup(char *s) -+{ -+ /* nokaiser doesn't accept parameters */ -+ if (s) -+ return -EINVAL; -+#ifdef CONFIG_KAISER -+ kaiser_enabled = 0; -+ setup_clear_cpu_cap(X86_FEATURE_KAISER); -+ pr_info("nokaiser: KAISER feature disabled\n"); -+#endif -+ return 0; -+} -+early_param("nokaiser", x86_nokaiser_setup); - #endif - - static int __init x86_noinvpcid_setup(char *s) -@@ -327,7 +341,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) - static void setup_pcid(struct cpuinfo_x86 *c) - { - if (cpu_has(c, X86_FEATURE_PCID)) { -- if (cpu_has(c, X86_FEATURE_PGE)) { -+ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) { - cr4_set_bits(X86_CR4_PCIDE); - /* - * INVPCID has two "groups" of types: -@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c) - c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); - - init_scattered_cpuid_features(c); -+#ifdef CONFIG_KAISER -+ if (kaiser_enabled) -+ set_cpu_cap(c, X86_FEATURE_KAISER); -+#endif - } - - static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -@@ -1537,6 +1555,14 @@ void cpu_init(void) - * try to read it. - */ - cr4_init_shadow(); -+ if (!kaiser_enabled) { -+ /* -+ * secondary_startup_64() deferred setting PGE in cr4: -+ * probe_page_size_mask() sets it on the boot cpu, -+ * but it needs to be set on each secondary cpu. -+ */ -+ cr4_set_bits(X86_CR4_PGE); -+ } - - /* - * Load microcode on this cpu if a valid microcode is available. -diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c -index 560c2fd..e33b385 100644 ---- a/arch/x86/kernel/espfix_64.c -+++ b/arch/x86/kernel/espfix_64.c -@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void) - * area to ensure it is mapped into the shadow user page - * tables. - */ -- if (IS_ENABLED(CONFIG_KAISER)) -+ if (kaiser_enabled) { - set_pgd(native_get_shadow_pgd(pgd_p), - __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); -+ } - - /* Randomize the locations */ - init_espfix_random(); -diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S -index 5775379..d04479b 100644 ---- a/arch/x86/kernel/head_64.S -+++ b/arch/x86/kernel/head_64.S -@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64) - movq $(init_level4_pgt - __START_KERNEL_map), %rax - 1: - -- /* Enable PAE mode and PGE */ -- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx -+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ -+ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx - movq %rcx, %cr4 - - /* Setup early boot stage 4 level pagetables. */ -diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c -index 22af912..05a9855 100644 ---- a/arch/x86/mm/init.c -+++ b/arch/x86/mm/init.c -@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void) - cr4_set_bits_and_update_boot(X86_CR4_PSE); - - /* Enable PGE if available */ -- if (boot_cpu_has(X86_FEATURE_PGE)) { -+ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) { - cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else -diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c -index 14b9dd7..a0e8df6 100644 ---- a/arch/x86/mm/init_64.c -+++ b/arch/x86/mm/init_64.c -@@ -324,6 +324,16 @@ void __init cleanup_highmap(void) - continue; - if (vaddr < (unsigned long) _text || vaddr > end) - set_pmd(pmd, __pmd(0)); -+ else if (kaiser_enabled) { -+ /* -+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL: -+ * clear that now. This is not important, so long as -+ * CR4.PGE remains clear, but it removes an anomaly. -+ * Physical mapping setup below avoids _PAGE_GLOBAL -+ * by use of massage_pgprot() inside pfn_pte() etc. -+ */ -+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); -+ } - } - } - -diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c -index cc0950f..11032dc 100644 ---- a/arch/x86/mm/kaiser.c -+++ b/arch/x86/mm/kaiser.c -@@ -16,7 +16,9 @@ - #include <asm/pgalloc.h> - #include <asm/desc.h> - --#ifdef CONFIG_KAISER -+int kaiser_enabled __read_mostly = 1; -+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ -+ - __visible - DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); - -@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic) - return pte_offset_kernel(pmd, address); - } - --int kaiser_add_user_map(const void *__start_addr, unsigned long size, -- unsigned long flags) -+static int kaiser_add_user_map(const void *__start_addr, unsigned long size, -+ unsigned long flags) - { - int ret = 0; - pte_t *pte; -@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, - unsigned long end_addr = PAGE_ALIGN(start_addr + size); - unsigned long target_address; - -+ /* -+ * It is convenient for callers to pass in __PAGE_KERNEL etc, -+ * and there is no actual harm from setting _PAGE_GLOBAL, so -+ * long as CR4.PGE is not set. But it is nonetheless troubling -+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" -+ * requires that not to be #defined to 0): so mask it off here. -+ */ -+ flags &= ~_PAGE_GLOBAL; -+ - for (; address < end_addr; address += PAGE_SIZE) { - target_address = get_pa_from_mapping(address); - if (target_address == -1) { -@@ -263,6 +274,8 @@ void __init kaiser_init(void) - { - int cpu; - -+ if (!kaiser_enabled) -+ return; - kaiser_init_all_pgds(); - - for_each_possible_cpu(cpu) { -@@ -311,6 +324,8 @@ void __init kaiser_init(void) - /* Add a mapping to the shadow mapping, and synchronize the mappings */ - int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) - { -+ if (!kaiser_enabled) -+ return 0; - return kaiser_add_user_map((const void *)addr, size, flags); - } - -@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) - unsigned long addr, next; - pgd_t *pgd; - -+ if (!kaiser_enabled) -+ return; - pgd = native_get_shadow_pgd(pgd_offset_k(start)); - for (addr = start; addr < end; pgd++, addr = next) { - next = pgd_addr_end(addr, end); -@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp) - - pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) - { -+ if (!kaiser_enabled) -+ return pgd; - /* - * Do we need to also populate the shadow pgd? Check _PAGE_USER to - * skip cases like kexec and EFI which make temporary low mappings. -@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(void) - X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); - } - EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); --#endif /* CONFIG_KAISER */ -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 352fd01..5aaec8e 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd) - } - #else - --#ifdef CONFIG_KAISER - /* -- * Instead of one pmd, we aquire two pmds. Being order-1, it is -+ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ --#define PGD_ALLOCATION_ORDER 1 --#else --#define PGD_ALLOCATION_ORDER 0 --#endif -+#define PGD_ALLOCATION_ORDER kaiser_enabled - - static inline pgd_t *_pgd_alloc(void) - { -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 852c665..fde44bb 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -41,8 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) - { - unsigned long new_mm_cr3 = __pa(pgdir); - --#ifdef CONFIG_KAISER -- if (this_cpu_has(X86_FEATURE_PCID)) { -+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { - /* - * We reuse the same PCID for different tasks, so we must - * flush all the entries for the PCID out when we change tasks. -@@ -59,7 +58,6 @@ static void load_new_mm_cr3(pgd_t *pgdir) - new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; - kaiser_flush_tlb_on_return_to_user(); - } --#endif /* CONFIG_KAISER */ - - /* - * Caution: many callers of this function expect -diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h -index a396292..67c93d9 100644 ---- a/tools/arch/x86/include/asm/cpufeatures.h -+++ b/tools/arch/x86/include/asm/cpufeatures.h -@@ -197,6 +197,9 @@ - #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ - #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ - -+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */ -+ - /* Virtualization flags: Linux defined, word 8 */ - #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ - #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ --- -2.7.4 - |