about summary refs log tree commit diff stats
path: root/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch | 686
1 files changed, 0 insertions, 686 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
deleted file mode 100644
index 64e5f55e..00000000
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0027-kaiser-add-nokaiser-boot-option-using-ALTERNATIVE.patch
+++ /dev/null
@@ -1,686 +0,0 @@
-From 6ceca45ce264990a8831d3e5f7ff6e8c0d10df3a Mon Sep 17 00:00:00 2001
-From: Hugh Dickins <hughd@google.com>
-Date: Sun, 24 Sep 2017 16:59:49 -0700
-Subject: [PATCH 027/103] kaiser: add "nokaiser" boot option, using ALTERNATIVE
-
-Added "nokaiser" boot option: an early param like "noinvpcid".
-Most places now check int kaiser_enabled (#defined 0 when not
-CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
-and entry_64_compat.S are using the ALTERNATIVE technique, which
-patches in the preferred instructions at runtime. That technique
-is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated.
-
-Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
-but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
-nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
-neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
-won't get set in some obscure corner, or something adds PGE into CR4.
-By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
-all page table setup which uses pte_pfn() masks it out of the ptes.
-
-It's slightly shameful that the same declaration versus definition of
-kaiser_enabled appears in not one, not two, but in three header files
-(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
-than with #including any of those in any of the others; and did not
-feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
-them all, so we shall hear about it if they get out of synch.
-
-Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
-from kaiser.c; removed the unused native_get_normal_pgd(); removed
-the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
-comments. But more interestingly, set CR4.PSE in secondary_startup_64:
-the manual is clear that it does not matter whether it's 0 or 1 when
-4-level-pts are enabled, but I was distracted to find cr4 different on
-BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask().
-
-Signed-off-by: Hugh Dickins <hughd@google.com>
-Acked-by: Jiri Kosina <jkosina@suse.cz>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- Documentation/kernel-parameters.txt | 2 ++
- arch/x86/entry/entry_64.S | 15 ++++++------
- arch/x86/include/asm/cpufeatures.h | 3 +++
- arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++------
- arch/x86/include/asm/pgtable.h | 20 +++++++++++-----
- arch/x86/include/asm/pgtable_64.h | 13 ++++-------
- arch/x86/include/asm/pgtable_types.h | 4 ----
- arch/x86/include/asm/tlbflush.h | 39 ++++++++++++++++++++------------
- arch/x86/kernel/cpu/common.c | 28 ++++++++++++++++++++++-
- arch/x86/kernel/espfix_64.c | 3 ++-
- arch/x86/kernel/head_64.S | 4 ++--
- arch/x86/mm/init.c | 2 +-
- arch/x86/mm/init_64.c | 10 ++++++++
- arch/x86/mm/kaiser.c | 26 +++++++++++++++++----
- arch/x86/mm/pgtable.c | 8 ++-----
- arch/x86/mm/tlb.c | 4 +---
- tools/arch/x86/include/asm/cpufeatures.h | 3 +++
- 17 files changed, 146 insertions(+), 65 deletions(-)
-
-diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
-index a303387..e2642ec 100644
---- a/Documentation/kernel-parameters.txt
-+++ b/Documentation/kernel-parameters.txt
-@@ -2753,6 +2753,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
-
- nojitter [IA-64] Disables jitter checking for ITC timers.
-
-+ nokaiser [X86-64] Disable KAISER isolation of kernel from user.
-+
- no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
-
- no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 41bf650..bbb38ac 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry)
- * unconditionally, but we need to find out whether the reverse
- * should be done on return (conveyed to paranoid_exit in %ebx).
- */
-- movq %cr3, %rax
-+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
- testl $KAISER_SHADOW_PGD_OFFSET, %eax
- jz 2f
- orl $2, %ebx
-@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit)
- TRACE_IRQS_OFF_DEBUG
- TRACE_IRQS_IRETQ_DEBUG
- #ifdef CONFIG_KAISER
-+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
- testl $2, %ebx /* SWITCH_USER_CR3 needed? */
- jz paranoid_exit_no_switch
- SWITCH_USER_CR3
-@@ -1339,13 +1340,14 @@ ENTRY(nmi)
- #ifdef CONFIG_KAISER
- /* Unconditionally use kernel CR3 for do_nmi() */
- /* %rax is saved above, so OK to clobber here */
-- movq %cr3, %rax
-+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
- /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
- orq x86_cr3_pcid_noflush, %rax
- pushq %rax
- /* mask off "user" bit of pgd address and 12 PCID bits: */
- andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
- movq %rax, %cr3
-+2:
- #endif
- call do_nmi
-
-@@ -1355,8 +1357,7 @@ ENTRY(nmi)
- * kernel code that needs user CR3, but do we ever return
- * to "user mode" where we need the kernel CR3?
- */
-- popq %rax
-- mov %rax, %cr3
-+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
- #endif
-
- /*
-@@ -1583,13 +1584,14 @@ end_repeat_nmi:
- #ifdef CONFIG_KAISER
- /* Unconditionally use kernel CR3 for do_nmi() */
- /* %rax is saved above, so OK to clobber here */
-- movq %cr3, %rax
-+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
- /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
- orq x86_cr3_pcid_noflush, %rax
- pushq %rax
- /* mask off "user" bit of pgd address and 12 PCID bits: */
- andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
- movq %rax, %cr3
-+2:
- #endif
-
- /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
-@@ -1601,8 +1603,7 @@ end_repeat_nmi:
- * kernel code that needs user CR3, like just just before
- * a sysret.
- */
-- popq %rax
-- mov %rax, %cr3
-+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
- #endif
-
- testl %ebx, %ebx /* swapgs needed? */
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index dc50883..20271d6 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -198,6 +198,9 @@
- #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
- #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-
-+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
-+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
-+
- /* Virtualization flags: Linux defined, word 8 */
- #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
- #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
-index 3dc5f4c..96643a9 100644
---- a/arch/x86/include/asm/kaiser.h
-+++ b/arch/x86/include/asm/kaiser.h
-@@ -46,28 +46,33 @@ movq \reg, %cr3
- .endm
-
- .macro SWITCH_KERNEL_CR3
--pushq %rax
-+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
- _SWITCH_TO_KERNEL_CR3 %rax
- popq %rax
-+8:
- .endm
-
- .macro SWITCH_USER_CR3
--pushq %rax
-+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
- _SWITCH_TO_USER_CR3 %rax %al
- popq %rax
-+8:
- .endm
-
- .macro SWITCH_KERNEL_CR3_NO_STACK
--movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-+ALTERNATIVE "jmp 8f", \
-+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
-+ X86_FEATURE_KAISER
- _SWITCH_TO_KERNEL_CR3 %rax
- movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
-+8:
- .endm
-
- #else /* CONFIG_KAISER */
-
--.macro SWITCH_KERNEL_CR3 reg
-+.macro SWITCH_KERNEL_CR3
- .endm
--.macro SWITCH_USER_CR3 reg regb
-+.macro SWITCH_USER_CR3
- .endm
- .macro SWITCH_KERNEL_CR3_NO_STACK
- .endm
-@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
-
- extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-
-+extern int kaiser_enabled;
-+#else
-+#define kaiser_enabled 0
-+#endif /* CONFIG_KAISER */
-+
-+/*
-+ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
-+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
-+ */
-+
- /**
- * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
- * @addr: the start address of the range
-@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
- */
- extern void kaiser_init(void);
-
--#endif /* CONFIG_KAISER */
--
- #endif /* __ASSEMBLY */
-
- #endif /* _ASM_X86_KAISER_H */
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 1cee98e..217e83a 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -18,6 +18,12 @@
- #ifndef __ASSEMBLY__
- #include <asm/x86_init.h>
-
-+#ifdef CONFIG_KAISER
-+extern int kaiser_enabled;
-+#else
-+#define kaiser_enabled 0
-+#endif
-+
- void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
- void ptdump_walk_pgd_level_checkwx(void);
-
-@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd)
- * page table by accident; it will fault on the first
- * instruction it tries to run. See native_set_pgd().
- */
-- if (IS_ENABLED(CONFIG_KAISER))
-+ if (kaiser_enabled)
- ignore_flags |= _PAGE_NX;
-
- return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
-@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
- */
- static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
- {
-- memcpy(dst, src, count * sizeof(pgd_t));
-+ memcpy(dst, src, count * sizeof(pgd_t));
- #ifdef CONFIG_KAISER
-- /* Clone the shadow pgd part as well */
-- memcpy(native_get_shadow_pgd(dst),
-- native_get_shadow_pgd(src),
-- count * sizeof(pgd_t));
-+ if (kaiser_enabled) {
-+ /* Clone the shadow pgd part as well */
-+ memcpy(native_get_shadow_pgd(dst),
-+ native_get_shadow_pgd(src),
-+ count * sizeof(pgd_t));
-+ }
- #endif
- }
-
-diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
-index 177caf3..cf68b5c 100644
---- a/arch/x86/include/asm/pgtable_64.h
-+++ b/arch/x86/include/asm/pgtable_64.h
-@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
-
- static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
- {
-+#ifdef CONFIG_DEBUG_VM
-+ /* linux/mmdebug.h may not have been included at this point */
-+ BUG_ON(!kaiser_enabled);
-+#endif
- return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
- }
--
--static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
--{
-- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
--}
- #else
- static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
- {
-@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
- BUILD_BUG_ON(1);
- return NULL;
- }
--static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
--{
-- return pgdp;
--}
- #endif /* CONFIG_KAISER */
-
- static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
-diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
-index 7cf2883..f0d9a1a 100644
---- a/arch/x86/include/asm/pgtable_types.h
-+++ b/arch/x86/include/asm/pgtable_types.h
-@@ -45,11 +45,7 @@
- #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
- #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
- #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
--#ifdef CONFIG_KAISER
--#define _PAGE_GLOBAL (_AT(pteval_t, 0))
--#else
- #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
--#endif
- #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
- #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
- #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 4fff696..13a74f6 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -138,9 +138,11 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
- * to avoid the need for asm/kaiser.h in unexpected places.
- */
- #ifdef CONFIG_KAISER
-+extern int kaiser_enabled;
- extern void kaiser_setup_pcid(void);
- extern void kaiser_flush_tlb_on_return_to_user(void);
- #else
-+#define kaiser_enabled 0
- static inline void kaiser_setup_pcid(void)
- {
- }
-@@ -165,7 +167,7 @@ static inline void __native_flush_tlb(void)
- * back:
- */
- preempt_disable();
-- if (this_cpu_has(X86_FEATURE_PCID))
-+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
- kaiser_flush_tlb_on_return_to_user();
- native_write_cr3(native_read_cr3());
- preempt_enable();
-@@ -176,20 +178,30 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
- unsigned long cr4;
-
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
-- /* clear PGE */
-- native_write_cr4(cr4 & ~X86_CR4_PGE);
-- /* write old PGE again and flush TLBs */
-- native_write_cr4(cr4);
-+ if (cr4 & X86_CR4_PGE) {
-+ /* clear PGE and flush TLB of all entries */
-+ native_write_cr4(cr4 & ~X86_CR4_PGE);
-+ /* restore PGE as it was before */
-+ native_write_cr4(cr4);
-+ } else {
-+ /*
-+ * x86_64 microcode update comes this way when CR4.PGE is not
-+ * enabled, and it's safer for all callers to allow this case.
-+ */
-+ native_write_cr3(native_read_cr3());
-+ }
- }
-
- static inline void __native_flush_tlb_global(void)
- {
--#ifdef CONFIG_KAISER
-- /* Globals are not used at all */
-- __native_flush_tlb();
--#else
- unsigned long flags;
-
-+ if (kaiser_enabled) {
-+ /* Globals are not used at all */
-+ __native_flush_tlb();
-+ return;
-+ }
-+
- if (this_cpu_has(X86_FEATURE_INVPCID)) {
- /*
- * Using INVPCID is considerably faster than a pair of writes
-@@ -209,7 +221,6 @@ static inline void __native_flush_tlb_global(void)
- raw_local_irq_save(flags);
- __native_flush_tlb_global_irq_disabled();
- raw_local_irq_restore(flags);
--#endif
- }
-
- static inline void __native_flush_tlb_single(unsigned long addr)
-@@ -224,7 +235,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
- */
-
- if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
-- if (this_cpu_has(X86_FEATURE_PCID))
-+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
- kaiser_flush_tlb_on_return_to_user();
- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
- return;
-@@ -239,9 +250,9 @@ static inline void __native_flush_tlb_single(unsigned long addr)
- * Make sure to do only a single invpcid when KAISER is
- * disabled and we have only a single ASID.
- */
-- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
-- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
-- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
-+ if (kaiser_enabled)
-+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
-+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
- }
-
- static inline void __flush_tlb_all(void)
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index e6be5f3..8b03874 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s)
- return 1;
- }
- __setup("nopcid", x86_pcid_setup);
-+
-+static int __init x86_nokaiser_setup(char *s)
-+{
-+ /* nokaiser doesn't accept parameters */
-+ if (s)
-+ return -EINVAL;
-+#ifdef CONFIG_KAISER
-+ kaiser_enabled = 0;
-+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
-+ pr_info("nokaiser: KAISER feature disabled\n");
-+#endif
-+ return 0;
-+}
-+early_param("nokaiser", x86_nokaiser_setup);
- #endif
-
- static int __init x86_noinvpcid_setup(char *s)
-@@ -327,7 +341,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
- static void setup_pcid(struct cpuinfo_x86 *c)
- {
- if (cpu_has(c, X86_FEATURE_PCID)) {
-- if (cpu_has(c, X86_FEATURE_PGE)) {
-+ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
- cr4_set_bits(X86_CR4_PCIDE);
- /*
- * INVPCID has two "groups" of types:
-@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
- c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
-
- init_scattered_cpuid_features(c);
-+#ifdef CONFIG_KAISER
-+ if (kaiser_enabled)
-+ set_cpu_cap(c, X86_FEATURE_KAISER);
-+#endif
- }
-
- static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
-@@ -1537,6 +1555,14 @@ void cpu_init(void)
- * try to read it.
- */
- cr4_init_shadow();
-+ if (!kaiser_enabled) {
-+ /*
-+ * secondary_startup_64() deferred setting PGE in cr4:
-+ * probe_page_size_mask() sets it on the boot cpu,
-+ * but it needs to be set on each secondary cpu.
-+ */
-+ cr4_set_bits(X86_CR4_PGE);
-+ }
-
- /*
- * Load microcode on this cpu if a valid microcode is available.
-diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
-index 560c2fd..e33b385 100644
---- a/arch/x86/kernel/espfix_64.c
-+++ b/arch/x86/kernel/espfix_64.c
-@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void)
- * area to ensure it is mapped into the shadow user page
- * tables.
- */
-- if (IS_ENABLED(CONFIG_KAISER))
-+ if (kaiser_enabled) {
- set_pgd(native_get_shadow_pgd(pgd_p),
- __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
-+ }
-
- /* Randomize the locations */
- init_espfix_random();
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 5775379..d04479b 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64)
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- 1:
-
-- /* Enable PAE mode and PGE */
-- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
-+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
-+ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
- movq %rcx, %cr4
-
- /* Setup early boot stage 4 level pagetables. */
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index 22af912..05a9855 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(void)
- cr4_set_bits_and_update_boot(X86_CR4_PSE);
-
- /* Enable PGE if available */
-- if (boot_cpu_has(X86_FEATURE_PGE)) {
-+ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) {
- cr4_set_bits_and_update_boot(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- } else
-diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
-index 14b9dd7..a0e8df6 100644
---- a/arch/x86/mm/init_64.c
-+++ b/arch/x86/mm/init_64.c
-@@ -324,6 +324,16 @@ void __init cleanup_highmap(void)
- continue;
- if (vaddr < (unsigned long) _text || vaddr > end)
- set_pmd(pmd, __pmd(0));
-+ else if (kaiser_enabled) {
-+ /*
-+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
-+ * clear that now. This is not important, so long as
-+ * CR4.PGE remains clear, but it removes an anomaly.
-+ * Physical mapping setup below avoids _PAGE_GLOBAL
-+ * by use of massage_pgprot() inside pfn_pte() etc.
-+ */
-+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
-+ }
- }
- }
-
-diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
-index cc0950f..11032dc 100644
---- a/arch/x86/mm/kaiser.c
-+++ b/arch/x86/mm/kaiser.c
-@@ -16,7 +16,9 @@
- #include <asm/pgalloc.h>
- #include <asm/desc.h>
-
--#ifdef CONFIG_KAISER
-+int kaiser_enabled __read_mostly = 1;
-+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
-+
- __visible
- DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
-
-@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
- return pte_offset_kernel(pmd, address);
- }
-
--int kaiser_add_user_map(const void *__start_addr, unsigned long size,
-- unsigned long flags)
-+static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
-+ unsigned long flags)
- {
- int ret = 0;
- pte_t *pte;
-@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size,
- unsigned long end_addr = PAGE_ALIGN(start_addr + size);
- unsigned long target_address;
-
-+ /*
-+ * It is convenient for callers to pass in __PAGE_KERNEL etc,
-+ * and there is no actual harm from setting _PAGE_GLOBAL, so
-+ * long as CR4.PGE is not set. But it is nonetheless troubling
-+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
-+ * requires that not to be #defined to 0): so mask it off here.
-+ */
-+ flags &= ~_PAGE_GLOBAL;
-+
- for (; address < end_addr; address += PAGE_SIZE) {
- target_address = get_pa_from_mapping(address);
- if (target_address == -1) {
-@@ -263,6 +274,8 @@ void __init kaiser_init(void)
- {
- int cpu;
-
-+ if (!kaiser_enabled)
-+ return;
- kaiser_init_all_pgds();
-
- for_each_possible_cpu(cpu) {
-@@ -311,6 +324,8 @@ void __init kaiser_init(void)
- /* Add a mapping to the shadow mapping, and synchronize the mappings */
- int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
- {
-+ if (!kaiser_enabled)
-+ return 0;
- return kaiser_add_user_map((const void *)addr, size, flags);
- }
-
-@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
- unsigned long addr, next;
- pgd_t *pgd;
-
-+ if (!kaiser_enabled)
-+ return;
- pgd = native_get_shadow_pgd(pgd_offset_k(start));
- for (addr = start; addr < end; pgd++, addr = next) {
- next = pgd_addr_end(addr, end);
-@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp)
-
- pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
- {
-+ if (!kaiser_enabled)
-+ return pgd;
- /*
- * Do we need to also populate the shadow pgd? Check _PAGE_USER to
- * skip cases like kexec and EFI which make temporary low mappings.
-@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(void)
- X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
- }
- EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
--#endif /* CONFIG_KAISER */
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 352fd01..5aaec8e 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd)
- }
- #else
-
--#ifdef CONFIG_KAISER
- /*
-- * Instead of one pmd, we aquire two pmds. Being order-1, it is
-+ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
- * both 8k in size and 8k-aligned. That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
- */
--#define PGD_ALLOCATION_ORDER 1
--#else
--#define PGD_ALLOCATION_ORDER 0
--#endif
-+#define PGD_ALLOCATION_ORDER kaiser_enabled
-
- static inline pgd_t *_pgd_alloc(void)
- {
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 852c665..fde44bb 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -41,8 +41,7 @@ static void load_new_mm_cr3(pgd_t *pgdir)
- {
- unsigned long new_mm_cr3 = __pa(pgdir);
-
--#ifdef CONFIG_KAISER
-- if (this_cpu_has(X86_FEATURE_PCID)) {
-+ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
- /*
- * We reuse the same PCID for different tasks, so we must
- * flush all the entries for the PCID out when we change tasks.
-@@ -59,7 +58,6 @@ static void load_new_mm_cr3(pgd_t *pgdir)
- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
- kaiser_flush_tlb_on_return_to_user();
- }
--#endif /* CONFIG_KAISER */
-
- /*
- * Caution: many callers of this function expect
-diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
-index a396292..67c93d9 100644
---- a/tools/arch/x86/include/asm/cpufeatures.h
-+++ b/tools/arch/x86/include/asm/cpufeatures.h
-@@ -197,6 +197,9 @@
- #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
- #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-
-+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
-+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
-+
- /* Virtualization flags: Linux defined, word 8 */
- #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
- #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
---
-2.7.4
-