Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch  1025
1 file changed, 0 insertions, 1025 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch
deleted file mode 100644
index d61b397e..00000000
--- a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch
+++ /dev/null
@@ -1,1025 +0,0 @@
-From ff1ce9f00432d65859fd923ce7eb86d605386f17 Mon Sep 17 00:00:00 2001
-From: Richard Fellner <richard.fellner@student.tugraz.at>
-Date: Thu, 4 May 2017 14:26:50 +0200
-Subject: [PATCH 004/103] KAISER: Kernel Address Isolation
-
-This patch introduces our implementation of KAISER (Kernel Address Isolation to
-have Side-channels Efficiently Removed), a kernel isolation technique to close
-hardware side channels on kernel address information.
-
-More information about the patch can be found on:
-
- https://github.com/IAIK/KAISER
-
-From: Richard Fellner <richard.fellner@student.tugraz.at>
-From: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
-Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode
-Date: Thu, 4 May 2017 14:26:50 +0200
-Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2
-Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5
-
-To: <linux-kernel@vger.kernel.org>
-To: <kernel-hardening@lists.openwall.com>
-Cc: <clementine.maurice@iaik.tugraz.at>
-Cc: <moritz.lipp@iaik.tugraz.at>
-Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
-Cc: Richard Fellner <richard.fellner@student.tugraz.at>
-Cc: Ingo Molnar <mingo@kernel.org>
-Cc: <kirill.shutemov@linux.intel.com>
-Cc: <anders.fogh@gdata-adan.de>
-
-After several recent works [1,2,3], KASLR on x86_64 was considered
-effectively dead by many researchers. We have been working on an
-efficient yet effective fix and found that not mapping the kernel
-space while running in user mode solves the problem [4] (the
-corresponding paper [5] will be presented at ESSoS17).
-
-With this RFC patch, anybody can build their kernel with the config
-option CONFIG_KAISER to enable our defense mechanism.
-
-If there are any questions we would love to answer them.
-We also appreciate any comments!
-
-Cheers,
-Daniel (+ the KAISER team from Graz University of Technology)
-
-[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
-[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf
-[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf
-[4] https://github.com/IAIK/KAISER
-[5] https://gruss.cc/files/kaiser.pdf
-
-[patch based also on
-https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch]
-
-Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
-Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
-Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
-Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
-Acked-by: Jiri Kosina <jkosina@suse.cz>
-Signed-off-by: Hugh Dickins <hughd@google.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/entry/entry_64.S | 17 ++++
- arch/x86/entry/entry_64_compat.S | 7 +-
- arch/x86/include/asm/hw_irq.h | 2 +-
- arch/x86/include/asm/kaiser.h | 113 +++++++++++++++++++++++++
- arch/x86/include/asm/pgtable.h | 4 +
- arch/x86/include/asm/pgtable_64.h | 21 +++++
- arch/x86/include/asm/pgtable_types.h | 12 ++-
- arch/x86/include/asm/processor.h | 7 +-
- arch/x86/kernel/cpu/common.c | 4 +-
- arch/x86/kernel/espfix_64.c | 6 ++
- arch/x86/kernel/head_64.S | 16 +++-
- arch/x86/kernel/irqinit.c | 2 +-
- arch/x86/kernel/process.c | 2 +-
- arch/x86/mm/Makefile | 2 +-
- arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++
- arch/x86/mm/pageattr.c | 2 +-
- arch/x86/mm/pgtable.c | 26 ++++++
- include/asm-generic/vmlinux.lds.h | 11 ++-
- include/linux/percpu-defs.h | 30 +++++++
- init/main.c | 6 ++
- kernel/fork.c | 8 ++
- security/Kconfig | 7 ++
- 22 files changed, 449 insertions(+), 16 deletions(-)
- create mode 100644 arch/x86/include/asm/kaiser.h
- create mode 100644 arch/x86/mm/kaiser.c
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index ef766a3..6c880dc 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -36,6 +36,7 @@
- #include <asm/smap.h>
- #include <asm/pgtable_types.h>
- #include <asm/export.h>
-+#include <asm/kaiser.h>
- #include <linux/err.h>
-
- /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64)
- * it is too small to ever cause noticeable irq latency.
- */
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
- /*
- * A hypervisor implementation might want to use a label
- * after the swapgs, so that it can do the swapgs
-@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath:
- movq RIP(%rsp), %rcx
- movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
-+ SWITCH_USER_CR3
- movq RSP(%rsp), %rsp
- USERGS_SYSRET64
-
-@@ -323,10 +326,12 @@ return_from_SYSCALL_64:
- syscall_return_via_sysret:
- /* rcx and r11 are already restored (see code above) */
- RESTORE_C_REGS_EXCEPT_RCX_R11
-+ SWITCH_USER_CR3
- movq RSP(%rsp), %rsp
- USERGS_SYSRET64
-
- opportunistic_sysret_failed:
-+ SWITCH_USER_CR3
- SWAPGS
- jmp restore_c_regs_and_iret
- END(entry_SYSCALL_64)
-@@ -424,6 +429,7 @@ ENTRY(ret_from_fork)
- movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- TRACE_IRQS_ON /* user mode is traced as IRQS on */
-+ SWITCH_USER_CR3
- SWAPGS
- jmp restore_regs_and_iret
-
-@@ -478,6 +484,7 @@ END(irq_entries_start)
- * tracking that we're in kernel mode.
- */
- SWAPGS
-+ SWITCH_KERNEL_CR3
-
- /*
- * We need to tell lockdep that IRQs are off. We can't do this until
-@@ -535,6 +542,7 @@ GLOBAL(retint_user)
- mov %rsp,%rdi
- call prepare_exit_to_usermode
- TRACE_IRQS_IRETQ
-+ SWITCH_USER_CR3
- SWAPGS
- jmp restore_regs_and_iret
-
-@@ -612,6 +620,7 @@ native_irq_return_ldt:
-
- pushq %rdi /* Stash user RDI */
- SWAPGS
-+ SWITCH_KERNEL_CR3
- movq PER_CPU_VAR(espfix_waddr), %rdi
- movq %rax, (0*8)(%rdi) /* user RAX */
- movq (1*8)(%rsp), %rax /* user RIP */
-@@ -638,6 +647,7 @@ native_irq_return_ldt:
- * still points to an RO alias of the ESPFIX stack.
- */
- orq PER_CPU_VAR(espfix_stack), %rax
-+ SWITCH_USER_CR3
- SWAPGS
- movq %rax, %rsp
-
-@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry)
- testl %edx, %edx
- js 1f /* negative -> in kernel */
- SWAPGS
-+ SWITCH_KERNEL_CR3
- xorl %ebx, %ebx
- 1: ret
- END(paranoid_entry)
-@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit)
- testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
- TRACE_IRQS_IRETQ
-+ SWITCH_USER_CR3_NO_STACK
- SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
- paranoid_exit_no_swapgs:
-@@ -1084,6 +1096,7 @@ ENTRY(error_entry)
- * from user mode due to an IRET fault.
- */
- SWAPGS
-+ SWITCH_KERNEL_CR3
-
- .Lerror_entry_from_usermode_after_swapgs:
- /*
-@@ -1135,6 +1148,7 @@ ENTRY(error_entry)
- * Switch to kernel gsbase:
- */
- SWAPGS
-+ SWITCH_KERNEL_CR3
-
- /*
- * Pretend that the exception came from user mode: set up pt_regs
-@@ -1233,6 +1247,7 @@ ENTRY(nmi)
- */
-
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
- cld
- movq %rsp, %rdx
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-@@ -1273,6 +1288,7 @@ ENTRY(nmi)
- * work, because we don't want to enable interrupts. Fortunately,
- * do_nmi doesn't modify pt_regs.
- */
-+ SWITCH_USER_CR3
- SWAPGS
- jmp restore_c_regs_and_iret
-
-@@ -1484,6 +1500,7 @@ end_repeat_nmi:
- testl %ebx, %ebx /* swapgs needed? */
- jnz nmi_restore
- nmi_swapgs:
-+ SWITCH_USER_CR3_NO_STACK
- SWAPGS_UNSAFE_STACK
- nmi_restore:
- RESTORE_EXTRA_REGS
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index e1721da..f0e384e 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -13,6 +13,7 @@
- #include <asm/irqflags.h>
- #include <asm/asm.h>
- #include <asm/smap.h>
-+#include <asm/kaiser.h>
- #include <linux/linkage.h>
- #include <linux/err.h>
-
-@@ -48,6 +49,7 @@
- ENTRY(entry_SYSENTER_compat)
- /* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- /*
-@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat)
- ENTRY(entry_SYSCALL_compat)
- /* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
-+ SWITCH_KERNEL_CR3_NO_STACK
-
- /* Stash user ESP and switch to the kernel stack. */
- movl %esp, %r8d
-@@ -259,6 +262,7 @@ sysret32_from_system_call:
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
-+ SWITCH_USER_CR3
- movq RSP-ORIG_RAX(%rsp), %rsp
- swapgs
- sysretl
-@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat)
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
--
-+ SWITCH_KERNEL_CR3_NO_STACK
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
-@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat)
-
- /* Go back to user mode. */
- TRACE_IRQS_ON
-+ SWITCH_USER_CR3_NO_STACK
- SWAPGS
- jmp restore_regs_and_iret
- END(entry_INT80_compat)
-diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
-index b90e105..0817d63 100644
---- a/arch/x86/include/asm/hw_irq.h
-+++ b/arch/x86/include/asm/hw_irq.h
-@@ -178,7 +178,7 @@ extern char irq_entries_start[];
- #define VECTOR_RETRIGGERED ((void *)~0UL)
-
- typedef struct irq_desc* vector_irq_t[NR_VECTORS];
--DECLARE_PER_CPU(vector_irq_t, vector_irq);
-+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
-
- #endif /* !ASSEMBLY_ */
-
-diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
-new file mode 100644
-index 0000000..63ee830
---- /dev/null
-+++ b/arch/x86/include/asm/kaiser.h
-@@ -0,0 +1,113 @@
-+#ifndef _ASM_X86_KAISER_H
-+#define _ASM_X86_KAISER_H
-+
-+/* This file includes the definitions for the KAISER feature.
-+ * KAISER is a countermeasure against x86_64 side-channel attacks on kernel
-+ * virtual memory. It maintains a shadow pgd for every process; the shadow
-+ * pgd has a minimal set of kernel mappings but includes the whole of user
-+ * memory. On entry to the kernel (context switch or interrupt) the pgd is
-+ * switched to the normal one; on return to user mode the shadow pgd is
-+ * installed. Kernel addresses are thus unmapped in user mode, and user
-+ * code cannot probe kernel memory through hardware side channels.
-+ *
-+ * The minimal kernel mapping holds only the parts that must stay mapped in
-+ * user mode, such as the entry/exit code and the stacks.
-+ */
-+#ifdef __ASSEMBLY__
-+#ifdef CONFIG_KAISER
-+
-+.macro _SWITCH_TO_KERNEL_CR3 reg
-+movq %cr3, \reg
-+andq $(~0x1000), \reg
-+movq \reg, %cr3
-+.endm
-+
-+.macro _SWITCH_TO_USER_CR3 reg
-+movq %cr3, \reg
-+orq $(0x1000), \reg
-+movq \reg, %cr3
-+.endm
-+
-+.macro SWITCH_KERNEL_CR3
-+pushq %rax
-+_SWITCH_TO_KERNEL_CR3 %rax
-+popq %rax
-+.endm
-+
-+.macro SWITCH_USER_CR3
-+pushq %rax
-+_SWITCH_TO_USER_CR3 %rax
-+popq %rax
-+.endm
-+
-+.macro SWITCH_KERNEL_CR3_NO_STACK
-+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-+_SWITCH_TO_KERNEL_CR3 %rax
-+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
-+.endm
-+
-+
-+.macro SWITCH_USER_CR3_NO_STACK
-+
-+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-+_SWITCH_TO_USER_CR3 %rax
-+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
-+
-+.endm
-+
-+#else /* CONFIG_KAISER */
-+
-+.macro SWITCH_KERNEL_CR3 reg
-+.endm
-+.macro SWITCH_USER_CR3 reg
-+.endm
-+.macro SWITCH_USER_CR3_NO_STACK
-+.endm
-+.macro SWITCH_KERNEL_CR3_NO_STACK
-+.endm
-+
-+#endif /* CONFIG_KAISER */
-+#else /* __ASSEMBLY__ */
-+
-+
-+#ifdef CONFIG_KAISER
-+// On a kernel/user mode switch the address space may have to be
-+// switched before the registers have been saved. Changing the address
-+// space needs a scratch register, so one register is backed up to this
-+// per-cpu variable and restored afterwards.
-+//
-+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
-+
-+#endif /* CONFIG_KAISER */
-+
-+/**
-+ * kaiser_add_mapping - map a range of virtual memory into the shadow mapping
-+ * @addr: the start address of the range
-+ * @size: the size of the range
-+ * @flags: the mapping flags of the pages
-+ *
-+ * The mapping is global in scope, so no further synchronization is needed.
-+ * The pages must be unmapped manually once they are no longer needed.
-+ */
-+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
-+
-+
-+/**
-+ * kaiser_remove_mapping - unmap a range of virtual memory from the shadow mapping
-+ * @start: the start address of the range
-+ * @size: the size of the range
-+ */
-+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
-+
-+/**
-+ * kaiser_init - initialize the shadow mapping
-+ *
-+ * Most of the shadow mapping can be set up at boot time; only the thread
-+ * stacks have to be mapped at runtime. Regions mapped here are never
-+ * unmapped.
-+ */
-+extern void kaiser_init(void);
-+
-+#endif
-+
-+
-+
-+#endif /* _ASM_X86_KAISER_H */
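
The macros above implement the address-space switch as a single bit flip in CR3. A minimal C sketch of the same arithmetic (illustration only, not part of the patch; the helper names are made up here) — it assumes the normal/shadow pgd pair occupies one 8kB-aligned block, as arranged by _pgd_alloc() in the arch/x86/mm/pgtable.c hunk further below:

/* Illustration only: the normal and shadow pgd form an 8kB-aligned
 * pair, so bit 12 of the pgd address -- PAGE_SIZE == 0x1000, the same
 * constant the macros above use -- selects between them via CR3. */
static inline unsigned long kaiser_user_cr3(unsigned long cr3)
{
	return cr3 | 0x1000;		/* shadow pgd: kernel unmapped */
}

static inline unsigned long kaiser_kernel_cr3(unsigned long cr3)
{
	return cr3 & ~0x1000UL;		/* normal pgd: full mapping */
}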
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 437feb4..4b479c9 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
- static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
- {
- memcpy(dst, src, count * sizeof(pgd_t));
-+#ifdef CONFIG_KAISER
-+ // clone the shadow pgd part as well
-+ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
-+#endif
- }
-
- #define PTE_SHIFT ilog2(PTRS_PER_PTE)
-diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
-index 1cc82ec..e6ea39f 100644
---- a/arch/x86/include/asm/pgtable_64.h
-+++ b/arch/x86/include/asm/pgtable_64.h
-@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud)
- native_set_pud(pud, native_make_pud(0));
- }
-
-+#ifdef CONFIG_KAISER
-+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
-+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
-+}
-+
-+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
-+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
-+}
-+#endif /* CONFIG_KAISER */
-+
- static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
- {
-+#ifdef CONFIG_KAISER
-+	// A pgd is page aligned, so the byte offset of an entry within the
-+	// page gives its index. Entries in the lower half of the page map
-+	// user-space addresses and must be mirrored into the shadow pgd.
-+ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
-+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
-+ }
-+
-+ pgdp->pgd = pgd.pgd & ~_PAGE_USER;
-+#else /* CONFIG_KAISER */
- *pgdp = pgd;
-+#endif
- }
-
- static inline void native_pgd_clear(pgd_t *pgd)
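
The `% PAGE_SIZE < PAGE_SIZE/2` test above relies on a pgd being exactly one 4kB page of 512 8-byte entries, whose lower 256 entries cover the user half of the x86_64 address space. A minimal sketch of the same check (illustrative helper name, not from the patch):

#include <stdbool.h>

/* Illustration only: entries at byte offsets 0..2047 within the pgd
 * page are the user-space entries that must also be written into the
 * shadow pgd. */
static inline bool pgd_entry_is_userspace(unsigned long pgdp)
{
	return (pgdp % 4096) < (4096 / 2);
}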
-diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
-index 8b4de22..00fecbb 100644
---- a/arch/x86/include/asm/pgtable_types.h
-+++ b/arch/x86/include/asm/pgtable_types.h
-@@ -45,7 +45,11 @@
- #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
- #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
- #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
--#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-+#ifdef CONFIG_KAISER
-+#define _PAGE_GLOBAL (_AT(pteval_t, 0))
-+#else
-+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-+#endif
- #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
- #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
- #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
-@@ -119,7 +123,11 @@
- #define _PAGE_DEVMAP (_AT(pteval_t, 0))
- #endif
-
--#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-+#ifdef CONFIG_KAISER
-+#define _PAGE_PROTNONE (_AT(pteval_t, 0))
-+#else
-+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-+#endif
-
- #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
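
Both hunks above follow from one hardware detail: TLB entries tagged global survive CR3 writes, and on x86 _PAGE_BIT_PROTNONE aliases _PAGE_BIT_GLOBAL. Since KAISER hides kernel translations precisely by rewriting CR3 on each transition, no kernel PTE may be global while it is enabled. A hedged sketch of that invariant (illustrative helper, not from the patch):

#include <assert.h>

/* Illustration only: with CONFIG_KAISER the global bit (bit 8 of an
 * x86 PTE) must never be set, or the TLB would keep the translation
 * alive across the user/kernel CR3 switch. */
static inline void kaiser_check_pte(unsigned long pteval)
{
	assert((pteval & (1UL << 8)) == 0);
}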
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 83db0ea..3d4784e2 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -308,7 +308,7 @@ struct tss_struct {
-
- } ____cacheline_aligned;
-
--DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
-+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
-
- #ifdef CONFIG_X86_32
- DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-@@ -335,6 +335,11 @@ union irq_stack_union {
- char gs_base[40];
- unsigned long stack_canary;
- };
-+
-+ struct {
-+ char irq_stack_pointer[64];
-+ char unused[IRQ_STACK_SIZE - 64];
-+ };
- };
-
- DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 91588be..3efde13 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu = {
-
- static const struct cpu_dev *this_cpu = &default_cpu;
-
--DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
-+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
- #ifdef CONFIG_X86_64
- /*
- * We need valid kernel segments for data and code in long mode too
-@@ -1365,7 +1365,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [DEBUG_STACK - 1] = DEBUG_STKSZ
- };
-
--static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
- /* May not be marked __init: used by software suspend */
-diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
-index 04f89ca..9ff875a 100644
---- a/arch/x86/kernel/espfix_64.c
-+++ b/arch/x86/kernel/espfix_64.c
-@@ -41,6 +41,7 @@
- #include <asm/pgalloc.h>
- #include <asm/setup.h>
- #include <asm/espfix.h>
-+#include <asm/kaiser.h>
-
- /*
- * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
-@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void)
- /* Install the espfix pud into the kernel page directory */
- pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
- pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
-+#ifdef CONFIG_KAISER
-+	// Add the espfix pud to the shadow mapping as well. This can be done
-+	// directly because the espfix stack has its own pud.
-+ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
-+#endif
-
- /* Randomize the locations */
- init_espfix_random();
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index b4421cc..9e849b5 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag)
- .balign PAGE_SIZE; \
- GLOBAL(name)
-
-+#ifdef CONFIG_KAISER
-+#define NEXT_PGD_PAGE(name) \
-+ .balign 2 * PAGE_SIZE; \
-+GLOBAL(name)
-+#else
-+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
-+#endif
-+
- /* Automate the creation of 1 to 1 mapping pmd entries */
- #define PMDS(START, PERM, COUNT) \
- i = 0 ; \
-@@ -414,7 +422,7 @@ GLOBAL(name)
- .endr
-
- __INITDATA
--NEXT_PAGE(early_level4_pgt)
-+NEXT_PGD_PAGE(early_level4_pgt)
- .fill 511,8,0
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
-
-@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts)
- .data
-
- #ifndef CONFIG_XEN
--NEXT_PAGE(init_level4_pgt)
-- .fill 512,8,0
-+NEXT_PGD_PAGE(init_level4_pgt)
-+ .fill 2*512,8,0
- #else
--NEXT_PAGE(init_level4_pgt)
-+NEXT_PGD_PAGE(init_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
-index 1423ab1..f480b38 100644
---- a/arch/x86/kernel/irqinit.c
-+++ b/arch/x86/kernel/irqinit.c
-@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
- .flags = IRQF_NO_THREAD,
- };
-
--DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-+DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
- [0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
- };
-
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 8e10e72..a55b320 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -41,7 +41,7 @@
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
--__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
- .x86_tss = {
- .sp0 = TOP_OF_INIT_STACK,
- #ifdef CONFIG_X86_32
-diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
-index 96d2b84..682c162 100644
---- a/arch/x86/mm/Makefile
-+++ b/arch/x86/mm/Makefile
-@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
- obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
- obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
- obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
--
-+obj-$(CONFIG_KAISER) += kaiser.o
-diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
-new file mode 100644
-index 0000000..cf1bb92
---- /dev/null
-+++ b/arch/x86/mm/kaiser.c
-@@ -0,0 +1,160 @@
-+
-+
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/string.h>
-+#include <linux/types.h>
-+#include <linux/bug.h>
-+#include <linux/init.h>
-+#include <linux/spinlock.h>
-+#include <linux/mm.h>
-+
-+#include <linux/uaccess.h>
-+#include <asm/pgtable.h>
-+#include <asm/pgalloc.h>
-+#include <asm/desc.h>
-+#ifdef CONFIG_KAISER
-+
-+__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
-+
-+/**
-+ * Get the physical address backing an address in the kernel mapping.
-+ * @param address the virtual address
-+ * @return the physical address
-+ */
-+static inline unsigned long get_pa_from_mapping (unsigned long address)
-+{
-+ pgd_t *pgd;
-+ pud_t *pud;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+
-+ pgd = pgd_offset_k(address);
-+ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
-+
-+ pud = pud_offset(pgd, address);
-+ BUG_ON(pud_none(*pud));
-+
-+ if (pud_large(*pud)) {
-+ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
-+ }
-+
-+ pmd = pmd_offset(pud, address);
-+ BUG_ON(pmd_none(*pmd));
-+
-+ if (pmd_large(*pmd)) {
-+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
-+ }
-+
-+ pte = pte_offset_kernel(pmd, address);
-+ BUG_ON(pte_none(*pte));
-+
-+ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
-+}
-+
-+void _kaiser_copy (unsigned long start_addr, unsigned long size,
-+ unsigned long flags)
-+{
-+ pgd_t *pgd;
-+ pud_t *pud;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+ unsigned long address;
-+ unsigned long end_addr = start_addr + size;
-+ unsigned long target_address;
-+
-+ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
-+ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
-+ target_address = get_pa_from_mapping(address);
-+
-+ pgd = native_get_shadow_pgd(pgd_offset_k(address));
-+
-+ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
-+ BUG_ON(pgd_large(*pgd));
-+
-+ pud = pud_offset(pgd, address);
-+ if (pud_none(*pud)) {
-+ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
-+ }
-+ BUG_ON(pud_large(*pud));
-+
-+ pmd = pmd_offset(pud, address);
-+ if (pmd_none(*pmd)) {
-+ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
-+ }
-+ BUG_ON(pmd_large(*pmd));
-+
-+ pte = pte_offset_kernel(pmd, address);
-+ if (pte_none(*pte)) {
-+ set_pte(pte, __pte(flags | target_address));
-+ } else {
-+ BUG_ON(__pa(pte_page(*pte)) != target_address);
-+ }
-+ }
-+}
-+
-+// First, allocate a pud for every pgd entry in the kernel half of the shadow mapping
-+static inline void __init _kaiser_init(void)
-+{
-+ pgd_t *pgd;
-+ int i = 0;
-+
-+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
-+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
-+ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
-+ }
-+}
-+
-+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-+spinlock_t shadow_table_lock;
-+void __init kaiser_init(void)
-+{
-+ int cpu;
-+ spin_lock_init(&shadow_table_lock);
-+
-+ spin_lock(&shadow_table_lock);
-+
-+ _kaiser_init();
-+
-+ for_each_possible_cpu(cpu) {
-+ // map the per cpu user variables
-+ _kaiser_copy(
-+ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
-+ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
-+ __PAGE_KERNEL);
-+ }
-+
-+	// map the entry/exit text section, which is responsible for switching between user and kernel mode
-+ _kaiser_copy(
-+ (unsigned long) __entry_text_start,
-+ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
-+ __PAGE_KERNEL_RX);
-+
-+	// map the fixed address of the idt_table
-+ _kaiser_copy(
-+ (unsigned long) idt_descr.address,
-+ sizeof(gate_desc) * NR_VECTORS,
-+ __PAGE_KERNEL_RO);
-+
-+ spin_unlock(&shadow_table_lock);
-+}
-+
-+// add a mapping to the shadow mapping, serialized by shadow_table_lock
-+void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
-+{
-+ spin_lock(&shadow_table_lock);
-+ _kaiser_copy(addr, size, flags);
-+ spin_unlock(&shadow_table_lock);
-+}
-+
-+extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
-+void kaiser_remove_mapping(unsigned long start, unsigned long size)
-+{
-+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
-+ spin_lock(&shadow_table_lock);
-+ do {
-+ unmap_pud_range(pgd, start, start + size);
-+ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
-+ spin_unlock(&shadow_table_lock);
-+}
-+#endif /* CONFIG_KAISER */
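
The kernel/fork.c hunk later in this patch shows the intended use of this interface: each new thread stack is added to the shadow mapping and removed again when the stack is freed. The call pattern, condensed from that hunk:

#ifdef CONFIG_KAISER
	/* after allocating a thread stack in dup_task_struct(): */
	kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE,
			   __PAGE_KERNEL);
	/* ... and in free_thread_stack(), before the stack is freed: */
	kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
#endif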
-diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
-index e3353c9..c17412f 100644
---- a/arch/x86/mm/pageattr.c
-+++ b/arch/x86/mm/pageattr.c
-@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
- pud_clear(pud);
- }
-
--static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
-+void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
- {
- pud_t *pud = pud_offset(pgd, start);
-
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 3feec5a..27d218b 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd)
- #else
- static inline pgd_t *_pgd_alloc(void)
- {
-+#ifdef CONFIG_KAISER
-+	// Instead of one PML4, we acquire two PML4s and thus need an
-+	// 8kB-aligned memory block, which means allocating at least 3
-+	// pages; __get_free_pages() returns 4. We store the allocation's
-+	// base pointer in the page right after the aligned pair so that
-+	// it can be freed correctly afterwards.
-+
-+ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
-+
-+ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
-+ {
-+ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
-+ return (pgd_t *) pages;
-+ }
-+ else
-+ {
-+ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
-+ return (pgd_t *) (pages + PAGE_SIZE);
-+ }
-+#else
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
-+#endif
- }
-
- static inline void _pgd_free(pgd_t *pgd)
- {
-+#ifdef CONFIG_KAISER
-+ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
-+ free_pages(pages, get_order(4*PAGE_SIZE));
-+#else
- free_page((unsigned long)pgd);
-+#endif
- }
- #endif /* CONFIG_X86_PAE */
-
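
Whichever of the two allocation cases applies in _pgd_alloc() above, the base pointer ends up exactly two pages past the returned pgd, which is why _pgd_free() can recover it without knowing which case occurred. A minimal sketch of that read-back (illustrative helper name; this is what the free path above does):

/* Illustration only: in both layouts the allocation's base pointer
 * sits at pgd + 2*PAGE_SIZE -- right after the 8kB pgd pair. */
static inline unsigned long pgd_alloc_base(pgd_t *pgd)
{
	return *(unsigned long *)((char *)pgd + 2 * PAGE_SIZE);
}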
-diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
-index 31e1d63..0b16b5d 100644
---- a/include/asm-generic/vmlinux.lds.h
-+++ b/include/asm-generic/vmlinux.lds.h
-@@ -764,7 +764,16 @@
- */
- #define PERCPU_INPUT(cacheline) \
- VMLINUX_SYMBOL(__per_cpu_start) = .; \
-- *(.data..percpu..first) \
-+ \
-+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
-+ *(.data..percpu..first) \
-+ . = ALIGN(cacheline); \
-+ *(.data..percpu..user_mapped) \
-+ *(.data..percpu..user_mapped..shared_aligned) \
-+ . = ALIGN(PAGE_SIZE); \
-+ *(.data..percpu..user_mapped..page_aligned) \
-+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
-+ \
- . = ALIGN(PAGE_SIZE); \
- *(.data..percpu..page_aligned) \
- . = ALIGN(cacheline); \
-diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
-index 8f16299..8ea945f 100644
---- a/include/linux/percpu-defs.h
-+++ b/include/linux/percpu-defs.h
-@@ -35,6 +35,12 @@
-
- #endif
-
-+#ifdef CONFIG_KAISER
-+#define USER_MAPPED_SECTION "..user_mapped"
-+#else
-+#define USER_MAPPED_SECTION ""
-+#endif
-+
- /*
- * Base implementations of per-CPU variable declarations and definitions, where
- * the section in which the variable is to be placed is provided by the
-@@ -115,6 +121,12 @@
- #define DEFINE_PER_CPU(type, name) \
- DEFINE_PER_CPU_SECTION(type, name, "")
-
-+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
-+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
-+
-+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
-+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
-+
- /*
- * Declaration/definition used for per-CPU variables that must come first in
- * the set of variables.
-@@ -144,6 +156,14 @@
- DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
- ____cacheline_aligned_in_smp
-
-+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
-+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
-+ ____cacheline_aligned_in_smp
-+
-+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
-+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
-+ ____cacheline_aligned_in_smp
-+
- #define DECLARE_PER_CPU_ALIGNED(type, name) \
- DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
- ____cacheline_aligned
-@@ -162,6 +182,16 @@
- #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
- DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
- __aligned(PAGE_SIZE)
-+/*
-+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
-+ */
-+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
-+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
-+ __aligned(PAGE_SIZE)
-+
-+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
-+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
-+ __aligned(PAGE_SIZE)
-
- /*
- * Declaration/definition used for per-CPU variables that must be read mostly.
-diff --git a/init/main.c b/init/main.c
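
For reference, the hw_irq.h and irqinit.c hunks earlier in this patch show how a declaration/definition pair built on these macros looks in practice:

/* declaration in a header: */
DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);

/* definition in one translation unit: */
DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
	[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};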
-index f23b7fa..d2c8c23 100644
---- a/init/main.c
-+++ b/init/main.c
-@@ -87,6 +87,9 @@
- #include <asm/setup.h>
- #include <asm/sections.h>
- #include <asm/cacheflush.h>
-+#ifdef CONFIG_KAISER
-+#include <asm/kaiser.h>
-+#endif
-
- static int kernel_init(void *);
-
-@@ -474,6 +477,9 @@ static void __init mm_init(void)
- pgtable_init();
- vmalloc_init();
- ioremap_huge_init();
-+#ifdef CONFIG_KAISER
-+ kaiser_init();
-+#endif
- }
-
- asmlinkage __visible void __init start_kernel(void)
-diff --git a/kernel/fork.c b/kernel/fork.c
-index f1751cb..61748d1 100644
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
- #endif
- }
-
-+extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
- static inline void free_thread_stack(struct task_struct *tsk)
- {
-+#ifdef CONFIG_KAISER
-+ kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
-+#endif
- #ifdef CONFIG_VMAP_STACK
- if (task_stack_vm_area(tsk)) {
- unsigned long flags;
-@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
- *stackend = STACK_END_MAGIC; /* for overflow detection */
- }
-
-+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
- static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
- {
- struct task_struct *tsk;
-@@ -495,6 +500,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
- * functions again.
- */
- tsk->stack = stack;
-+#ifdef CONFIG_KAISER
-+ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
-+#endif
- #ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
- #endif
-diff --git a/security/Kconfig b/security/Kconfig
-index 118f454..f515ac3 100644
---- a/security/Kconfig
-+++ b/security/Kconfig
-@@ -30,6 +30,13 @@ config SECURITY
- model will be used.
-
- If you are unsure how to answer this question, answer N.
-+config KAISER
-+ bool "Remove the kernel mapping in user mode"
-+ depends on X86_64
-+ depends on !PARAVIRT
-+ help
-+ This enforces a strict kernel and user space isolation in order to close
-+ hardware side channels on kernel address information.
-
- config SECURITYFS
- bool "Enable the securityfs filesystem"
---
-2.7.4
-