Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch | 1025 |
1 file changed, 1025 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch new file mode 100644 index 00000000..7b0132c6 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0004-KAISER-Kernel-Address-Isolation.patch @@ -0,0 +1,1025 @@ +From ebbc1312aa1e8495c5a920640ecd961251e136a8 Mon Sep 17 00:00:00 2001 +From: Richard Fellner <richard.fellner@student.tugraz.at> +Date: Thu, 4 May 2017 14:26:50 +0200 +Subject: [PATCH 004/102] KAISER: Kernel Address Isolation + +This patch introduces our implementation of KAISER (Kernel Address Isolation to +have Side-channels Efficiently Removed), a kernel isolation technique to close +hardware side channels on kernel address information. + +More information about the patch can be found on: + + https://github.com/IAIK/KAISER + +From: Richard Fellner <richard.fellner@student.tugraz.at> +From: Daniel Gruss <daniel.gruss@iaik.tugraz.at> +Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode +Date: Thu, 4 May 2017 14:26:50 +0200 +Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2 +Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5 + +To: <linux-kernel@vger.kernel.org> +To: <kernel-hardening@lists.openwall.com> +Cc: <clementine.maurice@iaik.tugraz.at> +Cc: <moritz.lipp@iaik.tugraz.at> +Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at> +Cc: Richard Fellner <richard.fellner@student.tugraz.at> +Cc: Ingo Molnar <mingo@kernel.org> +Cc: <kirill.shutemov@linux.intel.com> +Cc: <anders.fogh@gdata-adan.de> + +After several recent works [1,2,3] KASLR on x86_64 was basically +considered dead by many researchers. We have been working on an +efficient but effective fix for this problem and found that not mapping +the kernel space when running in user mode is the solution to this +problem [4] (the corresponding paper [5] will be presented at ESSoS17). + +With this RFC patch we allow anybody to configure their kernel with the +flag CONFIG_KAISER to add our defense mechanism. + +If there are any questions we would love to answer them. +We also appreciate any comments! 
+ +Cheers, +Daniel (+ the KAISER team from Graz University of Technology) + +[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf +[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf +[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf +[4] https://github.com/IAIK/KAISER +[5] https://gruss.cc/files/kaiser.pdf + +[patch based also on +https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch] + +Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> +Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> +Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> +Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> +Acked-by: Jiri Kosina <jkosina@suse.cz> +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/entry/entry_64.S | 17 ++++ + arch/x86/entry/entry_64_compat.S | 7 +- + arch/x86/include/asm/hw_irq.h | 2 +- + arch/x86/include/asm/kaiser.h | 113 +++++++++++++++++++++++++ + arch/x86/include/asm/pgtable.h | 4 + + arch/x86/include/asm/pgtable_64.h | 21 +++++ + arch/x86/include/asm/pgtable_types.h | 12 ++- + arch/x86/include/asm/processor.h | 7 +- + arch/x86/kernel/cpu/common.c | 4 +- + arch/x86/kernel/espfix_64.c | 6 ++ + arch/x86/kernel/head_64.S | 16 +++- + arch/x86/kernel/irqinit.c | 2 +- + arch/x86/kernel/process.c | 2 +- + arch/x86/mm/Makefile | 2 +- + arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++ + arch/x86/mm/pageattr.c | 2 +- + arch/x86/mm/pgtable.c | 26 ++++++ + include/asm-generic/vmlinux.lds.h | 11 ++- + include/linux/percpu-defs.h | 30 +++++++ + init/main.c | 6 ++ + kernel/fork.c | 8 ++ + security/Kconfig | 7 ++ + 22 files changed, 449 insertions(+), 16 deletions(-) + create mode 100644 arch/x86/include/asm/kaiser.h + create mode 100644 arch/x86/mm/kaiser.c + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index ef766a3..6c880dc 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -36,6 +36,7 @@ + #include <asm/smap.h> + #include <asm/pgtable_types.h> + #include <asm/export.h> ++#include <asm/kaiser.h> + #include <linux/err.h> + + /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ +@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64) + * it is too small to ever cause noticeable irq latency. 
+ */ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + /* + * A hypervisor implementation might want to use a label + * after the swapgs, so that it can do the swapgs +@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath: + movq RIP(%rsp), %rcx + movq EFLAGS(%rsp), %r11 + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 + +@@ -323,10 +326,12 @@ return_from_SYSCALL_64: + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 ++ SWITCH_USER_CR3 + movq RSP(%rsp), %rsp + USERGS_SYSRET64 + + opportunistic_sysret_failed: ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret + END(entry_SYSCALL_64) +@@ -424,6 +429,7 @@ ENTRY(ret_from_fork) + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_regs_and_iret + +@@ -478,6 +484,7 @@ END(irq_entries_start) + * tracking that we're in kernel mode. + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + /* + * We need to tell lockdep that IRQs are off. We can't do this until +@@ -535,6 +542,7 @@ GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode + TRACE_IRQS_IRETQ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_regs_and_iret + +@@ -612,6 +620,7 @@ native_irq_return_ldt: + + pushq %rdi /* Stash user RDI */ + SWAPGS ++ SWITCH_KERNEL_CR3 + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ + movq (1*8)(%rsp), %rax /* user RIP */ +@@ -638,6 +647,7 @@ native_irq_return_ldt: + * still points to an RO alias of the ESPFIX stack. + */ + orq PER_CPU_VAR(espfix_stack), %rax ++ SWITCH_USER_CR3 + SWAPGS + movq %rax, %rsp + +@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry) + testl %edx, %edx + js 1f /* negative -> in kernel */ + SWAPGS ++ SWITCH_KERNEL_CR3 + xorl %ebx, %ebx + 1: ret + END(paranoid_entry) +@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit) + testl %ebx, %ebx /* swapgs needed? */ + jnz paranoid_exit_no_swapgs + TRACE_IRQS_IRETQ ++ SWITCH_USER_CR3_NO_STACK + SWAPGS_UNSAFE_STACK + jmp paranoid_exit_restore + paranoid_exit_no_swapgs: +@@ -1084,6 +1096,7 @@ ENTRY(error_entry) + * from user mode due to an IRET fault. + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + .Lerror_entry_from_usermode_after_swapgs: + /* +@@ -1135,6 +1148,7 @@ ENTRY(error_entry) + * Switch to kernel gsbase: + */ + SWAPGS ++ SWITCH_KERNEL_CR3 + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1233,6 +1247,7 @@ ENTRY(nmi) + */ + + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + cld + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +@@ -1273,6 +1288,7 @@ ENTRY(nmi) + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ ++ SWITCH_USER_CR3 + SWAPGS + jmp restore_c_regs_and_iret + +@@ -1484,6 +1500,7 @@ end_repeat_nmi: + testl %ebx, %ebx /* swapgs needed? */ + jnz nmi_restore + nmi_swapgs: ++ SWITCH_USER_CR3_NO_STACK + SWAPGS_UNSAFE_STACK + nmi_restore: + RESTORE_EXTRA_REGS +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index e1721da..f0e384e 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -13,6 +13,7 @@ + #include <asm/irqflags.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/kaiser.h> + #include <linux/linkage.h> + #include <linux/err.h> + +@@ -48,6 +49,7 @@ + ENTRY(entry_SYSENTER_compat) + /* Interrupts are off on entry. 
*/ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* +@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat) + ENTRY(entry_SYSCALL_compat) + /* Interrupts are off on entry. */ + SWAPGS_UNSAFE_STACK ++ SWITCH_KERNEL_CR3_NO_STACK + + /* Stash user ESP and switch to the kernel stack. */ + movl %esp, %r8d +@@ -259,6 +262,7 @@ sysret32_from_system_call: + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 ++ SWITCH_USER_CR3 + movq RSP-ORIG_RAX(%rsp), %rsp + swapgs + sysretl +@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat) + PARAVIRT_ADJUST_EXCEPTION_FRAME + ASM_CLAC /* Do this early to minimize exposure */ + SWAPGS +- ++ SWITCH_KERNEL_CR3_NO_STACK + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit +@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat) + + /* Go back to user mode. */ + TRACE_IRQS_ON ++ SWITCH_USER_CR3_NO_STACK + SWAPGS + jmp restore_regs_and_iret + END(entry_INT80_compat) +diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h +index b90e105..0817d63 100644 +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -178,7 +178,7 @@ extern char irq_entries_start[]; + #define VECTOR_RETRIGGERED ((void *)~0UL) + + typedef struct irq_desc* vector_irq_t[NR_VECTORS]; +-DECLARE_PER_CPU(vector_irq_t, vector_irq); ++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq); + + #endif /* !ASSEMBLY_ */ + +diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h +new file mode 100644 +index 0000000..63ee830 +--- /dev/null ++++ b/arch/x86/include/asm/kaiser.h +@@ -0,0 +1,113 @@ ++#ifndef _ASM_X86_KAISER_H ++#define _ASM_X86_KAISER_H ++ ++/* This file includes the definitions for the KAISER feature. ++ * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory. ++ * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped, ++ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled, ++ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled. ++ * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory. ++ * ++ * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions ++ * of the user space, or the stacks. 
++ */ ++#ifdef __ASSEMBLY__ ++#ifdef CONFIG_KAISER ++ ++.macro _SWITCH_TO_KERNEL_CR3 reg ++movq %cr3, \reg ++andq $(~0x1000), \reg ++movq \reg, %cr3 ++.endm ++ ++.macro _SWITCH_TO_USER_CR3 reg ++movq %cr3, \reg ++orq $(0x1000), \reg ++movq \reg, %cr3 ++.endm ++ ++.macro SWITCH_KERNEL_CR3 ++pushq %rax ++_SWITCH_TO_KERNEL_CR3 %rax ++popq %rax ++.endm ++ ++.macro SWITCH_USER_CR3 ++pushq %rax ++_SWITCH_TO_USER_CR3 %rax ++popq %rax ++.endm ++ ++.macro SWITCH_KERNEL_CR3_NO_STACK ++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) ++_SWITCH_TO_KERNEL_CR3 %rax ++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax ++.endm ++ ++ ++.macro SWITCH_USER_CR3_NO_STACK ++ ++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) ++_SWITCH_TO_USER_CR3 %rax ++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax ++ ++.endm ++ ++#else /* CONFIG_KAISER */ ++ ++.macro SWITCH_KERNEL_CR3 reg ++.endm ++.macro SWITCH_USER_CR3 reg ++.endm ++.macro SWITCH_USER_CR3_NO_STACK ++.endm ++.macro SWITCH_KERNEL_CR3_NO_STACK ++.endm ++ ++#endif /* CONFIG_KAISER */ ++#else /* __ASSEMBLY__ */ ++ ++ ++#ifdef CONFIG_KAISER ++// Upon kernel/user mode switch, it may happen that ++// the address space has to be switched before the registers have been stored. ++// To change the address space, another register is needed. ++// A register therefore has to be stored/restored. ++// ++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++#endif /* CONFIG_KAISER */ ++ ++/** ++ * shadowmem_add_mapping - map a virtual memory part to the shadow mapping ++ * @addr: the start address of the range ++ * @size: the size of the range ++ * @flags: The mapping flags of the pages ++ * ++ * the mapping is done on a global scope, so no bigger synchronization has to be done. ++ * the pages have to be manually unmapped again when they are not needed any longer. ++ */ ++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); ++ ++ ++/** ++ * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping ++ * @addr: the start address of the range ++ * @size: the size of the range ++ */ ++extern void kaiser_remove_mapping(unsigned long start, unsigned long size); ++ ++/** ++ * shadowmem_initialize_mapping - Initalize the shadow mapping ++ * ++ * most parts of the shadow mapping can be mapped upon boot time. ++ * only the thread stacks have to be mapped on runtime. ++ * the mapped regions are not unmapped at all. 
++ */ ++extern void kaiser_init(void); ++ ++#endif ++ ++ ++ ++#endif /* _ASM_X86_KAISER_H */ +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 437feb4..4b479c9 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { + memcpy(dst, src, count * sizeof(pgd_t)); ++#ifdef CONFIG_KAISER ++ // clone the shadow pgd part as well ++ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t)); ++#endif + } + + #define PTE_SHIFT ilog2(PTRS_PER_PTE) +diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h +index 1cc82ec..e6ea39f 100644 +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud) + native_set_pud(pud, native_make_pud(0)); + } + ++#ifdef CONFIG_KAISER ++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) { ++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); ++} ++ ++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) { ++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); ++} ++#endif /* CONFIG_KAISER */ ++ + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { ++#ifdef CONFIG_KAISER ++ // We know that a pgd is page aligned. ++ // Therefore the lower indices have to be mapped to user space. ++ // These pages are mapped to the shadow mapping. ++ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) { ++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; ++ } ++ ++ pgdp->pgd = pgd.pgd & ~_PAGE_USER; ++#else /* CONFIG_KAISER */ + *pgdp = pgd; ++#endif + } + + static inline void native_pgd_clear(pgd_t *pgd) +diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h +index 8b4de22..00fecbb 100644 +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -45,7 +45,11 @@ + #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#ifdef CONFIG_KAISER ++#define _PAGE_GLOBAL (_AT(pteval_t, 0)) ++#else ++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#endif + #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) + #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +@@ -119,7 +123,11 @@ + #define _PAGE_DEVMAP (_AT(pteval_t, 0)) + #endif + +-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++#ifdef CONFIG_KAISER ++#define _PAGE_PROTNONE (_AT(pteval_t, 0)) ++#else ++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++#endif + + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 83db0ea..3d4784e2 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -308,7 +308,7 @@ struct tss_struct { + + } ____cacheline_aligned; + +-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); ++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss); + + #ifdef CONFIG_X86_32 + DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +@@ 
-335,6 +335,11 @@ union irq_stack_union { + char gs_base[40]; + unsigned long stack_canary; + }; ++ ++ struct { ++ char irq_stack_pointer[64]; ++ char unused[IRQ_STACK_SIZE - 64]; ++ }; + }; + + DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 91588be..3efde13 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu = { + + static const struct cpu_dev *this_cpu = &default_cpu; + +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = { + #ifdef CONFIG_X86_64 + /* + * We need valid kernel segments for data and code in long mode too +@@ -1365,7 +1365,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [DEBUG_STACK - 1] = DEBUG_STKSZ + }; + +-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks ++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); + + /* May not be marked __init: used by software suspend */ +diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c +index 04f89ca..9ff875a 100644 +--- a/arch/x86/kernel/espfix_64.c ++++ b/arch/x86/kernel/espfix_64.c +@@ -41,6 +41,7 @@ + #include <asm/pgalloc.h> + #include <asm/setup.h> + #include <asm/espfix.h> ++#include <asm/kaiser.h> + + /* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round +@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void) + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); ++#ifdef CONFIG_KAISER ++ // add the esp stack pud to the shadow mapping here. 
++ // This can be done directly, because the fixup stack has its own pud ++ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page))); ++#endif + + /* Randomize the locations */ + init_espfix_random(); +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index b4421cc..9e849b5 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag) + .balign PAGE_SIZE; \ + GLOBAL(name) + ++#ifdef CONFIG_KAISER ++#define NEXT_PGD_PAGE(name) \ ++ .balign 2 * PAGE_SIZE; \ ++GLOBAL(name) ++#else ++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) ++#endif ++ + /* Automate the creation of 1 to 1 mapping pmd entries */ + #define PMDS(START, PERM, COUNT) \ + i = 0 ; \ +@@ -414,7 +422,7 @@ GLOBAL(name) + .endr + + __INITDATA +-NEXT_PAGE(early_level4_pgt) ++NEXT_PGD_PAGE(early_level4_pgt) + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + +@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts) + .data + + #ifndef CONFIG_XEN +-NEXT_PAGE(init_level4_pgt) +- .fill 512,8,0 ++NEXT_PGD_PAGE(init_level4_pgt) ++ .fill 2*512,8,0 + #else +-NEXT_PAGE(init_level4_pgt) ++NEXT_PGD_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c +index 1423ab1..f480b38 100644 +--- a/arch/x86/kernel/irqinit.c ++++ b/arch/x86/kernel/irqinit.c +@@ -51,7 +51,7 @@ static struct irqaction irq2 = { + .flags = IRQF_NO_THREAD, + }; + +-DEFINE_PER_CPU(vector_irq_t, vector_irq) = { ++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = { + [0 ... NR_VECTORS - 1] = VECTOR_UNUSED, + }; + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 8e10e72..a55b320 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -41,7 +41,7 @@ + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { ++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = { + .x86_tss = { + .sp0 = TOP_OF_INIT_STACK, + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile +index 96d2b84..682c162 100644 +--- a/arch/x86/mm/Makefile ++++ b/arch/x86/mm/Makefile +@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o + obj-$(CONFIG_X86_INTEL_MPX) += mpx.o + obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o + obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +- ++obj-$(CONFIG_KAISER) += kaiser.o +diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c +new file mode 100644 +index 0000000..cf1bb92 +--- /dev/null ++++ b/arch/x86/mm/kaiser.c +@@ -0,0 +1,160 @@ ++ ++ ++#include <linux/kernel.h> ++#include <linux/errno.h> ++#include <linux/string.h> ++#include <linux/types.h> ++#include <linux/bug.h> ++#include <linux/init.h> ++#include <linux/spinlock.h> ++#include <linux/mm.h> ++ ++#include <linux/uaccess.h> ++#include <asm/pgtable.h> ++#include <asm/pgalloc.h> ++#include <asm/desc.h> ++#ifdef CONFIG_KAISER ++ ++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); ++ ++/** ++ * Get the real ppn from a address in kernel mapping. 
++ * @param address The virtual adrress ++ * @return the physical address ++ */ ++static inline unsigned long get_pa_from_mapping (unsigned long address) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ pgd = pgd_offset_k(address); ++ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd)); ++ ++ pud = pud_offset(pgd, address); ++ BUG_ON(pud_none(*pud)); ++ ++ if (pud_large(*pud)) { ++ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK); ++ } ++ ++ pmd = pmd_offset(pud, address); ++ BUG_ON(pmd_none(*pmd)); ++ ++ if (pmd_large(*pmd)) { ++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK); ++ } ++ ++ pte = pte_offset_kernel(pmd, address); ++ BUG_ON(pte_none(*pte)); ++ ++ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK); ++} ++ ++void _kaiser_copy (unsigned long start_addr, unsigned long size, ++ unsigned long flags) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ unsigned long address; ++ unsigned long end_addr = start_addr + size; ++ unsigned long target_address; ++ ++ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1)); ++ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) { ++ target_address = get_pa_from_mapping(address); ++ ++ pgd = native_get_shadow_pgd(pgd_offset_k(address)); ++ ++ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n"); ++ BUG_ON(pgd_large(*pgd)); ++ ++ pud = pud_offset(pgd, address); ++ if (pud_none(*pud)) { ++ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address)))); ++ } ++ BUG_ON(pud_large(*pud)); ++ ++ pmd = pmd_offset(pud, address); ++ if (pmd_none(*pmd)) { ++ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address)))); ++ } ++ BUG_ON(pmd_large(*pmd)); ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (pte_none(*pte)) { ++ set_pte(pte, __pte(flags | target_address)); ++ } else { ++ BUG_ON(__pa(pte_page(*pte)) != target_address); ++ } ++ } ++} ++ ++// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping ++static inline void __init _kaiser_init(void) ++{ ++ pgd_t *pgd; ++ int i = 0; ++ ++ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0)); ++ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) { ++ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0)))); ++ } ++} ++ ++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; ++spinlock_t shadow_table_lock; ++void __init kaiser_init(void) ++{ ++ int cpu; ++ spin_lock_init(&shadow_table_lock); ++ ++ spin_lock(&shadow_table_lock); ++ ++ _kaiser_init(); ++ ++ for_each_possible_cpu(cpu) { ++ // map the per cpu user variables ++ _kaiser_copy( ++ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)), ++ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start, ++ __PAGE_KERNEL); ++ } ++ ++ // map the entry/exit text section, which is responsible to switch between user- and kernel mode ++ _kaiser_copy( ++ (unsigned long) __entry_text_start, ++ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start, ++ __PAGE_KERNEL_RX); ++ ++ // the fixed map address of the idt_table ++ _kaiser_copy( ++ (unsigned long) idt_descr.address, ++ sizeof(gate_desc) * NR_VECTORS, ++ __PAGE_KERNEL_RO); ++ ++ spin_unlock(&shadow_table_lock); ++} ++ ++// add a mapping to the shadow-mapping, and synchronize the mappings ++void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) ++{ ++ spin_lock(&shadow_table_lock); ++ _kaiser_copy(addr, size, flags); ++ 
spin_unlock(&shadow_table_lock); ++} ++ ++extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end); ++void kaiser_remove_mapping(unsigned long start, unsigned long size) ++{ ++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start)); ++ spin_lock(&shadow_table_lock); ++ do { ++ unmap_pud_range(pgd, start, start + size); ++ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size))); ++ spin_unlock(&shadow_table_lock); ++} ++#endif /* CONFIG_KAISER */ +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +index e3353c9..c17412f 100644 +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) + pud_clear(pud); + } + +-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) ++void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) + { + pud_t *pud = pud_offset(pgd, start); + +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 3feec5a..27d218b 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd) + #else + static inline pgd_t *_pgd_alloc(void) + { ++#ifdef CONFIG_KAISER ++ // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory ++ // block. Therefore, we have to allocate at least 3 pages. However, the ++ // __get_free_pages returns us 4 pages. Hence, we store the base pointer at ++ // the beginning of the page of our 8kb-aligned memory block in order to ++ // correctly free it afterwars. ++ ++ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE)); ++ ++ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages) ++ { ++ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages; ++ return (pgd_t *) pages; ++ } ++ else ++ { ++ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages; ++ return (pgd_t *) (pages + PAGE_SIZE); ++ } ++#else + return (pgd_t *)__get_free_page(PGALLOC_GFP); ++#endif + } + + static inline void _pgd_free(pgd_t *pgd) + { ++#ifdef CONFIG_KAISER ++ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE)); ++ free_pages(pages, get_order(4*PAGE_SIZE)); ++#else + free_page((unsigned long)pgd); ++#endif + } + #endif /* CONFIG_X86_PAE */ + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 31e1d63..0b16b5d 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -764,7 +764,16 @@ + */ + #define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ +- *(.data..percpu..first) \ ++ \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \ ++ *(.data..percpu..first) \ ++ . = ALIGN(cacheline); \ ++ *(.data..percpu..user_mapped) \ ++ *(.data..percpu..user_mapped..shared_aligned) \ ++ . = ALIGN(PAGE_SIZE); \ ++ *(.data..percpu..user_mapped..page_aligned) \ ++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \ ++ \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . 
= ALIGN(cacheline); \ +diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h +index 8f16299..8ea945f 100644 +--- a/include/linux/percpu-defs.h ++++ b/include/linux/percpu-defs.h +@@ -35,6 +35,12 @@ + + #endif + ++#ifdef CONFIG_KAISER ++#define USER_MAPPED_SECTION "..user_mapped" ++#else ++#define USER_MAPPED_SECTION "" ++#endif ++ + /* + * Base implementations of per-CPU variable declarations and definitions, where + * the section in which the variable is to be placed is provided by the +@@ -115,6 +121,12 @@ + #define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") + ++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) ++ ++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) ++ + /* + * Declaration/definition used for per-CPU variables that must come first in + * the set of variables. +@@ -144,6 +156,14 @@ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + ++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ++ ____cacheline_aligned_in_smp ++ ++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ++ ____cacheline_aligned_in_smp ++ + #define DECLARE_PER_CPU_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ + ____cacheline_aligned +@@ -162,6 +182,16 @@ + #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(PAGE_SIZE) ++/* ++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. ++ */ ++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) ++ ++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ ++ __aligned(PAGE_SIZE) + + /* + * Declaration/definition used for per-CPU variables that must be read mostly. 
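
For readers tracing the DEFINE_PER_CPU_USER_MAPPED plumbing above, the short stand-alone sketch below (plain user-space C, not part of the patch; the DEFINE_DEMO_PER_CPU macro and demo_* variable names are invented for illustration) shows the idea behind the section-name suffix: appending "..user_mapped" steers a variable into .data..percpu..user_mapped, which the vmlinux.lds.h hunk brackets with __per_cpu_user_mapped_start/__per_cpu_user_mapped_end so that kaiser_init() can map exactly that window into the shadow page tables.

#include <stdio.h>

#define USER_MAPPED_SECTION "..user_mapped"

/* Illustrative stand-in for DEFINE_PER_CPU_SECTION(): place the variable in
 * ".data..percpu" plus the given suffix.  (The real kernel macro additionally
 * handles SMP section prefixes, alignment and visibility attributes.) */
#define DEFINE_DEMO_PER_CPU(type, name, suffix) \
	__attribute__((section(".data..percpu" suffix))) type name

DEFINE_DEMO_PER_CPU(unsigned long, demo_user_mapped_var, USER_MAPPED_SECTION) = 1;
DEFINE_DEMO_PER_CPU(unsigned long, demo_plain_var, "") = 2;

int main(void)
{
	/* Compile with "cc -c" and inspect the object file with "objdump -t":
	 * demo_user_mapped_var lands in .data..percpu..user_mapped while
	 * demo_plain_var stays in .data..percpu.  A linker script rule like
	 * the PERCPU_INPUT hunk above can then collect the user-mapped
	 * section into one contiguous, page-alignable region. */
	printf("%p %p\n", (void *)&demo_user_mapped_var, (void *)&demo_plain_var);
	return 0;
}

Only the data actually needed on the entry/exit path (the TSS, GDT, exception stacks, vector_irq and the CR3 scratch slot) is placed in this window, so the shadow page tables expose as little of the kernel as possible.
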
+diff --git a/init/main.c b/init/main.c +index f23b7fa..d2c8c23 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -87,6 +87,9 @@ + #include <asm/setup.h> + #include <asm/sections.h> + #include <asm/cacheflush.h> ++#ifdef CONFIG_KAISER ++#include <asm/kaiser.h> ++#endif + + static int kernel_init(void *); + +@@ -474,6 +477,9 @@ static void __init mm_init(void) + pgtable_init(); + vmalloc_init(); + ioremap_huge_init(); ++#ifdef CONFIG_KAISER ++ kaiser_init(); ++#endif + } + + asmlinkage __visible void __init start_kernel(void) +diff --git a/kernel/fork.c b/kernel/fork.c +index fc76aff..d34394e 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + #endif + } + ++extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size); + static inline void free_thread_stack(struct task_struct *tsk) + { ++#ifdef CONFIG_KAISER ++ kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE); ++#endif + #ifdef CONFIG_VMAP_STACK + if (task_stack_vm_area(tsk)) { + unsigned long flags; +@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) + *stackend = STACK_END_MAGIC; /* for overflow detection */ + } + ++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags); + static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + { + struct task_struct *tsk; +@@ -495,6 +500,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + * functions again. + */ + tsk->stack = stack; ++#ifdef CONFIG_KAISER ++ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL); ++#endif + #ifdef CONFIG_VMAP_STACK + tsk->stack_vm_area = stack_vm_area; + #endif +diff --git a/security/Kconfig b/security/Kconfig +index 118f454..f515ac3 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -30,6 +30,13 @@ config SECURITY + model will be used. + + If you are unsure how to answer this question, answer N. ++config KAISER ++ bool "Remove the kernel mapping in user mode" ++ depends on X86_64 ++ depends on !PARAVIRT ++ help ++ This enforces a strict kernel and user space isolation in order to close ++ hardware side channels on kernel address information. + + config SECURITYFS + bool "Enable the securityfs filesystem" +-- +2.7.4 + |
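
Taken together, the hunks above give each process an 8 KiB-aligned pair of top-level page tables: the lower page is the full kernel PGD, the upper page is the minimal shadow PGD used while in user mode. The user-space C sketch below (not kernel code; PAGE_SIZE and the helper names mirror the patch, but the program itself is only an illustration of the pointer arithmetic) shows what native_get_shadow_pgd()/native_get_normal_pgd() compute and what the _SWITCH_TO_KERNEL_CR3/_SWITCH_TO_USER_CR3 macros toggle: bit 12 (0x1000) of the PGD address, and hence of the CR3 value.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* Mirrors native_get_shadow_pgd()/native_get_normal_pgd() from the
 * pgtable_64.h hunk: both PGDs live in one 8 KiB-aligned block, so bit 12
 * of the address (or of the CR3 value) selects which one is active. */
static uintptr_t shadow_pgd(uintptr_t pgd) { return pgd |  PAGE_SIZE; }
static uintptr_t normal_pgd(uintptr_t pgd) { return pgd & ~PAGE_SIZE; }

int main(void)
{
	/* Stand-in for the 8 KiB-aligned two-page block that _pgd_alloc()
	 * carves out of a four-page allocation in the pgtable.c hunk. */
	unsigned char *block = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
	assert(block != NULL);

	uintptr_t kernel_pgd = (uintptr_t)block;       /* lower page: full kernel mapping    */
	uintptr_t user_pgd   = shadow_pgd(kernel_pgd); /* upper page: minimal shadow mapping */

	/* SWITCH_KERNEL_CR3 clears bit 12 on every kernel entry;
	 * SWITCH_USER_CR3 sets it again before returning to user mode. */
	assert(normal_pgd(user_pgd)   == kernel_pgd);
	assert(shadow_pgd(kernel_pgd) == user_pgd);

	printf("kernel pgd %#lx, shadow pgd %#lx\n",
	       (unsigned long)kernel_pgd, (unsigned long)user_pgd);

	free(block);
	return 0;
}

Because the shadow PGD only carries the per-CPU user-mapped window, the entry/exit text and the kernel stacks added via kaiser_add_mapping(), user-mode probes see no other kernel mappings, which is what closes the address-information side channels cited in the commit message.
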