about summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig1
-rw-r--r--arch/alpha/include/asm/spinlock_types.h4
-rw-r--r--arch/arm/Kconfig5
-rw-r--r--arch/arm/include/asm/irq.h2
-rw-r--r--arch/arm/include/asm/spinlock_types.h4
-rw-r--r--arch/arm/include/asm/switch_to.h8
-rw-r--r--arch/arm/include/asm/thread_info.h8
-rw-r--r--arch/arm/kernel/asm-offsets.c1
-rw-r--r--arch/arm/kernel/entry-armv.S19
-rw-r--r--arch/arm/kernel/entry-common.S9
-rw-r--r--arch/arm/kernel/process.c24
-rw-r--r--arch/arm/kernel/signal.c3
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/arm/mach-at91/Kconfig23
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6q.c10
-rw-r--r--arch/arm/mm/fault.c6
-rw-r--r--arch/arm/mm/highmem.c58
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/crypto/Kconfig30
-rw-r--r--arch/arm64/include/asm/alternative.h6
-rw-r--r--arch/arm64/include/asm/preempt.h23
-rw-r--r--arch/arm64/include/asm/spinlock_types.h4
-rw-r--r--arch/arm64/include/asm/thread_info.h6
-rw-r--r--arch/arm64/kernel/alternative.c1
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/entry.S12
-rw-r--r--arch/arm64/kernel/fpsimd.c31
-rw-r--r--arch/arm64/kernel/signal.c2
-rw-r--r--arch/arm64/kvm/va_layout.c7
-rw-r--r--arch/hexagon/include/asm/spinlock_types.h4
-rw-r--r--arch/ia64/include/asm/spinlock_types.h4
-rw-r--r--arch/ia64/kernel/mca.c2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/include/asm/switch_to.h4
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c2
-rw-r--r--arch/mips/kernel/traps.c6
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/include/asm/spinlock_types.h4
-rw-r--r--arch/powerpc/include/asm/stackprotector.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h16
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/entry_32.S29
-rw-r--r--arch/powerpc/kernel/entry_64.S26
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/misc_32.S2
-rw-r--r--arch/powerpc/kernel/misc_64.S2
-rw-r--r--arch/powerpc/kernel/traps.c1
-rw-r--r--arch/powerpc/kernel/watchdog.c5
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c16
-rw-r--r--arch/s390/include/asm/spinlock_types.h4
-rw-r--r--arch/sh/include/asm/spinlock_types.h4
-rw-r--r--arch/sh/kernel/irq.c2
-rw-r--r--arch/sparc/kernel/irq_64.c2
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c22
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c21
-rw-r--r--arch/x86/crypto/chacha_glue.c11
-rw-r--r--arch/x86/crypto/glue_helper.c31
-rw-r--r--arch/x86/entry/common.c19
-rw-r--r--arch/x86/entry/entry_32.S17
-rw-r--r--arch/x86/entry/entry_64.S18
-rw-r--r--arch/x86/ia32/ia32_signal.c17
-rw-r--r--arch/x86/include/asm/fpu/api.h32
-rw-r--r--arch/x86/include/asm/fpu/internal.h128
-rw-r--r--arch/x86/include/asm/fpu/signal.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h9
-rw-r--r--arch/x86/include/asm/fpu/xstate.h5
-rw-r--r--arch/x86/include/asm/pgtable.h28
-rw-r--r--arch/x86/include/asm/preempt.h31
-rw-r--r--arch/x86/include/asm/signal.h13
-rw-r--r--arch/x86/include/asm/special_insns.h18
-rw-r--r--arch/x86/include/asm/stackprotector.h8
-rw-r--r--arch/x86/include/asm/thread_info.h13
-rw-r--r--arch/x86/include/asm/trace/fpu.h8
-rw-r--r--arch/x86/kernel/apic/io_apic.c23
-rw-r--r--arch/x86/kernel/asm-offsets.c2
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c2
-rw-r--r--arch/x86/kernel/fpu/core.c206
-rw-r--r--arch/x86/kernel/fpu/init.c2
-rw-r--r--arch/x86/kernel/fpu/regset.c24
-rw-r--r--arch/x86/kernel/fpu/signal.c189
-rw-r--r--arch/x86/kernel/fpu/xstate.c42
-rw-r--r--arch/x86/kernel/ima_arch.c5
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c43
-rw-r--r--arch/x86/kernel/process_64.c11
-rw-r--r--arch/x86/kernel/signal.c17
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c56
-rw-r--r--arch/x86/math-emu/fpu_entry.c3
-rw-r--r--arch/x86/mm/highmem_32.c13
-rw-r--r--arch/x86/mm/iomap_32.c11
-rw-r--r--arch/x86/mm/mpx.c6
-rw-r--r--arch/x86/mm/pkeys.c21
-rw-r--r--arch/xtensa/include/asm/spinlock_types.h4
102 files changed, 1070 insertions, 552 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 4cfb6de48f79..00b849e9c5d4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -28,6 +28,7 @@ config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
depends on HAVE_OPROFILE
+ depends on !PREEMPT_RT_FULL
select RING_BUFFER
select RING_BUFFER_ALLOW_SWAP
help
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index 1d5716bc060b..6883bc952d22 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef _ALPHA_SPINLOCK_TYPES_H
#define _ALPHA_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3b353af9c48d..b9b871329cdd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -52,7 +52,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
@@ -90,6 +90,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
@@ -2132,7 +2133,7 @@ config NEON
config KERNEL_MODE_NEON
bool "Support for NEON in kernel mode"
- depends on NEON && AEABI
+ depends on NEON && AEABI && !PREEMPT_RT_BASE
help
Say Y to include support for NEON in kernel mode.
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index 46d41140df27..c421b5b81946 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -23,6 +23,8 @@
#endif
#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+
struct irqaction;
struct pt_regs;
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 5976958647fe..a37c0803954b 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#define TICKET_SHIFT 16
typedef struct {
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index d3e937dcee4d..6ab96a2ce1f8 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -4,6 +4,13 @@
#include <linux/thread_info.h>
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* For v7 SMP cores running a preemptible kernel we may be pre-empted
* during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -26,6 +33,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
#define switch_to(prev,next,last) \
do { \
__complete_pending_tlbi(); \
+ switch_kmaps(prev, next); \
last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
} while (0)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 286eb61c632b..d0b5732e45c6 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -49,6 +49,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
@@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
+#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
+#define TIF_NEED_RESCHED_LAZY 7
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
@@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
* Change these and you break ASM code in entry-common.S
*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ _TIF_NEED_RESCHED_LAZY)
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 28b27104ac0c..fac184483fdf 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index e85a3af9ddeb..cc67c0a3ae7b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -216,11 +216,18 @@ __irq_svc:
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exception
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY @ if NEED_RESCHED_LAZY is set
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -235,8 +242,14 @@ svc_preempt:
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b @ NEED_RESCHED still set: go again
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count == 0
+ beq 1b @ go again
+ ret r8 @ preempt lazy count != 0: return
+
#endif
__und_fault:
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 08f4fcb9b3b5..27f801484b38 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -56,7 +56,9 @@ __ret_fast_syscall:
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+ bne fast_work_pending
+ tst r1, #_TIF_SECCOMP
bne fast_work_pending
@@ -93,8 +95,11 @@ __ret_fast_syscall:
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
+ bne do_slower_path
+ tst r1, #_TIF_SECCOMP
beq no_work_pending
+do_slower_path:
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 16601d1442d1..79405054420c 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -328,6 +328,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
}
#ifdef CONFIG_MMU
+/*
+ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
+ * initialized by pgtable_page_ctor() then a coredump of the vector page will
+ * fail.
+ */
+static int __init vectors_user_mapping_init_page(void)
+{
+ struct page *page;
+ unsigned long addr = 0xffff0000;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ page = pmd_page(*(pmd));
+
+ pgtable_page_ctor(page);
+
+ return 0;
+}
+late_initcall(vectors_user_mapping_init_page);
+
#ifdef CONFIG_KUSER_HELPERS
/*
* The vectors page is always readable from user space for the
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 76bb8de6bf6b..670b6fc931aa 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -652,7 +652,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index a3ce7c5365fa..cd50bea6480b 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -684,11 +684,9 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
case IPI_CPU_BACKTRACE:
- printk_nmi_enter();
irq_enter();
nmi_cpu_backtrace(regs);
irq_exit();
- printk_nmi_exit();
break;
default:
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 903f23c309df..da1d97a06c53 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -107,6 +107,29 @@ config SOC_AT91SAM9
AT91SAM9X35
AT91SAM9XE
+comment "Clocksource driver selection"
+
+config ATMEL_CLOCKSOURCE_PIT
+ bool "Periodic Interval Timer (PIT) support"
+ depends on SOC_AT91SAM9 || SOC_SAMA5
+ default SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_PIT
+ help
+ Select this to get a clocksource based on the Atmel Periodic Interval
+ Timer. It has a relatively low resolution and the TC Block clocksource
+ should be preferred.
+
+config ATMEL_CLOCKSOURCE_TCB
+ bool "Timer Counter Blocks (TCB) support"
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_TCB_CLKSRC
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate.
+ On platforms with 16-bit counters, two timer channels are combined
+ to make a single 32-bit timer.
+ It can also be used as a clock event device supporting oneshot mode.
+
config HAVE_AT91_UTMI
bool
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 326e870d7123..d9ac80aa1eb0 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -17,22 +17,22 @@
#include "hardware.h"
static int num_idle_cpus = 0;
-static DEFINE_SPINLOCK(cpuidle_lock);
+static DEFINE_RAW_SPINLOCK(cpuidle_lock);
static int imx6q_enter_wait(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- spin_lock(&cpuidle_lock);
+ raw_spin_lock(&cpuidle_lock);
if (++num_idle_cpus == num_online_cpus())
imx6_set_lpm(WAIT_UNCLOCKED);
- spin_unlock(&cpuidle_lock);
+ raw_spin_unlock(&cpuidle_lock);
cpu_do_idle();
- spin_lock(&cpuidle_lock);
+ raw_spin_lock(&cpuidle_lock);
if (num_idle_cpus-- == num_online_cpus())
imx6_set_lpm(WAIT_CLOCKED);
- spin_unlock(&cpuidle_lock);
+ raw_spin_unlock(&cpuidle_lock);
return index;
}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2daa276f0e5b..9d951ad716ac 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -437,6 +437,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -513,6 +516,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index d02f8187b1cc..542692dbd40a 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr)
return *ptep;
}
+static unsigned int fixmap_idx(int type)
+{
+ return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+}
+
void *kmap(struct page *page)
{
might_sleep();
@@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap);
void *kmap_atomic(struct page *page)
{
+ pte_t pte = mk_pte(page, kmap_prot);
unsigned int idx;
unsigned long vaddr;
void *kmap;
int type;
- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page)
type = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
/*
@@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page)
* in place, so the contained TLB flush ensures the TLB is updated
* with the new mapping.
*/
- set_fixmap_pte(idx, mk_pte(page, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);
return (void *)vaddr;
}
@@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr)
if (kvaddr >= (void *)FIXADDR_START) {
type = kmap_atomic_idx();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
if (cache_is_vivt())
__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr != __fix_to_virt(idx));
- set_fixmap_pte(idx, __pte(0));
#else
(void) idx; /* to kill a warning */
#endif
+ set_fixmap_pte(idx, __pte(0));
kmap_atomic_idx_pop();
} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
/* this address was obtained through kmap_high_get() */
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
- preempt_enable();
+ preempt_enable_nort();
}
EXPORT_SYMBOL(__kunmap_atomic);
void *kmap_atomic_pfn(unsigned long pfn)
{
+ pte_t pte = pfn_pte(pfn, kmap_prot);
unsigned long vaddr;
int idx, type;
struct page *page = pfn_to_page(pfn);
- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
type = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+ idx = fixmap_idx(type);
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
#endif
- set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_fixmap_pte(idx, pte);
return (void *)vaddr;
}
+#if defined CONFIG_PREEMPT_RT_FULL
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ int i;
+
+ /*
+ * Clear @prev's kmap_atomic mappings
+ */
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = fixmap_idx(i);
+
+ set_fixmap_pte(idx, __pte(0));
+ }
+ /*
+ * Restore @next_p's kmap_atomic mappings
+ */
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = fixmap_idx(i);
+
+ if (!pte_none(next_p->kmap_pte[i]))
+ set_fixmap_pte(idx, next_p->kmap_pte[i]);
+ }
+}
+#endif
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d366127..0e7392afd51e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -146,6 +146,7 @@ config ARM64
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
select HAVE_RCU_TABLE_INVALIDATE
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index d9a523ecdd83..d5a8f11eab04 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -19,43 +19,43 @@ config CRYPTO_SHA512_ARM64
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA1
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA256_ARM64
config CRYPTO_SHA512_ARM64_CE
tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA512_ARM64
config CRYPTO_SHA3_ARM64
tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA3
config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_GF128MUL
select CRYPTO_AES
@@ -63,7 +63,7 @@ config CRYPTO_GHASH_ARM64_CE
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC_T10DIF
+ depends on KERNEL_MODE_NEON && CRC_T10DIF && !PREEMPT_RT_BASE
select CRYPTO_HASH
config CRYPTO_AES_ARM64
@@ -72,13 +72,13 @@ config CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64
select CRYPTO_AES
@@ -102,18 +102,18 @@ config CRYPTO_AES_ARM64_NEON_BLK
config CRYPTO_CHACHA20_NEON
tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_NHPOLY1305
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES_ARM64
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4b650ec1d7dd..f561ea0ac645 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -35,6 +35,12 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
+#ifdef CONFIG_KVM_ARM_HOST
+void kvm_compute_layout(void);
+#else
+static inline void kvm_compute_layout(void) { }
+#endif
+
#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
" .if " __stringify(cb) " == 0\n" \
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index d49951647014..3bfad251203b 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -70,13 +70,34 @@ static inline bool __preempt_count_dec_and_test(void)
* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
* pair.
*/
- return !pc || !READ_ONCE(ti->preempt_count);
+ if (!pc || !READ_ONCE(ti->preempt_count))
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
}
static inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ if (pc == preempt_offset)
+ return true;
+
+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+ return false;
+
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
u64 pc = READ_ONCE(current_thread_info()->preempt_count);
return pc == preempt_offset;
+#endif
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index a157ff465e27..f952fdda8346 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -16,10 +16,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
-#endif
-
#include <asm-generic/qspinlock_types.h>
#include <asm-generic/qrwlock_types.h>
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index bbca68b54732..40b5a876a2c9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,6 +42,7 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
@@ -87,6 +88,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
+#define TIF_NEED_RESCHED_LAZY 6
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -105,6 +107,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -117,8 +120,9 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
- _TIF_UPROBE | _TIF_FSCHECK)
+ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_NOHZ)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index b5d603992d40..f92815d56d17 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -224,6 +224,7 @@ static int __apply_alternatives_multi_stop(void *unused)
void __init apply_alternatives_all(void)
{
/* better not try code patching on a live SMP system */
+ kvm_compute_layout();
stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 65b8afc84466..6a859f52195e 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -41,6 +41,7 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0ec0c46b2c0c..81312fb7f11c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -610,9 +610,16 @@ el1_irq:
#ifdef CONFIG_PREEMPT
ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
- cbnz x24, 1f // preempt count != 0
- bl el1_preempt
+ cbnz x24, 2f // preempt count != 0
+
+ ldr w24, [tsk, #TSK_TI_PREEMPT_LAZY] // get preempt lazy count
+ cbnz w24, 2f // preempt lazy count != 0
+
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
+ tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
1:
+ bl el1_preempt
+2:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -626,6 +633,7 @@ el1_preempt:
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
+ tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs lazy rescheduling?
ret x24
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5ebe73b69961..e8d67e6d1e7e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -159,6 +159,16 @@ static void sve_free(struct task_struct *task)
__sve_free(task);
}
+static void *sve_free_atomic(struct task_struct *task)
+{
+ void *sve_state = task->thread.sve_state;
+
+ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+ task->thread.sve_state = NULL;
+ return sve_state;
+}
+
/*
* TIF_SVE controls whether a task can use SVE without trapping while
* in userspace, and also the way a task's FPSIMD/SVE state is stored
@@ -547,6 +557,7 @@ int sve_set_vector_length(struct task_struct *task,
* non-SVE thread.
*/
if (task == current) {
+ preempt_disable();
local_bh_disable();
fpsimd_save();
@@ -557,8 +568,10 @@ int sve_set_vector_length(struct task_struct *task,
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
sve_to_fpsimd(task);
- if (task == current)
+ if (task == current) {
local_bh_enable();
+ preempt_enable();
+ }
/*
* Force reallocation of task SVE state to the correct size
@@ -813,6 +826,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
sve_alloc(current);
+ preempt_disable();
local_bh_disable();
fpsimd_save();
@@ -826,6 +840,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
WARN_ON(1); /* SVE access shouldn't have trapped */
local_bh_enable();
+ preempt_enable();
}
/*
@@ -888,10 +903,12 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
int vl, supported_vl;
+ void *mem = NULL;
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
memset(&current->thread.uw.fpsimd_state, 0,
@@ -900,7 +917,7 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
+ mem = sve_free_atomic(current);
/*
* Reset the task vector length as required.
@@ -936,6 +953,8 @@ void fpsimd_flush_thread(void)
set_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
+ preempt_enable();
+ kfree(mem);
}
/*
@@ -947,9 +966,11 @@ void fpsimd_preserve_current_state(void)
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
fpsimd_save();
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1007,6 +1028,7 @@ void fpsimd_restore_current_state(void)
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -1015,6 +1037,7 @@ void fpsimd_restore_current_state(void)
}
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1027,6 +1050,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
if (!system_supports_fpsimd())
return;
+ preempt_disable();
local_bh_disable();
current->thread.uw.fpsimd_state = *state;
@@ -1039,6 +1063,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
clear_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
+ preempt_enable();
}
/*
@@ -1084,6 +1109,7 @@ void kernel_neon_begin(void)
BUG_ON(!may_use_simd());
+ preempt_disable();
local_bh_disable();
__this_cpu_write(kernel_neon_busy, true);
@@ -1097,6 +1123,7 @@ void kernel_neon_begin(void)
preempt_disable();
local_bh_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 867a7cea70e5..4d1f916c27fb 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -926,7 +926,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
/* Check valid user FS if needed */
addr_limit_user_check();
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index c712a7376bc1..792da0e125de 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -33,7 +33,7 @@ static u8 tag_lsb;
static u64 tag_val;
static u64 va_mask;
-static void compute_layout(void)
+__init void kvm_compute_layout(void)
{
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
u64 hyp_va_msb;
@@ -121,8 +121,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
BUG_ON(nr_inst != 5);
- if (!has_vhe() && !va_mask)
- compute_layout();
for (i = 0; i < nr_inst; i++) {
u32 rd, rn, insn, oinsn;
@@ -167,9 +165,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
return;
}
- if (!va_mask)
- compute_layout();
-
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/
diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
index 7a906b5214a4..d8f596fec022 100644
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -21,10 +21,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 6e345fefcdca..681408d6816f 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef _ASM_IA64_SPINLOCK_TYPES_H
#define _ASM_IA64_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 91bd1e129379..1bf4f177f262 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1824,7 +1824,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
- cpumask_set_cpu(cpu, &p->cpus_allowed);
+ cpumask_set_cpu(cpu, &p->cpus_mask);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a84c24d894aa..7ac1814b6fdd 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2600,7 +2600,7 @@ config MIPS_CRC_SUPPORT
#
config HIGHMEM
bool "High Memory Support"
- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
config CPU_SUPPORTS_HIGHMEM
bool
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0f813bb753c6..09cbe9042828 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
* inline to try to keep the overhead down. If we have been forced to run on
* a "CPU" with an FPU because of a previous high level of FP computation,
* but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
*
* We're not calling set_cpus_allowed() here, because we have no need to
* force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
} \
next->thread.emulated_fp = 0; \
} while(0)
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index a7c0f97e4b0d..1a08428eedcf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
if (retval)
goto out_unlock;
- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
cpumask_and(&mask, &allowed, cpu_active_mask);
out_unlock:
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c91097f7b32f..a7ca580bb143 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
* restricted the allowed set to exclude any CPUs with FPUs,
* we'll skip the procedure.
*/
- if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+ if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
cpumask_t tmask;
current->thread.user_cpus_allowed
- = current->cpus_allowed;
- cpumask_and(&tmask, &current->cpus_allowed,
+ = current->cpus_mask;
+ cpumask_and(&tmask, &current->cpus_mask,
&mt_fpu_cpumask);
set_cpus_allowed_ptr(current, &tmask);
set_thread_flag(TIF_FPUBOUND);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2890d36eb531..df0cc2dcbcd8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -105,10 +105,11 @@ config LOCKDEP_SUPPORT
config RWSEM_GENERIC_SPINLOCK
bool
+ default y if PREEMPT_RT_FULL
config RWSEM_XCHGADD_ALGORITHM
bool
- default y
+ default y if !PREEMPT_RT_FULL
config GENERIC_LOCKBREAK
bool
@@ -218,6 +219,7 @@ config PPC
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN
@@ -394,7 +396,7 @@ menu "Kernel options"
config HIGHMEM
bool "High memory support"
- depends on PPC32
+ depends on PPC32 && !PREEMPT_RT_FULL
source "kernel/Kconfig.hz"
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 87adaf13b7e8..7305cb6a53e4 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
index 1c8460e23583..e764eb4b6c28 100644
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void)
unsigned long canary;
/* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT_FULL
+ canary = (unsigned long)&canary;
+#else
canary = get_random_canary();
+#endif
canary ^= mftb();
canary ^= LINUX_VERSION_CODE;
canary &= CANARY_MASK;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 544cac0474cb..f9132398566d 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -38,6 +38,8 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ int preempt_lazy_count; /* 0 => lazy preemption allowed,
+ <0 => BUG */
unsigned long local_flags; /* private flags for thread */
#ifdef CONFIG_LIVEPATCH
unsigned long *livepatch_sp;
@@ -99,11 +101,12 @@ void arch_setup_new_exec(void);
#define TIF_SINGLESTEP 8 /* singlestepping active */
#define TIF_NOHZ 9 /* in adaptive nohz mode */
#define TIF_SECCOMP 10 /* secure computing */
-#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
-#define TIF_NOERROR 12 /* Force successful syscall return */
+
+#define TIF_NEED_RESCHED_LAZY 11 /* lazy rescheduling necessary */
+#define TIF_SYSCALL_TRACEPOINT 12 /* syscall tracepoint instrumentation */
+
#define TIF_NOTIFY_RESUME 13 /* callback before returning to user */
#define TIF_UPROBE 14 /* breakpointed or single-stepping */
-#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
for stack store? */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -112,6 +115,9 @@ void arch_setup_new_exec(void);
#endif
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_32BIT 20 /* 32 bit binary */
+#define TIF_RESTOREALL 21 /* Restore all regs (implies NOERROR) */
+#define TIF_NOERROR 22 /* Force successful syscall return */
+
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -131,6 +137,7 @@ void arch_setup_new_exec(void);
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_NOHZ (1<<TIF_NOHZ)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
#define _TIF_FSCHECK (1<<TIF_FSCHECK)
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
@@ -140,8 +147,9 @@ void arch_setup_new_exec(void);
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
- _TIF_FSCHECK)
+ _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
/* Bits in local_flags */
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..f23224826a84 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -161,6 +161,7 @@ int main(void)
OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
OFFSET(TI_PREEMPT, thread_info, preempt_count);
+ OFFSET(TI_PREEMPT_LAZY, thread_info, preempt_lazy_count);
OFFSET(TI_TASK, thread_info, task);
OFFSET(TI_CPU, thread_info, cpu);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index fdd528cdb2ee..1b6c95d49cd2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -393,7 +393,9 @@ ret_from_syscall:
MTMSRD(r10)
lwz r9,TI_FLAGS(r12)
li r8,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
+ lis r0,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@h
+ ori r0,r0, (_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@l
+ and. r0,r9,r0
bne- syscall_exit_work
cmplw 0,r3,r8
blt+ syscall_exit_cont
@@ -511,13 +513,13 @@ syscall_dotrace:
b syscall_dotrace_cont
syscall_exit_work:
- andi. r0,r9,_TIF_RESTOREALL
+ andis. r0,r9,_TIF_RESTOREALL@h
beq+ 0f
REST_NVGPRS(r1)
b 2f
0: cmplw 0,r3,r8
blt+ 1f
- andi. r0,r9,_TIF_NOERROR
+ andis. r0,r9,_TIF_NOERROR@h
bne- 1f
lwz r11,_CCR(r1) /* Load CR */
neg r3,r3
@@ -526,12 +528,12 @@ syscall_exit_work:
1: stw r6,RESULT(r1) /* Save result */
stw r3,GPR3(r1) /* Update return value */
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
+2: andis. r0,r9,(_TIF_PERSYSCALL_MASK)@h
beq 4f
/* Clear per-syscall TIF flags if any are set. */
- li r11,_TIF_PERSYSCALL_MASK
+ lis r11,_TIF_PERSYSCALL_MASK@h
addi r12,r12,TI_FLAGS
3: lwarx r8,0,r12
andc r8,r8,r11
@@ -888,7 +890,14 @@ resume_kernel:
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
andi. r8,r8,_TIF_NEED_RESCHED
+ bne+ 1f
+ lwz r0,TI_PREEMPT_LAZY(r9)
+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
+ bne restore
+ lwz r0,TI_FLAGS(r9)
+ andi. r0,r0,_TIF_NEED_RESCHED_LAZY
beq+ restore
+1:
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
@@ -899,11 +908,11 @@ resume_kernel:
*/
bl trace_hardirqs_off
#endif
-1: bl preempt_schedule_irq
+2: bl preempt_schedule_irq
CURRENT_THREAD_INFO(r9, r1)
lwz r3,TI_FLAGS(r9)
- andi. r0,r3,_TIF_NEED_RESCHED
- bne- 1b
+ andi. r0,r3,_TIF_NEED_RESCHED_MASK
+ bne- 2b
#ifdef CONFIG_TRACE_IRQFLAGS
/* And now, to properly rebalance the above, we tell lockdep they
* are being turned back on, which will happen when we return
@@ -1232,7 +1241,7 @@ global_dbcr0:
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,_TIF_NEED_RESCHED_MASK
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
@@ -1253,7 +1262,7 @@ recheck:
MTMSRD(r10) /* disable interrupts */
CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,_TIF_NEED_RESCHED_MASK
bne- do_resched
andi. r0,r9,_TIF_USER_WORK_MASK
beq restore_user
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index a2c168b395d2..abdd089c20a6 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -253,7 +253,9 @@ system_call_exit:
ld r9,TI_FLAGS(r12)
li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
+ lis r0,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@h
+ ori r0,r0,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@l
+ and. r0,r9,r0
bne- .Lsyscall_exit_work
andi. r0,r8,MSR_FP
@@ -370,25 +372,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
- andi. r0,r9,_TIF_RESTOREALL
+ andis. r0,r9,_TIF_RESTOREALL@h
beq+ 0f
REST_NVGPRS(r1)
b 2f
0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
blt+ 1f
- andi. r0,r9,_TIF_NOERROR
+ andis. r0,r9,_TIF_NOERROR@h
bne- 1f
ld r5,_CCR(r1)
neg r3,r3
oris r5,r5,0x1000 /* Set SO bit in CR */
std r5,_CCR(r1)
1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
+2: andis. r0,r9,(_TIF_PERSYSCALL_MASK)@h
beq 4f
/* Clear per-syscall TIF flags if any are set. */
- li r11,_TIF_PERSYSCALL_MASK
+ lis r11,(_TIF_PERSYSCALL_MASK)@h
addi r12,r12,TI_FLAGS
3: ldarx r10,0,r12
andc r10,r10,r11
@@ -780,7 +782,7 @@ _GLOBAL(ret_from_except_lite)
bl restore_math
b restore
#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
+1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
beq 2f
bl restore_interrupts
SCHEDULE_USER
@@ -842,10 +844,18 @@ resume_kernel:
#ifdef CONFIG_PREEMPT
/* Check if we need to preempt */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
+ bne restore
andi. r0,r4,_TIF_NEED_RESCHED
+ bne+ check_count
+
+ andi. r0,r4,_TIF_NEED_RESCHED_LAZY
beq+ restore
+ lwz r8,TI_PREEMPT_LAZY(r9)
+
/* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
+check_count:
cmpwi cr0,r8,0
bne restore
ld r0,SOFTE(r1)
@@ -862,7 +872,7 @@ resume_kernel:
/* Re-test flags and eventually loop */
CURRENT_THREAD_INFO(r9, r1)
ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
+ andi. r0,r4,_TIF_NEED_RESCHED_MASK
bne 1b
/*
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 916ddc4aac44..833d27f85aea 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -766,6 +766,7 @@ void irq_ctx_init(void)
}
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void do_softirq_own_stack(void)
{
struct thread_info *curtp, *irqtp;
@@ -783,6 +784,7 @@ void do_softirq_own_stack(void)
if (irqtp->flags)
set_bits(irqtp->flags, &curtp->flags);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 57d2ffb2d45c..479958bf1a48 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -42,6 +42,7 @@
* We store the saved ksp_limit in the unused part
* of the STACK_FRAME_OVERHEAD
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
_GLOBAL(call_do_softirq)
mflr r0
stw r0,4(r1)
@@ -58,6 +59,7 @@ _GLOBAL(call_do_softirq)
stw r10,THREAD+KSP_LIMIT(r2)
mtlr r0
blr
+#endif
/*
* void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 262ba9481781..4935ef9a142e 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -32,6 +32,7 @@
.text
+#ifndef CONFIG_PREEMPT_RT_FULL
_GLOBAL(call_do_softirq)
mflr r0
std r0,16(r1)
@@ -42,6 +43,7 @@ _GLOBAL(call_do_softirq)
ld r0,16(r1)
mtlr r0
blr
+#endif
_GLOBAL(call_do_irq)
mflr r0
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 7a1de34f38c8..314be8e39d1f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -174,7 +174,6 @@ extern void panic_flush_kmsg_start(void)
extern void panic_flush_kmsg_end(void)
{
- printk_safe_flush_on_panic();
kmsg_dump(KMSG_DUMP_PANIC);
bust_spinlocks(0);
debug_locks_off();
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22a0c4e..bf98181c5b30 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -181,11 +181,6 @@ static void watchdog_smp_panic(int cpu, u64 tb)
wd_smp_unlock(&flags);
- printk_safe_flush();
- /*
- * printk_safe_flush() seems to require another print
- * before anything actually goes out to console.
- */
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index bfdde04e4905..0bebb00698e8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -178,6 +178,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
+ depends on !PREEMPT_RT_FULL
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 9fcccb4490b9..a24010d4f35e 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e7075aaff1bb..1580464a9d5b 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -752,8 +752,8 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
}
pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
- res = wait_event_interruptible(dev->done.wait,
- dev->done.done || kthread_should_stop());
+ res = swait_event_interruptible_exclusive(dev->done.wait,
+ dev->done.done || kthread_should_stop());
if (kthread_should_stop())
res = -EINTR;
if (res) {
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8fc8fe0b9848..14ecedbd8ff1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -38,6 +38,7 @@
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/locallock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -191,6 +192,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
}
static DEFINE_PER_CPU(__be64 *, tce_page);
+static DEFINE_LOCAL_IRQ_LOCK(tcp_page_lock);
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -211,7 +213,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(tcp_page_lock, flags);
tcep = __this_cpu_read(tce_page);
@@ -222,7 +225,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(tcp_page_lock, flags);
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
@@ -256,7 +259,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(tcp_page_lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -414,13 +417,14 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
u64 rc = 0;
long l, limit;
- local_irq_disable(); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irq(tcp_page_lock);
tcep = __this_cpu_read(tce_page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(tcp_page_lock);
return -ENOMEM;
}
__this_cpu_write(tce_page, tcep);
@@ -466,7 +470,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(tcp_page_lock);
return rc;
}
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index cfed272e4fd5..8e28e8176ec8 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index e82369f286a2..22ca9a98bbb8 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef __ASM_SH_SPINLOCK_TYPES_H
#define __ASM_SH_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 5717c7cbdd97..66dd399b2007 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -148,6 +148,7 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void do_softirq_own_stack(void)
{
struct thread_info *curctx;
@@ -175,6 +176,7 @@ void do_softirq_own_stack(void)
"r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
);
}
+#endif
#else
static inline void handle_one_irq(unsigned int irq)
{
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 3ec9f1402aad..8a4c6ea0696e 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
set_irq_regs(old_regs);
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void do_softirq_own_stack(void)
{
void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@@ -868,6 +869,7 @@ void do_softirq_own_stack(void)
__asm__ __volatile__("mov %0, %%sp"
: : "r" (orig_sp));
}
+#endif
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 64d5a3327030..f1c002d13b2d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -183,6 +183,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if PARAVIRT
select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
@@ -269,8 +270,11 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
+config RWSEM_GENERIC_SPINLOCK
+ def_bool PREEMPT_RT_FULL
+
config RWSEM_XCHGADD_ALGORITHM
- def_bool y
+ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -949,7 +953,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
- select CPUMASK_OFFSTACK
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
---help---
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ae30c8b6ec4d..7d80fbe9088d 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -415,14 +415,14 @@ static int ecb_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -437,14 +437,14 @@ static int ecb_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -459,14 +459,14 @@ static int cbc_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -481,14 +481,14 @@ static int cbc_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -538,18 +538,20 @@ static int ctr_crypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ kernel_fpu_begin();
aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
if (walk.nbytes) {
+ kernel_fpu_begin();
ctr_crypt_final(ctx, &walk);
+ kernel_fpu_end();
err = skcipher_walk_done(&walk, 0);
}
- kernel_fpu_end();
return err;
}
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index d1ce49119da8..c20e76bbc1f9 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -61,7 +61,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct skcipher_request *req, bool enc)
u8 *wsrc = walk.src.virt.addr;
u8 *wdst = walk.dst.virt.addr;
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -105,10 +105,9 @@ static int ecb_crypt(struct skcipher_request *req, bool enc)
} while (nbytes >= bsize);
done:
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -212,7 +211,7 @@ static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
@@ -220,12 +219,11 @@ static int cbc_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
nbytes = __cbc_decrypt(ctx, &walk);
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -292,7 +290,7 @@ static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
@@ -300,13 +298,12 @@ static int ctr_crypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
nbytes = __ctr_crypt(&walk, ctx);
+ cast5_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- cast5_fpu_end(fpu_enabled);
-
if (walk.nbytes) {
ctr_crypt_final(&walk, ctx);
err = skcipher_walk_done(&walk, 0);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 45c1c4143176..4f2b71cbe02e 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -131,7 +131,6 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
struct chacha_ctx *ctx, u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
- int next_yield = 4096; /* bytes until next FPU yield */
int err = 0;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
@@ -144,20 +143,14 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
if (nbytes < walk->total) {
nbytes = round_down(nbytes, walk->stride);
- next_yield -= nbytes;
}
chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
nbytes, ctx->nrounds);
- if (next_yield <= 0) {
- /* temporarily allow preemption */
- kernel_fpu_end();
- kernel_fpu_begin();
- next_yield = 4096;
- }
-
+ kernel_fpu_end();
err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+ kernel_fpu_begin();
}
return err;
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index a78ef99a9981..dac489a1c4da 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -38,7 +38,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -51,7 +51,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
unsigned int i;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
@@ -69,10 +69,9 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
if (nbytes < bsize)
break;
}
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
-
- glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
@@ -115,7 +114,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -129,7 +128,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
u128 last_iv;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
@@ -161,10 +160,10 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
done:
u128_xor(dst, dst, (u128 *)walk.iv);
*(u128 *)walk.iv = last_iv;
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
@@ -175,7 +174,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -189,7 +188,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
le128 ctrblk;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
+ &walk, false, nbytes);
be128_to_le128(&ctrblk, (be128 *)walk.iv);
@@ -213,11 +212,10 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
}
le128_to_be128((be128 *)walk.iv, &ctrblk);
+ glue_fpu_end(fpu_enabled);
err = skcipher_walk_done(&walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
-
if (nbytes) {
le128 ctrblk;
u128 tmp;
@@ -278,7 +276,7 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
{
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
- bool fpu_enabled = false;
+ bool fpu_enabled;
unsigned int nbytes;
int err;
@@ -289,21 +287,24 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled,
+ &walk, false,
nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
while (nbytes) {
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled,
+ nbytes < bsize ? bsize : nbytes);
nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+ glue_fpu_end(fpu_enabled);
+ fpu_enabled = false;
err = skcipher_walk_done(&walk, nbytes);
nbytes = walk.nbytes;
}
- glue_fpu_end(fpu_enabled);
-
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 8e5498be76bd..73451f7bbc57 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,6 +31,7 @@
#include <asm/vdso.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
#include <asm/nospec-branch.h>
#define CREATE_TRACE_POINTS
@@ -134,7 +135,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
+ _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
@@ -149,9 +150,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* We have work to do. */
local_irq_enable();
- if (cached_flags & _TIF_NEED_RESCHED)
+ if (cached_flags & _TIF_NEED_RESCHED_MASK)
schedule();
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
+ t->forced_info.si_signo = 0;
+ }
+#endif
if (cached_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -197,6 +205,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
+ /* Reload ti->flags; we may have rescheduled above. */
+ cached_flags = READ_ONCE(ti->flags);
+
+ fpregs_assert_state_consistent();
+ if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 5fc76b755510..eba847dcd3b9 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -769,8 +769,25 @@ END(ret_from_exception)
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
.Lneed_resched:
+ # preempt count == 0 + NEED_RS set?
cmpl $0, PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
jnz restore_all_kernel
+#else
+ jz test_int_off
+
+ # at least preempt count == 0 ?
+ cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
+ jne restore_all_kernel
+
+ movl PER_CPU_VAR(current_task), %ebp
+ cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
+ jnz restore_all_kernel
+
+ testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp)
+ jz restore_all_kernel
+test_int_off:
+#endif
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all_kernel
call preempt_schedule_irq
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4150bd0f5adf..aa1ebc002b19 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -648,7 +648,23 @@ retint_kernel:
btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
jnz 1f
+#else
+ jz do_preempt_schedule_irq
+
+ # at least preempt count == 0 ?
+ cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
+ jnz 1f
+
+ movq PER_CPU_VAR(current_task), %rcx
+ cmpl $0, TASK_TI_preempt_lazy_count(%rcx)
+ jnz 1f
+
+ btl $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx)
+ jnc 1f
+do_preempt_schedule_irq:
+#endif
call preempt_schedule_irq
jmp 0b
1:
@@ -1035,6 +1051,7 @@ bad_gs:
jmp 2b
.previous
+#ifndef CONFIG_PREEMPT_RT_FULL
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(do_softirq_own_stack)
pushq %rbp
@@ -1045,6 +1062,7 @@ ENTRY(do_softirq_own_stack)
leaveq
ret
ENDPROC(do_softirq_own_stack)
+#endif
#ifdef CONFIG_XEN_PV
idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 321fe5f5d0e9..6eeb3249f22f 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -216,8 +216,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
size_t frame_size,
void __user **fpstate)
{
- struct fpu *fpu = &current->thread.fpu;
- unsigned long sp;
+ unsigned long sp, fx_aligned, math_size;
/* Default to using normal stack */
sp = regs->sp;
@@ -231,15 +230,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (fpu->initialized) {
- unsigned long fx_aligned, math_size;
-
- sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
- *fpstate = (struct _fpstate_32 __user *) sp;
- if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
- math_size) < 0)
- return (void __user *) -1L;
- }
+ sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+ *fpstate = (struct _fpstate_32 __user *) sp;
+ if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
+ math_size) < 0)
+ return (void __user *) -1L;
sp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b56d504af654..9bc76b328a7f 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -10,6 +10,7 @@
#ifndef _ASM_X86_FPU_API_H
#define _ASM_X86_FPU_API_H
+#include <linux/bottom_half.h>
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
@@ -21,6 +22,37 @@
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
+extern void fpregs_mark_activate(void);
+extern void kernel_fpu_resched(void);
+
+/*
+ * Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
+ * A context switch will (and softirq might) save CPU's FPU registers to
+ * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in a random
+ * state.
+ */
+static inline void fpregs_lock(void)
+{
+ preempt_disable();
+ local_bh_disable();
+}
+
+static inline void fpregs_unlock(void)
+{
+ local_bh_enable();
+ preempt_enable();
+}
+
+#ifdef CONFIG_X86_DEBUG_FPU
+extern void fpregs_assert_state_consistent(void);
+#else
+static inline void fpregs_assert_state_consistent(void) { }
+#endif
+
+/*
+ * Load the task FPU state before returning to userspace.
+ */
+extern void switch_fpu_return(void);
/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fa2c93cb42a2..33e2294b5a67 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -14,6 +14,7 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <asm/user.h>
#include <asm/fpu/api.h>
@@ -24,14 +25,12 @@
/*
* High level FPU state handling functions:
*/
-extern void fpu__initialize(struct fpu *fpu);
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
-extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
-extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
extern void fpu__clear(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@@ -122,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
err; \
})
+#define kernel_insn_err(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
@@ -158,6 +172,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
}
}
+static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else
+ return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
@@ -175,6 +197,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx)
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
+static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
+{
+ return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
@@ -401,6 +428,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
}
/*
+ * Restore xstate from kernel space xsave area, return an error code instead of
+ * an exception.
+ */
+static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+
+ return err;
+}
+
+/*
* These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact and we can
* keep registers active.
@@ -517,6 +559,22 @@ static inline void fpregs_activate(struct fpu *fpu)
trace_x86_fpu_regs_activated(fpu);
}
+static inline void __fpregs_load_activate(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+ int cpu = smp_processor_id();
+
+ if (WARN_ON_ONCE(current->mm == NULL))
+ return;
+
+ if (!fpregs_state_valid(fpu, cpu)) {
+ copy_kernel_to_fpregs(&fpu->state);
+ fpregs_activate(fpu);
+ fpu->last_cpu = cpu;
+ }
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
/*
* FPU state switching for scheduling.
*
@@ -525,13 +583,22 @@ static inline void fpregs_activate(struct fpu *fpu)
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
- * - switch_fpu_finish() restores the new state as
- * necessary.
+ * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
+ * will get loaded on return to userspace, or when the kernel needs it.
+ *
+ * The FPU context is only stored/restored for user tasks and ->mm is used to
+ * distinguish between kernel and user threads.
+ *
+ * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers are saved in
+ * the current thread's FPU registers state.
+ * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not hold current()'s
+ * FPU registers. It is required to load the registers before returning to
+ * userland or using the content otherwise.
*/
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
@@ -539,8 +606,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
trace_x86_fpu_regs_deactivated(old_fpu);
- } else
- old_fpu->last_cpu = -1;
+ }
}
/*
@@ -548,36 +614,32 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
*/
/*
- * Set up the userspace FPU context for the new task, if the task
- * has used the FPU.
+ * Load PKRU from the FPU context if available. Delay the loading of the
+ * complete FPU state until the return to userland.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
+static inline void switch_fpu_finish(struct fpu *new_fpu)
{
- bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->initialized;
+ struct pkru_state *pk;
+ u32 pkru_val = init_pkru_value;
- if (preload) {
- if (!fpregs_state_valid(new_fpu, cpu))
- copy_kernel_to_fpregs(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
-}
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return;
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
- struct fpu *fpu = &current->thread.fpu;
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
- preempt_disable();
- fpregs_activate(fpu);
- preempt_enable();
+ /*
+ * PKRU state is switched eagerly because it needs to be valid before we
+ * return to userland e.g. for a copy_to_user() operation.
+ */
+ if (current->mm) {
+ pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+ if (pk)
+ pkru_val = pk->pkru;
+ }
+ __write_pkru(pkru_val);
}
/*
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 44bbc39a57b3..7fb516b6893a 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
+extern void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env);
unsigned long
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c53918ecf..c5a6edd92de4 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -294,15 +294,6 @@ struct fpu {
unsigned int last_cpu;
/*
- * @initialized:
- *
- * This flag indicates whether this context is initialized: if the task
- * is not running then we can restore from this context, if the task
- * is running then we should save into this context.
- */
- unsigned char initialized;
-
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c..4e18a837223f 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
+#include <asm/user.h>
/* Bit 63 of XCR0 is reserved for future expansion */
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
@@ -46,8 +47,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..58a3a68e1f11 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,8 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/api.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -127,14 +129,28 @@ static inline int pte_dirty(pte_t pte)
static inline u32 read_pkru(void)
{
if (boot_cpu_has(X86_FEATURE_OSPKE))
- return __read_pkru();
+ return __read_pkru_ins();
return 0;
}
static inline void write_pkru(u32 pkru)
{
- if (boot_cpu_has(X86_FEATURE_OSPKE))
- __write_pkru(pkru);
+ struct pkru_state *pk;
+
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return;
+
+ pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
+ /*
+ * The PKRU value in xstate needs to be in sync with the value that is
+ * written to the CPU. The FPU restore on return to userland would
+ * otherwise load the previous value again.
+ */
+ fpregs_lock();
+ if (pk)
+ pk->pkru = pkru;
+ __write_pkru(pkru);
+ fpregs_unlock();
}
static inline int pte_young(pte_t pte)
@@ -1355,6 +1371,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+extern u32 init_pkru_value;
+#else
+#define init_pkru_value 0
+#endif
+
static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..f8e42abd874a 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -89,17 +89,46 @@ static __always_inline void __preempt_count_sub(int val)
* a decrement which hits zero means we have no preempt_count and should
* reschedule.
*/
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
{
return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
}
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ if (____preempt_count_dec_and_test())
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
+}
+
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u32 tmp;
+
+ tmp = raw_cpu_read_4(__preempt_count);
+ if (tmp == preempt_offset)
+ return true;
+
+ /* preempt count == 0 ? */
+ tmp &= ~PREEMPT_NEED_RESCHED;
+ if (tmp != preempt_offset)
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 33d3c88a7225..c00e27af2205 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,6 +28,19 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT_FULL)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#ifndef CONFIG_COMPAT
typedef sigset_t compat_sigset_t;
#endif
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..28ffdf0c1add 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val)
#endif
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline u32 __read_pkru(void)
+static inline u32 __read_pkru_ins(void)
{
u32 ecx = 0;
u32 edx, pkru;
@@ -107,7 +107,7 @@ static inline u32 __read_pkru(void)
return pkru;
}
-static inline void __write_pkru(u32 pkru)
+static inline void __write_pkru_ins(u32 pkru)
{
u32 ecx = 0, edx = 0;
@@ -118,8 +118,20 @@ static inline void __write_pkru(u32 pkru)
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (pkru), "c"(ecx), "d"(edx));
}
+
+static inline void __write_pkru(u32 pkru)
+{
+ /*
+ * WRPKRU is relatively expensive compared to RDPKRU.
+ * Avoid WRPKRU when it would not change the value.
+ */
+ if (pkru == __read_pkru_ins())
+ return;
+ __write_pkru_ins(pkru);
+}
+
#else
-static inline u32 __read_pkru(void)
+static inline u32 __read_pkru_ins(void)
{
return 0;
}
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 8ec97a62c245..7bc85841fc56 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -60,7 +60,7 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
+ u64 uninitialized_var(canary);
u64 tsc;
#ifdef CONFIG_X86_64
@@ -71,8 +71,14 @@ static __always_inline void boot_init_stack_canary(void)
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
+ * For preempt-rt we need to weaken the randomness a bit, as
+ * we can't call into the random generator from atomic context
+ * due to locking constraints. We just leave canary
+ * uninitialized and use the TSC based randomness on top of it.
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
get_random_bytes(&canary, sizeof(canary));
+#endif
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..f346f24140be 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -56,17 +56,24 @@ struct task_struct;
struct thread_info {
unsigned long flags; /* low level flags */
u32 status; /* thread synchronous flags */
+ int preempt_lazy_count; /* 0 => lazy preemptable
+ <0 => BUG */
};
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, \
+ .preempt_lazy_count = 0, \
}
#else /* !__ASSEMBLY__ */
#include <asm/asm-offsets.h>
+#define GET_THREAD_INFO(reg) \
+ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
+ _ASM_SUB $(THREAD_SIZE),reg ;
+
#endif
/*
@@ -88,9 +95,11 @@ struct thread_info {
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
+#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
+#define TIF_NEED_RESCHED_LAZY 18 /* lazy rescheduling necessary */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
@@ -117,9 +126,11 @@ struct thread_info {
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
@@ -157,6 +168,8 @@ struct thread_info {
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
#define STACK_WARN (THREAD_SIZE/8)
/*
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 069c04be1507..91a1422091ce 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, initialized)
+ __field(bool, load_fpu)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->initialized = fpu->initialized;
+ __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD);
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->initialized,
+ __entry->load_fpu,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2953bbf05c08..d9c45d5609b7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1722,19 +1722,20 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
return false;
}
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
/* If we are moving the irq we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
- mask_ioapic_irq(data);
+ if (!irqd_irq_masked(data))
+ mask_ioapic_irq(data);
return true;
}
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
- if (unlikely(masked)) {
+ if (unlikely(moveit)) {
/* Only migrate the irq if the ack has been received.
*
* On rare occasions the broadcast level triggered ack gets
@@ -1763,15 +1764,17 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
*/
if (!io_apic_level_ack_pending(data->chip_data))
irq_move_masked_irq(data);
- unmask_ioapic_irq(data);
+ /* If the irq is masked in the core, leave it */
+ if (!irqd_irq_masked(data))
+ unmask_ioapic_irq(data);
}
}
#else
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
}
#endif
@@ -1780,11 +1783,11 @@ static void ioapic_ack_level(struct irq_data *irq_data)
{
struct irq_cfg *cfg = irqd_cfg(irq_data);
unsigned long v;
- bool masked;
+ bool moveit;
int i;
irq_complete_move(cfg);
- masked = ioapic_irqd_mask(irq_data);
+ moveit = ioapic_prepare_move(irq_data);
/*
* It appears there is an erratum which affects at least version 0x11
@@ -1839,7 +1842,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
}
- ioapic_irqd_unmask(irq_data, masked);
+ ioapic_finish_move(irq_data, moveit);
}
static void ioapic_ir_ack_level(struct irq_data *irq_data)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..3f4d6b41965c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -39,6 +39,7 @@ static void __used common(void)
BLANK();
OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
+ OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
@@ -92,6 +93,7 @@ static void __used common(void)
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
/* TLB state for the entry code */
OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 132a63dc5a76..abb9b52b6602 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -372,6 +372,8 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
+ struct pkru_state *pk;
+
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
@@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
return;
cr4_set_bits(X86_CR4_PKE);
+ pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+ if (pk)
+ pk->pkru = init_pkru_value;
/*
* Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 14bed6af8377..89fc723c818a 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1510,7 +1510,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2e5003fef51a..8472c66c0a5a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,24 +101,21 @@ static void __kernel_fpu_begin(void)
kernel_fpu_disable();
- if (fpu->initialized) {
- /*
- * Ignore return value -- we don't care if reg state
- * is clobbered.
- */
- copy_fpregs_to_fpstate(fpu);
- } else {
- __cpu_invalidate_fpregs_state();
+ if (current->mm) {
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
+ copy_fpregs_to_fpstate(fpu);
+ }
}
+ __cpu_invalidate_fpregs_state();
}
static void __kernel_fpu_end(void)
{
- struct fpu *fpu = &current->thread.fpu;
-
- if (fpu->initialized)
- copy_kernel_to_fpregs(&fpu->state);
-
kernel_fpu_enable();
}
@@ -136,6 +133,18 @@ void kernel_fpu_end(void)
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
+void kernel_fpu_resched(void)
+{
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ if (should_resched(PREEMPT_OFFSET)) {
+ kernel_fpu_end();
+ cond_resched();
+ kernel_fpu_begin();
+ }
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
/*
* Save the FPU state (mark it for reload if necessary):
*
@@ -145,15 +154,17 @@ void fpu__save(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- preempt_disable();
+ fpregs_lock();
trace_x86_fpu_before_save(fpu);
- if (fpu->initialized) {
+
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
}
+
trace_x86_fpu_after_save(fpu);
- preempt_enable();
+ fpregs_unlock();
}
EXPORT_SYMBOL_GPL(fpu__save);
@@ -186,11 +197,14 @@ void fpstate_init(union fpregs_state *state)
}
EXPORT_SYMBOL_GPL(fpstate_init);
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
{
+ struct fpu *dst_fpu = &dst->thread.fpu;
+ struct fpu *src_fpu = &src->thread.fpu;
+
dst_fpu->last_cpu = -1;
- if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
+ if (!static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -202,16 +216,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/*
- * Save current FPU registers directly into the child
- * FPU context, without any memory-to-memory copying.
+ * If the FPU registers are not current just memcpy() the state.
+ * Otherwise save current FPU registers directly into the child's FPU
+ * context, without any memory-to-memory copying.
*
* ( The function 'fails' in the FNSAVE case, which destroys
- * register contents so we have to copy them back. )
+ * register contents so we have to load them back. )
*/
- if (!copy_fpregs_to_fpstate(dst_fpu)) {
- memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
- copy_kernel_to_fpregs(&src_fpu->state);
- }
+ fpregs_lock();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+ else if (!copy_fpregs_to_fpstate(dst_fpu))
+ copy_kernel_to_fpregs(&dst_fpu->state);
+
+ fpregs_unlock();
+
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -223,20 +244,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Activate the current task's in-memory FPU context,
* if it has not been used before:
*/
-void fpu__initialize(struct fpu *fpu)
+static void fpu__initialize(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- if (!fpu->initialized) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for the current task: */
- fpu->initialized = 1;
- }
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ fpstate_init(&fpu->state);
+ trace_x86_fpu_init_state(fpu);
}
-EXPORT_SYMBOL_GPL(fpu__initialize);
/*
* This function must be called before we read a task's fpstate.
@@ -248,32 +263,20 @@ EXPORT_SYMBOL_GPL(fpu__initialize);
*
* - or it's called for stopped tasks (ptrace), in which case the
* registers were already saved by the context-switch code when
- * the task scheduled out - we only have to initialize the registers
- * if they've never been initialized.
+ * the task scheduled out.
*
* If the task has used the FPU before then save it.
*/
void fpu__prepare_read(struct fpu *fpu)
{
- if (fpu == &current->thread.fpu) {
+ if (fpu == &current->thread.fpu)
fpu__save(fpu);
- } else {
- if (!fpu->initialized) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for current and for stopped child tasks: */
- fpu->initialized = 1;
- }
- }
}
/*
* This function must be called before we write a task's fpstate.
*
- * If the task has used the FPU before then invalidate any cached FPU registers.
- * If the task has not used the FPU before then initialize its fpstate.
+ * Invalidate any cached FPU registers.
*
* After this function call, after registers in the fpstate are
* modified and the child task has woken up, the child task will
@@ -290,42 +293,9 @@ void fpu__prepare_write(struct fpu *fpu)
*/
WARN_ON_FPU(fpu == &current->thread.fpu);
- if (fpu->initialized) {
- /* Invalidate any cached state: */
- __fpu_invalidate_fpregs_state(fpu);
- } else {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for stopped child tasks: */
- fpu->initialized = 1;
- }
-}
-
-/*
- * 'fpu__restore()' is called to copy FPU registers from
- * the FPU fpstate to the live hw registers and to activate
- * access to the hardware registers, so that FPU instructions
- * can be used afterwards.
- *
- * Must be called with kernel preemption disabled (for example
- * with local interrupts disabled, as it is in the case of
- * do_device_not_available()).
- */
-void fpu__restore(struct fpu *fpu)
-{
- fpu__initialize(fpu);
-
- /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
- kernel_fpu_disable();
- trace_x86_fpu_before_restore(fpu);
- fpregs_activate(fpu);
- copy_kernel_to_fpregs(&fpu->state);
- trace_x86_fpu_after_restore(fpu);
- kernel_fpu_enable();
+ /* Invalidate any cached state: */
+ __fpu_invalidate_fpregs_state(fpu);
}
-EXPORT_SYMBOL_GPL(fpu__restore);
/*
* Drops current FPU state: deactivates the fpregs and
@@ -341,17 +311,13 @@ void fpu__drop(struct fpu *fpu)
preempt_disable();
if (fpu == &current->thread.fpu) {
- if (fpu->initialized) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- fpregs_deactivate(fpu);
- }
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
}
- fpu->initialized = 0;
-
trace_x86_fpu_dropped(fpu);
preempt_enable();
@@ -363,6 +329,8 @@ void fpu__drop(struct fpu *fpu)
*/
static inline void copy_init_fpstate_to_fpregs(void)
{
+ fpregs_lock();
+
if (use_xsave())
copy_kernel_to_xregs(&init_fpstate.xsave, -1);
else if (static_cpu_has(X86_FEATURE_FXSR))
@@ -372,6 +340,9 @@ static inline void copy_init_fpstate_to_fpregs(void)
if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
}
/*
@@ -389,16 +360,51 @@ void fpu__clear(struct fpu *fpu)
/*
* Make sure fpstate is cleared and initialized.
*/
- if (static_cpu_has(X86_FEATURE_FPU)) {
- preempt_disable();
- fpu__initialize(fpu);
- user_fpu_begin();
+ fpu__initialize(fpu);
+ if (static_cpu_has(X86_FEATURE_FPU))
copy_init_fpstate_to_fpregs();
- preempt_enable();
- }
}
/*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return;
+
+ __fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU ctx on this CPU)
+ * is not valid then we must have TIF_NEED_FPU_LOAD set so the context is loaded on
+ * return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ return;
+ WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ fpregs_activate(fpu);
+ fpu->last_cpu = smp_processor_id();
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
+/*
* x87 math exception handling:
*/
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6abd83572b01..20d8fa7124c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
-
- WARN_ON_FPU(current->thread.fpu.initialized);
}
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index bc02f5144b95..d652b939ccfb 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -15,16 +15,12 @@
*/
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
- struct fpu *target_fpu = &target->thread.fpu;
-
- return target_fpu->initialized ? regset->n : 0;
+ return regset->n;
}
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
- struct fpu *target_fpu = &target->thread.fpu;
-
- if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
return regset->n;
else
return 0;
@@ -269,11 +265,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0]));
}
-void convert_to_fxsr(struct task_struct *tsk,
+void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env)
{
- struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
int i;
@@ -350,7 +345,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
if (!ret)
- convert_to_fxsr(target, &env);
+ convert_to_fxsr(&target->thread.fpu.state.fxsave, &env);
/*
* update the header bit in the xsave header, indicating the
@@ -371,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
{
struct task_struct *tsk = current;
- struct fpu *fpu = &tsk->thread.fpu;
- int fpvalid;
-
- fpvalid = fpu->initialized;
- if (fpvalid)
- fpvalid = !fpregs_get(tsk, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- ufpu, NULL);
- return fpvalid;
+ return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct),
+ ufpu, NULL);
}
EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index f6a1d299627c..a4715458e972 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -144,9 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
- * If the fpu, extended register state is live, save the state directly
- * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
- * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ * Try to save it directly to the user frame with disabled page fault handler.
+ * If this fails then do the slow path where the FPU state is first saved to
+ * task's fpu->state and then copy it to the user frame pointed by the aligned
+ * pointer 'buf_fx'.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
@@ -156,10 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
- struct fpu *fpu = &current->thread.fpu;
- struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
+ int ret;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -172,28 +172,34 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
- if (fpu->initialized || using_compacted_format()) {
- /* Save the live register state to the user directly. */
- if (copy_fpregs_to_sigframe(buf_fx))
- return -1;
- /* Update the thread's fxstate to save the fsave header. */
- if (ia32_fxstate)
- copy_fxregs_to_kernel(fpu);
- } else {
- /*
- * It is a *bug* if kernel uses compacted-format for xsave
- * area and we copy it out directly to a signal frame. It
- * should have been handled above by saving the registers
- * directly.
- */
- if (boot_cpu_has(X86_FEATURE_XSAVES)) {
- WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
- return -1;
- }
-
- fpstate_sanitize_xstate(fpu);
- if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
- return -1;
+retry:
+ fpregs_lock();
+ /*
+ * Load the FPU registers if they are not valid for the current task.
+ * With a valid FPU state we can attempt to save the state directly to
+ * userland's stack frame which will likely succeed. If it does not,
+ * resolve the fault in the user memory and try again.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ __fpregs_load_activate();
+
+ pagefault_disable();
+ ret = copy_fpregs_to_sigframe(buf_fx);
+ pagefault_enable();
+ fpregs_unlock();
+
+ if (ret) {
+ int aligned_size;
+ int nr_pages;
+
+ aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size;
+ nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE);
+
+ ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages,
+ NULL, FOLL_WRITE);
+ if (ret == nr_pages)
+ goto retry;
+ return -EFAULT;
}
/* Save the fsave header for the 32-bit frames. */
@@ -207,11 +213,11 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
}
static inline void
-sanitize_restored_xstate(struct task_struct *tsk,
+sanitize_restored_xstate(union fpregs_state *state,
struct user_i387_ia32_struct *ia32_env,
u64 xfeatures, int fx_only)
{
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
@@ -238,17 +244,18 @@ sanitize_restored_xstate(struct task_struct *tsk,
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
- convert_to_fxsr(tsk, ia32_env);
+ if (ia32_env)
+ convert_to_fxsr(&state->fxsave, ia32_env);
}
}
/*
* Restore the extended state if present. Otherwise, restore the FP/SSE state.
*/
-static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
+static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{
if (use_xsave()) {
- if ((unsigned long)buf % 64 || fx_only) {
+ if (fx_only) {
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
return copy_user_to_fxregs(buf);
@@ -266,12 +273,15 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_
static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
{
+ struct user_i387_ia32_struct *envp = NULL;
int ia32_fxstate = (buf != buf_fx);
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
int state_size = fpu_kernel_xstate_size;
+ struct user_i387_ia32_struct env;
u64 xfeatures = 0;
int fx_only = 0;
+ int ret = 0;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -284,8 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(buf, size))
return -EACCES;
- fpu__initialize(fpu);
-
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
@@ -308,61 +316,92 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
}
+ /*
+ * The current state of the FPU registers does not matter. By setting
+ * TIF_NEED_FPU_LOAD unconditionally it is ensured that our xstate is
+ * not modified on context switch and that the xstate is considered to
+ * be loaded again on return to userland (overriding last_cpu avoids
+ * the optimisation).
+ */
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ __fpu_invalidate_fpregs_state(fpu);
+
+ if ((unsigned long)buf_fx % 64)
+ fx_only = 1;
+ /*
+ * For 32-bit frames with fxstate, copy the fxstate so it can be
+ * reconstructed later.
+ */
if (ia32_fxstate) {
- /*
- * For 32-bit frames with fxstate, copy the user state to the
- * thread's fpu state, reconstruct fxstate from the fsave
- * header. Validate and sanitize the copied state.
- */
- struct user_i387_ia32_struct env;
- int err = 0;
+ ret = __copy_from_user(&env, buf, sizeof(env));
+ if (ret)
+ goto err_out;
+ envp = &env;
+ } else {
+ fpregs_lock();
+ pagefault_disable();
+ ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
+ pagefault_enable();
+ if (!ret) {
+ fpregs_mark_activate();
+ fpregs_unlock();
+ return 0;
+ }
+ fpregs_unlock();
+ }
- /*
- * Drop the current fpu which clears fpu->initialized. This ensures
- * that any context-switch during the copy of the new state,
- * avoids the intermediate state from getting restored/saved.
- * Thus avoiding the new restored state from getting corrupted.
- * We will be ready to restore/save the state only after
- * fpu->initialized is again set.
- */
- fpu__drop(fpu);
+ if (use_xsave() && !fx_only) {
+ u64 init_bv = xfeatures_mask & ~xfeatures;
if (using_compacted_format()) {
- err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
+ ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
} else {
- err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+ ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
- if (!err && state_size > offsetof(struct xregs_state, header))
- err = validate_xstate_header(&fpu->state.xsave.header);
+ if (!ret && state_size > offsetof(struct xregs_state, header))
+ ret = validate_xstate_header(&fpu->state.xsave.header);
}
+ if (ret)
+ goto err_out;
- if (err || __copy_from_user(&env, buf, sizeof(env))) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
- err = -1;
- } else {
- sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
+ sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+
+ fpregs_lock();
+ if (unlikely(init_bv))
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
+
+ } else if (use_fxsr()) {
+ ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto err_out;
}
- local_bh_disable();
- fpu->initialized = 1;
- fpu__restore(fpu);
- local_bh_enable();
+ sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
- return err;
- } else {
- /*
- * For 64-bit frames and 32-bit fsave frames, restore the user
- * state to the registers directly (with exceptions handled).
- */
- user_fpu_begin();
- if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
- fpu__clear(fpu);
- return -1;
+ fpregs_lock();
+ if (use_xsave()) {
+ u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
}
+
+ ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave);
+ } else {
+ ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
+ if (ret)
+ goto err_out;
+ fpregs_lock();
+ ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
}
+ if (!ret)
+ fpregs_mark_activate();
+ fpregs_unlock();
- return 0;
+err_out:
+ if (ret)
+ fpu__clear(fpu);
+ return ret;
}
static inline int xstate_sigframe_size(void)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9cc108456d0b..b646cc349c1e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -805,20 +805,18 @@ void fpu__resume_cpu(void)
}
/*
- * Given an xstate feature mask, calculate where in the xsave
+ * Given an xstate feature nr, calculate where in the xsave
* buffer the state is. Callers should ensure that the buffer
* is valid.
*/
-static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
- int feature_nr = fls64(xstate_feature_mask) - 1;
-
- if (!xfeature_enabled(feature_nr)) {
+ if (!xfeature_enabled(xfeature_nr)) {
WARN_ON_FPU(1);
return NULL;
}
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return (void *)xsave + xstate_comp_offsets[xfeature_nr];
}
/*
* Given the xsave area and a state inside, this function returns the
@@ -832,13 +830,13 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask
*
* Inputs:
* xstate: the thread's storage area for all FPU data
- * xstate_feature: state which is defined in xsave.h (e.g.
- * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
+ * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area, or NULL if the
* field is not present in the xsave buffer.
*/
-void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
/*
* Do we even *have* xsave state?
@@ -851,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
* have not enabled. Remember that pcntxt_mask is
* what we write to the XCR0 register.
*/
- WARN_ONCE(!(xfeatures_mask & xstate_feature),
+ WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
- * have requested that 'xstate_feature' be saved.
+ * have requested that 'xfeature_nr' be saved.
* If it did not, we might be seeing and old value
* of the field in the buffer.
*
@@ -864,10 +862,10 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
* or because the "init optimization" caused it
* to not be saved.
*/
- if (!(xsave->header.xfeatures & xstate_feature))
+ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
return NULL;
- return __raw_xsave_addr(xsave, xstate_feature);
+ return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -882,25 +880,23 @@ EXPORT_SYMBOL_GPL(get_xsave_addr);
* Note that this only works on the current task.
*
* Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
+ * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area or NULL if the state
* is not present or is in its 'init state'.
*/
-const void *get_xsave_field_ptr(int xsave_state)
+const void *get_xsave_field_ptr(int xfeature_nr)
{
struct fpu *fpu = &current->thread.fpu;
- if (!fpu->initialized)
- return NULL;
/*
* fpu__save() takes the CPU's xstate registers
* and saves them off to the 'fpu memory buffer.
*/
fpu__save(fpu);
- return get_xsave_addr(&fpu->state.xsave, xsave_state);
+ return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
}
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1016,7 +1012,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
* Copy only in-use xstates:
*/
if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ void *src = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1102,7 +1098,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
* Copy only in-use xstates:
*/
if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ void *src = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1159,7 +1155,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1213,7 +1209,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index e47cd9390ab4..2a2e87717bad 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -17,6 +17,11 @@ static enum efi_secureboot_mode get_sb_mode(void)
size = sizeof(secboot);
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_info("ima: secureboot mode unknown, no efi\n");
+ return efi_secureboot_mode_unknown;
+ }
+
/* Get variable contents into buffer */
status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid,
NULL, &size, &secboot);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 95600a99ae93..9192d76085ba 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -130,6 +130,7 @@ void irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void do_softirq_own_stack(void)
{
struct irq_stack *irqstk;
@@ -146,6 +147,7 @@ void do_softirq_own_stack(void)
call_on_stack(__do_softirq, isp);
}
+#endif
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9db049f06f2f..8145f96aae34 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
dst->thread.vm86 = NULL;
#endif
- return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
+ return fpu__copy(dst, src);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 70933193878c..15a80ad2bfd2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/io.h>
#include <linux/kdebug.h>
#include <linux/syscalls.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/ldt.h>
@@ -202,6 +203,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ int i;
+
+ /*
+ * Clear @prev's kmap_atomic mappings
+ */
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+ pte_t *ptep = kmap_pte - idx;
+
+ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ }
+ /*
+ * Restore @next_p's kmap_atomic mappings
+ */
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ if (!pte_none(next_p->kmap_pte[i]))
+ set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
+ }
+}
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* switch_to(x,y) should switch tasks from x to y.
@@ -241,7 +271,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_fpu, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -271,12 +302,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
switch_to_extra(prev_p, next_p);
+ switch_kmaps(prev_p, next_p);
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
- * the GDT and LDT are properly updated, and must be
- * done before fpu__restore(), so the TS bit is up
- * to date.
+ * the GDT and LDT are properly updated.
*/
arch_end_context_switch(next_p);
@@ -297,10 +328,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
- switch_fpu_finish(next_fpu, cpu);
-
this_cpu_write(current_task, next_p);
+ switch_fpu_finish(next_fpu);
+
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 026a43be9bd1..08ef543fa2e1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -528,7 +528,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_fpu, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -546,9 +547,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Leave lazy mode, flushing any hypercalls made here. This
* must be done after loading TLS entries in the GDT but before
- * loading segments that might reference them, and and it must
- * be done before fpu__restore(), so the TS bit is up to
- * date.
+ * loading segments that might reference them.
*/
arch_end_context_switch(next_p);
@@ -576,14 +575,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
x86_fsgsbase_load(prev, next);
- switch_fpu_finish(next_fpu, cpu);
-
/*
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+ switch_fpu_finish(next_fpu);
+
/* Reload sp0. */
update_task_stack(next_p);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 08dfd4c1a4f9..6f45f795690f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -246,7 +246,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
- struct fpu *fpu = &current->thread.fpu;
+ int ret;
/* redzone */
if (IS_ENABLED(CONFIG_X86_64))
@@ -265,11 +265,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = (unsigned long) ka->sa.sa_restorer;
}
- if (fpu->initialized) {
- sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
- &buf_fx, &math_size);
- *fpstate = (void __user *)sp;
- }
+ sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
+ &buf_fx, &math_size);
+ *fpstate = (void __user *)sp;
sp = align_sigframe(sp - frame_size);
@@ -281,8 +279,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)-1L;
/* save i387 and extended state */
- if (fpu->initialized &&
- copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
+ ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
+ if (ret < 0)
return (void __user *)-1L;
return (void __user *)sp;
@@ -763,8 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (fpu->initialized)
- fpu__clear(fpu);
+ fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9b7c4ca8f0a7..626853b2ac34 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -455,7 +455,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
* which is all zeros which indicates MPX was not
* responsible for the exception.
*/
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
goto exit_trap;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6939eba2001a..4249e65adadf 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2299,7 +2299,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = 1000;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index df6e325b288b..32fe924346df 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6635,7 +6635,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
*/
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
- vcpu->arch.pkru = __read_pkru();
+ vcpu->arch.pkru = __read_pkru_ins();
if (vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0bbb21a49082..75cebef6c5aa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3691,15 +3691,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *src = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *src = get_xsave_addr(xsave, xfeature_nr);
if (src) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
else
@@ -3707,7 +3707,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -3734,22 +3734,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *dest = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
else
memcpy(dest, src + offset, size);
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -6969,6 +6969,14 @@ int kvm_arch_init(void *opaque)
goto out;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+#endif
+
r = -ENOMEM;
x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
__alignof__(struct fpu), SLAB_ACCOUNT,
@@ -7902,6 +7910,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -8160,22 +8172,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(&current->thread.fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&current->thread.fpu.state);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -8873,11 +8893,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (init_event)
kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDREGS);
+ XFEATURE_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDCSR);
+ XFEATURE_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
if (init_event)
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 9e2ba7e667f6..a873da6b46d6 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info)
unsigned long code_base = 0;
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
struct desc_struct code_descriptor;
- struct fpu *fpu = &current->thread.fpu;
-
- fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 0d4bdcb84da5..97288bcfd923 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap);
*/
void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
+ pte_t pte = mk_pte(page, prot);
unsigned long vaddr;
int idx, type;
- preempt_disable();
+ preempt_disable_nort();
pagefault_disable();
if (!PageHighMem(page))
@@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
- set_pte(kmap_pte-idx, mk_pte(page, prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_pte(kmap_pte-idx, pte);
arch_flush_lazy_mmu_mode();
return (void *)vaddr;
@@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr)
* is a bad idea also, in case the page changes cacheability
* attributes or becomes a protected page in a hypervisor.
*/
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
kpte_clear_flush(kmap_pte-idx, vaddr);
kmap_atomic_idx_pop();
arch_flush_lazy_mmu_mode();
@@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr)
#endif
pagefault_enable();
- preempt_enable();
+ preempt_enable_nort();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index b3294d36769d..c0ec8d430c02 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -59,6 +59,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
+ pte_t pte = pfn_pte(pfn, prot);
unsigned long vaddr;
int idx, type;
@@ -68,7 +69,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
+ WARN_ON(!pte_none(*(kmap_pte - idx)));
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_pte(kmap_pte - idx, pte);
arch_flush_lazy_mmu_mode();
return (void *)vaddr;
@@ -119,6 +125,9 @@ iounmap_atomic(void __iomem *kvaddr)
* is a bad idea also, in case the page changes cacheability
* attributes or becomes a protected page in a hypervisor.
*/
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
kpte_clear_flush(kmap_pte-idx, vaddr);
kmap_atomic_idx_pop();
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index ea17ff6c8588..b82d12e3c90d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
goto err_out;
}
/* get bndregs field from current task's xsave area */
- bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
+ bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
if (!bndregs) {
err = -EINVAL;
goto err_out;
@@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void)
* The bounds directory pointer is stored in a register
* only accessible if we first do an xsave.
*/
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return MPX_INVALID_BOUNDS_DIR;
@@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void)
const struct mpx_bndcsr *bndcsr;
struct mm_struct *mm = current->mm;
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return -EINVAL;
/*
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 047a77f6a10c..1dcfc91c8f0c 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,6 +18,7 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
+#include <asm/fpu/internal.h> /* init_fpstate */
int __execute_only_pkey(struct mm_struct *mm)
{
@@ -39,17 +40,12 @@ int __execute_only_pkey(struct mm_struct *mm)
* dance to set PKRU if we do not need to. Check it
* first and assume that if the execute-only pkey is
* write-disabled that we do not have to set it
- * ourselves. We need preempt off so that nobody
- * can make fpregs inactive.
+ * ourselves.
*/
- preempt_disable();
if (!need_to_set_mm_pkey &&
- current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
- preempt_enable();
return execute_only_pkey;
}
- preempt_enable();
/*
* Set up PKRU so that it denies access for everything
@@ -131,7 +127,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-static
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
@@ -148,13 +143,6 @@ void copy_init_pkru_to_fpregs(void)
{
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
/*
- * Any write to PKRU takes it out of the XSAVE 'init
- * state' which increases context switch cost. Avoid
- * writing 0 when PKRU was already 0.
- */
- if (!init_pkru_value_snapshot && !read_pkru())
- return;
- /*
* Override the PKRU state that came from 'init_fpstate'
* with the baseline from the process.
*/
@@ -174,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
static ssize_t init_pkru_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
+ struct pkru_state *pk;
char buf[32];
ssize_t len;
u32 new_init_pkru;
@@ -196,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file,
return -EINVAL;
WRITE_ONCE(init_pkru_value, new_init_pkru);
+ pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+ if (!pk)
+ return -EINVAL;
+ pk->pkru = new_init_pkru;
return count;
}
diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
index bb1fe6c1816e..8a22f1e7b6c9 100644
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -2,10 +2,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;