author     Bruce Ashfield <bruce.ashfield@gmail.com>  2021-03-19 13:19:14 -0400
committer  Bruce Ashfield <bruce.ashfield@gmail.com>  2021-03-19 13:19:14 -0400
commit     537f0c12d3bae161ae874d615de20708d6ebfb73
tree       418331f9fea8875ac9e5f66b9555222ddd12ca71
parent     3cfc9ae99969d8a6b3a3dc87b41196fa96e87dfa
highmem: Don't disable preemption on RT in kmap_atomic()
1/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: highmem: Don't disable preemption on RT in kmap_atomic() Date: Fri, 30 Oct 2020 13:59:06 +0100 Disabling preemption makes it impossible to acquire sleeping locks within kmap_atomic() section. For PREEMPT_RT it is sufficient to disable migration. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 2/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: timers: Move clearing of base::timer_running under base::lock Date: Sun, 6 Dec 2020 22:40:07 +0100 syzbot reported KCSAN data races vs. timer_base::timer_running being set to NULL without holding base::lock in expire_timers(). This looks innocent and most reads are clearly not problematic but for a non-RT kernel it's completely irrelevant whether the store happens before or after taking the lock. For an RT kernel moving the store under the lock requires an extra unlock/lock pair in the case that there is a waiter for the timer. But that's not the end of the world and definitely not worth the trouble of adding boatloads of comments and annotations to the code. Famous last words... Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: stable-rt@vger.kernel.org ] 3/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: kthread: Move prio/affinite change into the newly created thread Date: Mon, 9 Nov 2020 21:30:41 +0100 With enabled threaded interrupts the nouveau driver reported the following: | Chain exists of: | &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem | | Possible unsafe locking scenario: | | CPU0 CPU1 | ---- ---- | lock(&cpuset_rwsem); | lock(&device->mutex); | lock(&cpuset_rwsem); | lock(&mm->mmap_lock#2); The device->mutex is nvkm_device::mutex. Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing to do. Move the priority reset to the start of the newly created thread. Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de ] 4/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: genirq: Move prio assignment into the newly created thread Date: Mon, 9 Nov 2020 23:32:39 +0100 With enabled threaded interrupts the nouveau driver reported the following: | Chain exists of: | &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem | | Possible unsafe locking scenario: | | CPU0 CPU1 | ---- ---- | lock(&cpuset_rwsem); | lock(&device->mutex); | lock(&cpuset_rwsem); | lock(&mm->mmap_lock#2); The device->mutex is nvkm_device::mutex. Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing to do. Move the priority assignment to the start of the newly created thread. 
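A minimal sketch of the idea behind 1/191 above, assuming the RT tree's migrate_disable()/migrate_enable() API; it shows the direction of the change (keep the task on its CPU but leave it preemptible on PREEMPT_RT), not the actual highmem diff:

#include <linux/preempt.h>
#include <linux/uaccess.h>

/* Illustrative only: the shape of the enter/exit paths of a
 * kmap_atomic()-style section once RT stops disabling preemption. */
static inline void example_kmap_section_enter(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		migrate_disable();	/* stay on this CPU, sleeping locks still usable */
	else
		preempt_disable();	/* unchanged !RT behaviour */
	pagefault_disable();
}

static inline void example_kmap_section_exit(void)
{
	pagefault_enable();
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		migrate_enable();
	else
		preempt_enable();
}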
Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: Patch description] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de ] 5/191 [ Author: Valentin Schneider Email: valentin.schneider@arm.com Subject: notifier: Make atomic_notifiers use raw_spinlock Date: Sun, 22 Nov 2020 20:19:04 +0000 Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno leads to the idle task blocking on an RT sleeping spinlock down some notifier path: [ 1.809101] BUG: scheduling while atomic: swapper/5/0/0x00000002 [ 1.809116] Modules linked in: [ 1.809123] Preemption disabled at: [ 1.809125] secondary_start_kernel (arch/arm64/kernel/smp.c:227) [ 1.809146] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.10.0-rc3-rt7 #168 [ 1.809153] Hardware name: ARM Juno development board (r0) (DT) [ 1.809158] Call trace: [ 1.809160] dump_backtrace (arch/arm64/kernel/stacktrace.c:100 (discriminator 1)) [ 1.809170] show_stack (arch/arm64/kernel/stacktrace.c:198) [ 1.809178] dump_stack (lib/dump_stack.c:122) [ 1.809188] __schedule_bug (kernel/sched/core.c:4886) [ 1.809197] __schedule (./arch/arm64/include/asm/preempt.h:18 kernel/sched/core.c:4913 kernel/sched/core.c:5040) [ 1.809204] preempt_schedule_lock (kernel/sched/core.c:5365 (discriminator 1)) [ 1.809210] rt_spin_lock_slowlock_locked (kernel/locking/rtmutex.c:1072) [ 1.809217] rt_spin_lock_slowlock (kernel/locking/rtmutex.c:1110) [ 1.809224] rt_spin_lock (./include/linux/rcupdate.h:647 kernel/locking/rtmutex.c:1139) [ 1.809231] atomic_notifier_call_chain_robust (kernel/notifier.c:71 kernel/notifier.c:118 kernel/notifier.c:186) [ 1.809240] cpu_pm_enter (kernel/cpu_pm.c:39 kernel/cpu_pm.c:93) [ 1.809249] psci_enter_idle_state (drivers/cpuidle/cpuidle-psci.c:52 drivers/cpuidle/cpuidle-psci.c:129) [ 1.809258] cpuidle_enter_state (drivers/cpuidle/cpuidle.c:238) [ 1.809267] cpuidle_enter (drivers/cpuidle/cpuidle.c:353) [ 1.809275] do_idle (kernel/sched/idle.c:132 kernel/sched/idle.c:213 kernel/sched/idle.c:273) [ 1.809282] cpu_startup_entry (kernel/sched/idle.c:368 (discriminator 1)) [ 1.809288] secondary_start_kernel (arch/arm64/kernel/smp.c:273) Two points worth noting: 1) That this is conceptually the same issue as pointed out in: 313c8c16ee62 ("PM / CPU: replace raw_notifier with atomic_notifier") 2) Only the _robust() variant of atomic_notifier callchains suffer from this AFAICT only the cpu_pm_notifier_chain really needs to be changed, but singling it out would mean introducing a new (truly) non-blocking API. At the same time, callers that are fine with any blocking within the call chain should use blocking notifiers, so patching up all atomic_notifier's doesn't seem *too* crazy to me. 
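To make the direction of 5/191 concrete, here is a hedged sketch of an atomic notifier head whose registration lock is a raw_spinlock_t, so the chain stays callable from truly atomic context such as the cpuidle path in the backtrace. The field layout follows include/linux/notifier.h, but this is illustrative and not the diff itself:

#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>

struct example_atomic_notifier_head {
	raw_spinlock_t lock;			/* was spinlock_t, which sleeps on RT */
	struct notifier_block __rcu *head;
};

static int example_register(struct example_atomic_notifier_head *nh,
			    struct notifier_block *nb)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&nh->lock, flags);	/* safe from IRQ/idle context */
	nb->next = nh->head;		/* simplified: the real code orders by priority */
	rcu_assign_pointer(nh->head, nb);
	raw_spin_unlock_irqrestore(&nh->lock, flags);
	return 0;
}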
Fixes: 70d932985757 ("notifier: Fix broken error handling pattern") Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com> Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 6/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: powerpc/mm: Move the linear_mapping_mutex to the ifdef where it is used Date: Fri, 19 Feb 2021 17:51:07 +0100 The mutex linear_mapping_mutex is defined at the of the file while its only two user are within the CONFIG_MEMORY_HOTPLUG block. A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because its mutex implementation is smart enough to realize that it is unused. Move the definition of linear_mapping_mutex to ifdef block where it is used. Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()") Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 7/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: limit second loop of syslog_print_all Date: Wed, 17 Feb 2021 16:15:31 +0100 The second loop of syslog_print_all() subtracts lengths that were added in the first loop. With commit b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") it is possible that records are (over)written during syslog_print_all(). This allows the possibility of the second loop subtracting lengths that were never added in the first loop. This situation can result in syslog_print_all() filling the buffer starting from a later record, even though there may have been room to fit the earlier record(s) as well. Fixes: b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") Signed-off-by: John Ogness <john.ogness@linutronix.de> Reviewed-by: Petr Mladek <pmladek@suse.com> ] 8/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: kmsg_dump: remove unused fields Date: Mon, 21 Dec 2020 11:19:39 +0106 struct kmsg_dumper still contains some fields that were used to iterate the old ringbuffer. They are no longer used. Remove them and update the struct documentation. Signed-off-by: John Ogness <john.ogness@linutronix.de> Reviewed-by: Petr Mladek <pmladek@suse.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 9/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: refactor kmsg_dump_get_buffer() Date: Mon, 30 Nov 2020 01:41:56 +0106 kmsg_dump_get_buffer() requires nearly the same logic as syslog_print_all(), but uses different variable names and does not make use of the ringbuffer loop macros. Modify kmsg_dump_get_buffer() so that the implementation is as similar to syslog_print_all() as possible. A follow-up commit will move this common logic into a separate helper function. Signed-off-by: John Ogness <john.ogness@linutronix.de> Reviewed-by: Petr Mladek <pmladek@suse.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 10/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: consolidate kmsg_dump_get_buffer/syslog_print_all code Date: Wed, 13 Jan 2021 11:29:53 +0106 The logic for finding records to fit into a buffer is the same for kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper function find_first_fitting_seq() to handle this logic. 
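A hedged sketch of the pattern described in 6/191: keep the mutex definition inside the only #ifdef block that uses it, so a !CONFIG_MEMORY_HOTPLUG PREEMPT_RT build is not left with a provably unused lock. Names are illustrative, not the powerpc diff:

#include <linux/mutex.h>

#ifdef CONFIG_MEMORY_HOTPLUG
/* Defined next to its only users instead of at the top of the file. */
static DEFINE_MUTEX(linear_mapping_mutex);

static void example_remove_linear_mapping(void)
{
	mutex_lock(&linear_mapping_mutex);
	/* ... tear down the linear mapping ... */
	mutex_unlock(&linear_mapping_mutex);
}
#endif /* CONFIG_MEMORY_HOTPLUG */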
Signed-off-by: John Ogness <john.ogness@linutronix.de> ] 11/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: introduce CONSOLE_LOG_MAX for improved multi-line support Date: Thu, 10 Dec 2020 12:48:01 +0106 Instead of using "LOG_LINE_MAX + PREFIX_MAX" for temporary buffer sizes, introduce CONSOLE_LOG_MAX. This represents the maximum size that is allowed to be printed to the console for a single record. Rather than setting CONSOLE_LOG_MAX to "LOG_LINE_MAX + PREFIX_MAX" (1024), increase it to 4096. With a larger buffer size, multi-line records that are nearly LOG_LINE_MAX in length will have a better chance of being fully printed. (When formatting a record for the console, each line of a multi-line record is prepended with a copy of the prefix.) Signed-off-by: John Ogness <john.ogness@linutronix.de> ] 12/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: use seqcount_latch for clear_seq Date: Mon, 30 Nov 2020 01:41:58 +0106 kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However, this is not done atomically. Since @clear_seq is 64-bit, this cannot be an atomic operation for all platforms. Therefore, use a seqcount_latch to allow readers to always read a consistent value. Signed-off-by: John Ogness <john.ogness@linutronix.de> Reviewed-by: Petr Mladek <pmladek@suse.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 13/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: use atomic64_t for devkmsg_user.seq Date: Thu, 10 Dec 2020 15:33:40 +0106 @user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock is removed, @user->seq will be no longer safe from an atomicity point of view. In preparation for the removal of @logbuf_lock, change it to atomic64_t to provide this safety. Signed-off-by: John Ogness <john.ogness@linutronix.de> ] 14/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: add syslog_lock Date: Thu, 10 Dec 2020 16:58:02 +0106 The global variables @syslog_seq, @syslog_partial, @syslog_time and write access to @clear_seq are protected by @logbuf_lock. Once @logbuf_lock is removed, these variables will need their own synchronization method. Introduce @syslog_lock for this purpose. @syslog_lock is a raw_spin_lock for now. This simplifies the transition to removing @logbuf_lock. Once @logbuf_lock and the safe buffers are removed, @syslog_lock can change to spin_lock. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 15/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: introduce a kmsg_dump iterator Date: Fri, 18 Dec 2020 11:40:08 +0000 Rather than store the iterator information into the registered kmsg_dump structure, create a separate iterator structure. The kmsg_dump_iter structure can reside on the stack of the caller, thus allowing lockless use of the kmsg_dump functions. This is in preparation for removal of @logbuf_lock. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 16/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: um: synchronize kmsg_dumper Date: Mon, 21 Dec 2020 11:10:03 +0106 The kmsg_dumper can be called from any context and CPU, possibly from multiple CPUs simultaneously. Since a static buffer is used to retrieve the kernel logs, this buffer must be protected against simultaneous dumping. 
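The seqcount_latch approach in 12/191 can be pictured with a small sketch: two copies of the 64-bit value are kept and lockless readers retry until they see a consistent one. This follows the generic latch pattern from <linux/seqlock.h>; the names are illustrative rather than printk's own, and writers must still be serialized externally:

#include <linux/seqlock.h>

static struct {
	seqcount_latch_t latch;
	u64 val[2];			/* two copies, selected by latch parity */
} example_seq;

static void example_seq_set(u64 v)		/* callers hold the writer lock */
{
	raw_write_seqcount_latch(&example_seq.latch);
	example_seq.val[0] = v;
	raw_write_seqcount_latch(&example_seq.latch);
	example_seq.val[1] = v;
}

static u64 example_seq_get(void)		/* lockless, works on 32-bit too */
{
	unsigned int seq;
	u64 v;

	do {
		seq = raw_read_seqcount_latch(&example_seq.latch);
		v = example_seq.val[seq & 1];
	} while (read_seqcount_latch_retry(&example_seq.latch, seq));
	return v;
}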
Cc: Richard Weinberger <richard@nod.at> Signed-off-by: John Ogness <john.ogness@linutronix.de> Reviewed-by: Petr Mladek <pmladek@suse.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 17/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: remove logbuf_lock Date: Tue, 26 Jan 2021 17:43:19 +0106 Since the ringbuffer is lockless, there is no need for it to be protected by @logbuf_lock. Remove @logbuf_lock. This means that printk_nmi_direct and printk_safe_flush_on_panic() no longer need to acquire any lock to run. @console_seq, @exclusive_console_stop_seq, @console_dropped are protected by @console_lock. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 18/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: kmsg_dump: remove _nolock() variants Date: Mon, 21 Dec 2020 10:27:58 +0106 kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is no need for _nolock() variants. Remove these functions and switch all callers of the _nolock() variants. The functions without _nolock() were chosen because they are already exported to kernel modules. Signed-off-by: John Ogness <john.ogness@linutronix.de> ] 19/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: kmsg_dump: use kmsg_dump_rewind Date: Wed, 17 Feb 2021 18:23:16 +0100 kmsg_dump() is open coding the kmsg_dump_rewind(). Call kmsg_dump_rewind() instead. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 20/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: console: remove unnecessary safe buffer usage Date: Wed, 17 Feb 2021 18:28:05 +0100 Upon registering a console, safe buffers are activated when setting up the sequence number to replay the log. However, these are already protected by @console_sem and @syslog_lock. Remove the unnecessary safe buffer usage. Signed-off-by: John Ogness <john.ogness@linutronix.de> ] 21/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: track/limit recursion Date: Fri, 11 Dec 2020 00:55:25 +0106 Limit printk() recursion to 1 level. This is enough to print a stacktrace for the printk call, should a WARN or BUG occur. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 22/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: remove safe buffers Date: Mon, 30 Nov 2020 01:42:00 +0106 With @logbuf_lock removed, the high level printk functions for storing messages are lockless. Messages can be stored from any context, so there is no need for the NMI and safe buffers anymore. Remove the NMI and safe buffers. In NMI or safe contexts, store the message immediately but still use irq_work to defer the console printing. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 23/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: convert @syslog_lock to spin_lock Date: Thu, 18 Feb 2021 17:37:41 +0100 Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 24/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: console: add write_atomic interface Date: Mon, 30 Nov 2020 01:42:01 +0106 Add a write_atomic() callback to the console. This is an optional function for console drivers. 
The function must be atomic (including NMI safe) for writing to the console. Console drivers must still implement the write() callback. The write_atomic() callback will only be used in special situations, such as when the kernel panics. Creating an NMI safe write_atomic() that must synchronize with write() requires a careful implementation of the console driver. To aid with the implementation, a set of console_atomic_*() functions are provided: void console_atomic_lock(unsigned int *flags); void console_atomic_unlock(unsigned int flags); These functions synchronize using a processor-reentrant spinlock (called a cpulock). Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 25/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: serial: 8250: implement write_atomic Date: Mon, 30 Nov 2020 01:42:02 +0106 Implement a non-sleeping NMI-safe write_atomic() console function in order to support emergency console printing. Since interrupts need to be disabled during transmit, all usage of the IER register is wrapped with access functions that use the console_atomic_lock() function to synchronize register access while tracking the state of the interrupts. This is necessary because write_atomic() can be called from an NMI context that has preempted write_atomic(). Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 26/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: relocate printk_delay() and vprintk_default() Date: Mon, 30 Nov 2020 01:42:03 +0106 Move printk_delay() and vprintk_default() "as is" further up so that they can be used by new functions in an upcoming commit. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 27/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: combine boot_delay_msec() into printk_delay() Date: Mon, 30 Nov 2020 01:42:04 +0106 boot_delay_msec() is always called immediately before printk_delay() so just combine the two. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 28/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: change @console_seq to atomic64_t Date: Mon, 30 Nov 2020 01:42:05 +0106 In preparation for atomic printing, change @console_seq to atomic so that it can be accessed without requiring @console_sem. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 29/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: introduce kernel sync mode Date: Mon, 30 Nov 2020 01:42:06 +0106 When the kernel performs an OOPS, enter into "sync mode": - only atomic consoles (write_atomic() callback) will print - printing occurs within vprintk_store() instead of console_unlock() CONSOLE_LOG_MAX is moved to printk.h to support the per-console buffer used in sync mode. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 30/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: move console printing to kthreads Date: Mon, 30 Nov 2020 01:42:07 +0106 Create a kthread for each console to perform console printing. 
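Using the console_atomic_lock()/console_atomic_unlock() interface quoted in 24/191, an atomic console callback might look roughly like the sketch below. These helpers and write_atomic() exist only in the PREEMPT_RT tree, and example_uart_put_chars() is a hypothetical stand-in for a driver's own FIFO output routine:

#include <linux/console.h>

static void example_uart_put_chars(const char *s, unsigned int count); /* hypothetical */

static void example_write_atomic(struct console *con, const char *s,
				 unsigned int count)
{
	unsigned int flags;

	console_atomic_lock(&flags);	/* processor-reentrant cpulock, NMI safe */
	example_uart_put_chars(s, count);
	console_atomic_unlock(flags);
}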
Now all console printing is fully asynchronous except for the boot console and when the kernel enters sync mode (and there are atomic consoles available). The console_lock() and console_unlock() functions now only do what their name says... locking and unlocking of the console. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 31/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: remove deferred printing Date: Mon, 30 Nov 2020 01:42:08 +0106 Since printing occurs either atomically or from the printing kthread, there is no need for any deferring or tracking possible recursion paths. Remove all printk context tracking. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 32/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: add console handover Date: Mon, 30 Nov 2020 01:42:09 +0106 If earlyprintk is used, a boot console will print directly to the console immediately. The boot console will unregister itself as soon as a non-boot console registers. However, the non-boot console does not begin printing until its kthread has started. Since this happens much later, there is a long pause in the console output. If the ringbuffer is small, messages could even be dropped during the pause. Add a new CON_HANDOVER console flag to be used internally by printk in order to track which non-boot console took over from a boot console. If handover consoles have implemented write_atomic(), they are allowed to print directly to the console until their kthread can take over. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 33/191 [ Author: John Ogness Email: john.ogness@linutronix.de Subject: printk: add pr_flush() Date: Mon, 30 Nov 2020 01:42:10 +0106 Provide a function to allow waiting for console printers to catch up to the latest logged message. Use pr_flush() to give console printers a chance to finish in critical situations if no atomic console is available. For now pr_flush() is only used in the most common error paths: panic(), print_oops_end_marker(), report_bug(), kmsg_dump(). Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 34/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: kcov: Remove kcov include from sched.h and move it to its users. Date: Thu, 18 Feb 2021 18:31:24 +0100 The recent addition of in_serving_softirq() to kconv.h results in compile failure on PREEMPT_RT because it requires task_struct::softirq_disable_cnt. This is not available if kconv.h is included from sched.h. It is not needed to include kconv.h from sched.h. All but the net/ user already include the kconv header file. Move the include of the kconv.h header from sched.h it its users. Additionally include sched.h from kconv.h to ensure that everything task_struct related is available. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Johannes Berg <johannes@sipsolutions.net> Acked-by: Andrey Konovalov <andreyknvl@google.com> Link: https://lkml.kernel.org/r/20210218173124.iy5iyqv3a4oia4vv@linutronix.de Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 35/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: cgroup: use irqsave in cgroup_rstat_flush_locked() Date: Tue, 3 Jul 2018 18:19:48 +0200 All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock either with spin_lock_irq() or spin_lock_irqsave(). cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated() in IRQ context and therefore requires _irqsave() locking suffix in cgroup_rstat_flush_locked(). Since there is no difference between spin_lock_t and raw_spin_lock_t on !RT lockdep does not complain here. On RT lockdep complains because the interrupts were not disabled here and a deadlock is possible. Acquire the raw_spin_lock_t with disabled interrupts. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 36/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: workingset: replace IRQ-off check with a lockdep assert. Date: Mon, 11 Feb 2019 10:40:46 +0100 Commit 68d48e6a2df57 ("mm: workingset: add vmstat counter for shadow nodes") introduced an IRQ-off check to ensure that a lock is held which also disabled interrupts. This does not work the same way on -RT because none of the locks, that are held, disable interrupts. Replace this check with a lockdep assert which ensures that the lock is held. Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 37/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: shmem: Use raw_spinlock_t for ->stat_lock Date: Fri, 14 Aug 2020 18:53:34 +0200 Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is per-CPU. Access here is serialized by disabling preemption. If the pool is empty, it gets reloaded from `->next_ino'. Access here is serialized by ->stat_lock which is a spinlock_t and can not be acquired with disabled preemption. One way around it would make per-CPU ino_batch struct containing the inode number a local_lock_t. Another sollution is to promote ->stat_lock to a raw_spinlock_t. The critical sections are short. The mpol_put() should be moved outside of the critical section to avoid invoking the destrutor with disabled preemption. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 38/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: net: Move lockdep where it belongs Date: Tue, 8 Sep 2020 07:32:20 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 39/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: tcp: Remove superfluous BH-disable around listening_hash Date: Mon, 12 Oct 2020 17:33:54 +0200 Commit 9652dc2eb9e40 ("tcp: relax listening_hash operations") removed the need to disable bottom half while acquiring listening_hash.lock. There are still two callers left which disable bottom half before the lock is acquired. Drop local_bh_disable() around __inet_hash() which acquires listening_hash->lock, invoke inet_ehash_nolisten() with disabled BH. inet_unhash() conditionally acquires listening_hash->lock. 
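The fix in 35/191 boils down to using the _irqsave variant on a raw_spinlock_t that is also taken from IRQ context; a minimal sketch of that rule, with an illustrative lock name rather than the cgroup code:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_stat_lock);

static void example_flush_stats(void)
{
	unsigned long flags;

	/*
	 * An outer spin_lock_irq() does not disable interrupts on RT, so
	 * this inner raw lock must disable them itself.
	 */
	raw_spin_lock_irqsave(&example_stat_lock, flags);
	/* ... update statistics that are also touched from IRQ context ... */
	raw_spin_unlock_irqrestore(&example_stat_lock, flags);
}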
Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://lore.kernel.org/linux-rt-users/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de/ Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net ] 40/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq(). Date: Mon, 15 Feb 2021 18:44:12 +0100 The softirq implementation on PREEMPT_RT does not provide do_softirq(). The other user of do_softirq() is replaced with a local_bh_disable() + enable() around the possible raise-softirq invocation. This can not be done here because migration_cpu_stop() is invoked with disabled preemption. Wake the softirq thread on PREEMPT_RT if there are any pending softirqs. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 41/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Replace barrier() with cpu_relax() in tasklet_unlock_wait() Date: Tue, 9 Mar 2021 09:42:04 +0100 A barrier() in a tight loop which waits for something to happen on a remote CPU is a pointless exercise. Replace it with cpu_relax() which allows HT siblings to make progress. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 42/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Use static inlines for stub implementations Date: Tue, 9 Mar 2021 09:42:05 +0100 Inlines exist for a reason. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 43/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Provide tasklet_disable_in_atomic() Date: Tue, 9 Mar 2021 09:42:06 +0100 Replacing the spin wait loops in tasklet_unlock_wait() with wait_var_event() is not possible as a handful of tasklet_disable() invocations are happening in atomic context. All other invocations are in teardown paths which can sleep. Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to convert the few atomic use cases over, which allows to change tasklet_disable() and tasklet_unlock_wait() in a later step. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 44/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Use spin wait in tasklet_disable() temporarily Date: Tue, 9 Mar 2021 09:42:07 +0100 To ease the transition use spin waiting in tasklet_disable() until all usage sites from atomic context have been cleaned up. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 45/191 [ Author: Peter Zijlstra Email: peterz@infradead.org Subject: tasklets: Replace spin wait in tasklet_unlock_wait() Date: Tue, 9 Mar 2021 09:42:08 +0100 tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This is wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_unlock_wait() is invoked from tasklet_kill() which is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). There are no users of tasklet_unlock_wait() which are invoked from atomic contexts. 
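The wait_var_event() scheme that 45/191 and 46/191 move to can be sketched as follows; this mirrors the pattern described above in simplified form and is not the exact tasklet code:

#include <linux/interrupt.h>
#include <linux/wait_bit.h>

static void example_tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);		/* wake sleepers in example_tasklet_unlock_wait() */
}

static void example_tasklet_unlock_wait(struct tasklet_struct *t)
{
	/* Sleep instead of spinning until the tasklet has finished running. */
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}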
The usage in tasklet_disable() has been replaced temporarily with the spin waiting variant until the atomic users are fixed up and will be converted to the sleep wait variant later. Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 46/191 [ Author: Peter Zijlstra Email: peterz@infradead.org Subject: tasklets: Replace spin wait in tasklet_kill() Date: Tue, 9 Mar 2021 09:42:09 +0100 tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking yield() from inside the loop. yield() is an ill defined mechanism and the result might still be wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_kill() is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 47/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Prevent tasklet_unlock_spin_wait() deadlock on RT Date: Tue, 9 Mar 2021 09:42:10 +0100 tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_SCHED bit in the tasklet state to be cleared. This works on !RT nicely because the corresponding execution can only happen on a different CPU. On RT softirq processing is preemptible, therefore a task preempting the softirq processing thread can spin forever. Prevent this by invoking local_bh_disable()/enable() inside the loop. In case that the softirq processing thread was preempted by the current task, current will block on the local lock which yields the CPU to the preempted softirq processing thread. If the tasklet is processed on a different CPU then the local_bh_disable()/enable() pair is just a waste of processor cycles. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 48/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net: jme: Replace link-change tasklet with work Date: Tue, 9 Mar 2021 09:42:11 +0100 The link change tasklet disables the tasklets for tx/rx processing while upating hw parameters and then enables the tasklets again. This update can also be pushed into a workqueue where it can be performed in preemptible context. This allows tasklet_disable() to become sleeping. Replace the linkch_task tasklet with a work. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 49/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net: sundance: Use tasklet_disable_in_atomic(). Date: Tue, 9 Mar 2021 09:42:12 +0100 tasklet_disable() is used in the timer callback. This might be distangled, but without access to the hardware that's a bit risky. Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be changed to a sleep wait once all remaining atomic users are converted. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Denis Kirjanov <kda@linux-powerpc.org> Cc: "David S. 
Miller" <davem@davemloft.net> Cc: Jakub Kicinski <kuba@kernel.org> Cc: netdev@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 50/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: ath9k: Use tasklet_disable_in_atomic() Date: Tue, 9 Mar 2021 09:42:13 +0100 All callers of ath9k_beacon_ensure_primary_slot() are preemptible / acquire a mutex except for this callchain: spin_lock_bh(&sc->sc_pcu_lock); ath_complete_reset() -> ath9k_calculate_summary_state() -> ath9k_beacon_ensure_primary_slot() It's unclear how that can be distangled, so use tasklet_disable_in_atomic() for now. This allows tasklet_disable() to become sleepable once the remaining atomic users are cleaned up. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: ath9k-devel@qca.qualcomm.com Cc: Kalle Valo <kvalo@codeaurora.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jakub Kicinski <kuba@kernel.org> Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Kalle Valo <kvalo@codeaurora.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 51/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: atm: eni: Use tasklet_disable_in_atomic() in the send() callback Date: Tue, 9 Mar 2021 09:42:14 +0100 The atmdev_ops::send callback which calls tasklet_disable() is invoked with bottom halfs disabled from net_device_ops::ndo_start_xmit(). All other invocations of tasklet_disable() in this driver happen in preemptible context. Change the send() call to use tasklet_disable_in_atomic() which allows tasklet_disable() to be made sleepable once the remaining atomic context usage sites are cleaned up. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Chas Williams <3chas3@gmail.com> Cc: linux-atm-general@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 52/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: PCI: hv: Use tasklet_disable_in_atomic() Date: Tue, 9 Mar 2021 09:42:15 +0100 The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is invoked via irq_chip_compose_msi_msg(), which itself is always invoked from atomic contexts from the guts of the interrupt core code. There is no way to change this w/o rewriting the whole driver, so use tasklet_disable_in_atomic() which allows to make tasklet_disable() sleepable once the remaining atomic users are addressed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: "K. Y. Srinivasan" <kys@microsoft.com> Cc: Haiyang Zhang <haiyangz@microsoft.com> Cc: Stephen Hemminger <sthemmin@microsoft.com> Cc: Wei Liu <wei.liu@kernel.org> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Cc: Rob Herring <robh@kernel.org> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: linux-hyperv@vger.kernel.org Cc: linux-pci@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 53/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: firewire: ohci: Use tasklet_disable_in_atomic() where required Date: Tue, 9 Mar 2021 09:42:16 +0100 tasklet_disable() is invoked in several places. Some of them are in atomic context which prevents a conversion of tasklet_disable() to a sleepable function. 
The atomic callchains are: ar_context_tasklet() ohci_cancel_packet() tasklet_disable() ... ohci_flush_iso_completions() tasklet_disable() The invocation of tasklet_disable() from at_context_flush() is always in preemptible context. Use tasklet_disable_in_atomic() for the two invocations in ohci_cancel_packet() and ohci_flush_iso_completions(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> Cc: linux1394-devel@lists.sourceforge.net Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 54/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tasklets: Switch tasklet_disable() to the sleep wait variant Date: Tue, 9 Mar 2021 09:42:17 +0100 -- NOT FOR IMMEDIATE MERGING -- Now that all users of tasklet_disable() are invoked from sleepable context, convert it to use tasklet_unlock_wait() which might sleep. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 55/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: softirq: Add RT specific softirq accounting Date: Tue, 9 Mar 2021 09:55:53 +0100 RT requires the softirq processing and local bottomhalf disabled regions to be preemptible. Using the normal preempt count based serialization is therefore not possible because this implicitely disables preemption. RT kernels use a per CPU local lock to serialize bottomhalfs. As local_bh_disable() can nest the lock can only be acquired on the outermost invocation of local_bh_disable() and released when the nest count becomes zero. Tasks which hold the local lock can be preempted so its required to keep track of the nest count per task. Add a RT only counter to task struct and adjust the relevant macros in preempt.h. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 56/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: irqtime: Make accounting correct on RT Date: Tue, 9 Mar 2021 09:55:54 +0100 vtime_account_irq and irqtime_account_irq() base checks on preempt_count() which fails on RT because preempt_count() does not contain the softirq accounting which is seperate on RT. These checks do not need the full preempt count as they only operate on the hard and softirq sections. Use irq_count() instead which provides the correct value on both RT and non RT kernels. The compiler is clever enough to fold the masking for !RT: 99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax - 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax + 9a2: 25 00 ff ff 00 and $0xffff00,%eax Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 57/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: softirq: Move various protections into inline helpers Date: Tue, 9 Mar 2021 09:55:55 +0100 To allow reuse of the bulk of softirq processing code for RT and to avoid #ifdeffery all over the place, split protections for various code sections out into inline helpers so the RT variant can just replace them in one go. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 58/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: softirq: Make softirq control and processing RT aware Date: Tue, 9 Mar 2021 09:55:56 +0100 Provide a local lock based serialization for soft interrupts on RT which allows the local_bh_disabled() sections and servicing soft interrupts to be preemptible. Provide the necessary inline helpers which allow to reuse the bulk of the softirq processing code. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 59/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tick/sched: Prevent false positive softirq pending warnings on RT Date: Tue, 9 Mar 2021 09:55:57 +0100 On RT a task which has soft interrupts disabled can block on a lock and schedule out to idle while soft interrupts are pending. This triggers the warning in the NOHZ idle code which complains about going idle with pending soft interrupts. But as the task is blocked soft interrupt processing is temporarily blocked as well which means that such a warning is a false positive. To prevent that check the per CPU state which indicates that a scheduled out task has soft interrupts disabled. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 60/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: rcu: Prevent false positive softirq warning on RT Date: Tue, 9 Mar 2021 09:55:58 +0100 Soft interrupt disabled sections can legitimately be preempted or schedule out when blocking on a lock on RT enabled kernels so the RCU preempt check warning has to be disabled for RT kernels. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 61/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Remove cruft Date: Tue, 29 Sep 2020 15:21:17 +0200 Most of this is around since the very beginning. I'm not sure if this was used while the rtmutex-deadlock-tester was around but today it seems to only waste memory: - save_state: No users - name: Assigned and printed if a dead lock was detected. I'm keeping it but want to point out that lockdep has the same information. - file + line: Printed if ::name was NULL. This is only used for in-kernel locks so it ::name shouldn't be NULL and then ::file and ::line isn't used. - magic: Assigned to NULL by rt_mutex_destroy(). Remove members of rt_mutex which are not used. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 62/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Remove output from deadlock detector. Date: Tue, 29 Sep 2020 16:05:11 +0200 In commit f5694788ad8da ("rt_mutex: Add lockdep annotations") rtmutex gained lockdep annotation for rt_mutex_lock() and and related functions. 
lockdep will see the locking order and may complain about a deadlock before rtmutex' own mechanism gets a chance to detect it. The rtmutex deadlock detector will only complain locks with the RT_MUTEX_MIN_CHAINWALK and a waiter must be pending. That means it works only for in-kernel locks because the futex interface always uses RT_MUTEX_FULL_CHAINWALK. The requirement for an active waiter limits the detector to actual deadlocks and makes it possible to report potential deadlocks like lockdep does. It looks like lockdep is better suited for reporting deadlocks. Remove rtmutex' debug print on deadlock detection. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 63/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES Date: Tue, 29 Sep 2020 16:32:49 +0200 rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is enabled. The static initializer (DEFINE_RT_MUTEX) does not have such a restriction. Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES. Move the remaining functions in this CONFIG_DEBUG_RT_MUTEXES block to the upper block. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 64/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Remove rt_mutex_timed_lock() Date: Wed, 7 Oct 2020 12:11:33 +0200 rt_mutex_timed_lock() has no callers since commit c051b21f71d1f ("rtmutex: Confine deadlock logic to futex") Remove rt_mutex_timed_lock(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 65/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: Handle the various new futex race conditions Date: Fri, 10 Jun 2011 11:04:15 +0200 RT opens a few new interesting race conditions in the rtmutex/futex combo due to futex hash bucket lock being a 'sleeping' spinlock and therefor not disabling preemption. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 66/191 [ Author: Steven Rostedt Email: rostedt@goodmis.org Subject: futex: Fix bug on when a requeued RT task times out Date: Tue, 14 Jul 2015 14:26:34 +0200 Requeue with timeout causes a bug with PREEMPT_RT. The bug comes from a timed out condition. TASK 1 TASK 2 ------ ------ futex_wait_requeue_pi() futex_wait_queue_me() <timed out> double_lock_hb(); raw_spin_lock(pi_lock); if (current->pi_blocked_on) { } else { current->pi_blocked_on = PI_WAKE_INPROGRESS; run_spin_unlock(pi_lock); spin_lock(hb->lock); <-- blocked! plist_for_each_entry_safe(this) { rt_mutex_start_proxy_lock(); task_blocks_on_rt_mutex(); BUG_ON(task->pi_blocked_on)!!!! The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to grab the hb->lock, which it fails to do so. As the hb->lock is a mutex, it will block and set the "pi_blocked_on" to the hb->lock. When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGESS fails because the task1's pi_blocked_on is no longer set to that, but instead, set to the hb->lock. The fix: When calling rt_mutex_start_proxy_lock() a check is made to see if the proxy tasks pi_blocked_on is set. If so, exit out early. Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies the proxy task that it is being requeued, and will handle things appropriately. 
Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 67/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: Make lock_killable work Date: Sat, 1 Apr 2017 12:50:59 +0200 Locking an rt mutex killable does not work because signal handling is restricted to TASK_INTERRUPTIBLE. Use signal_pending_state() unconditionally. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 68/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/spinlock: Split the lock types header Date: Wed, 29 Jun 2011 19:34:01 +0200 Split raw_spinlock into its own file and the remaining spinlock_t into its own non-RT header. The non-RT header will be replaced later by sleeping spinlocks. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 69/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: Avoid include hell Date: Wed, 29 Jun 2011 20:06:39 +0200 Include only the required raw types. This avoids pulling in the complete spinlock header which in turn requires rtmutex.h at some point. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 70/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: lockdep: Reduce header files in debug_locks.h Date: Fri, 14 Aug 2020 16:55:25 +0200 The inclusion of printk.h leads to circular dependency if spinlock_t is based on rt_mutex. Include only atomic.h (xchg()) and cache.h (__read_mostly). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 71/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking: split out the rbtree definition Date: Fri, 14 Aug 2020 17:08:41 +0200 rtmutex.h needs the definition for rb_root_cached. By including kernel.h we will get to spinlock.h which requires rtmutex.h again. Split out the required struct definition and move it into its own header file which can be included by rtmutex.h Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 72/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: Provide rt_mutex_slowlock_locked() Date: Thu, 12 Oct 2017 16:14:22 +0200 This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 73/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: export lockdep-less version of rt_mutex's lock, trylock and unlock Date: Thu, 12 Oct 2017 16:36:39 +0200 Required for lock implementation ontop of rtmutex. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 74/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Add saved_state for tasks blocked on sleeping locks Date: Sat, 25 Jun 2011 09:21:04 +0200 Spinlocks are state preserving in !RT. RT changes the state when a task gets blocked on a lock. So we need to remember the state before the lock contention. If a regular wakeup (not a RTmutex related wakeup) happens, the saved_state is updated to running. When the lock sleep is done, the saved state is restored. 
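The saved_state idea in 74/191 is easiest to see as a small state machine: before blocking on an RT sleeping lock the task's current sleep state is stashed and restored once the lock is acquired, so a regular (non-rtmutex) wakeup is not lost. The sketch below uses a toy structure, not the real task_struct changes:

#include <linux/sched.h>

struct example_task {
	long state;		/* e.g. TASK_INTERRUPTIBLE, set by the caller */
	long saved_state;	/* what to restore after the lock sleep */
};

static void example_block_on_rt_lock(struct example_task *t)
{
	t->saved_state = t->state;		/* remember the caller's state */
	t->state = TASK_UNINTERRUPTIBLE;	/* sleep for the lock itself */
}

static void example_rt_lock_acquired(struct example_task *t)
{
	/* A regular wakeup in between would have set saved_state to RUNNING. */
	t->state = t->saved_state;
	t->saved_state = TASK_RUNNING;
}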
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 75/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: add sleeping lock implementation Date: Thu, 12 Oct 2017 17:11:19 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 76/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT Date: Wed, 2 Dec 2015 11:34:07 +0100 Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq context. On PREEMPT_RT the softirq context is handled in thread context. This avoids the deadlock in the slow path and PI-boosting will be done on the correct thread. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 77/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: add mutex implementation based on rtmutex Date: Thu, 12 Oct 2017 17:17:03 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 78/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: add rwsem implementation based on rtmutex Date: Thu, 12 Oct 2017 17:28:34 +0200 The RT specific R/W semaphore implementation restricts the number of readers to one because a writer cannot block on multiple readers and inherit its priority or budget. The single reader restricting is painful in various ways: - Performance bottleneck for multi-threaded applications in the page fault path (mmap sem) - Progress blocker for drivers which are carefully crafted to avoid the potential reader/writer deadlock in mainline. The analysis of the writer code paths shows, that properly written RT tasks should not take them. Syscalls like mmap(), file access which take mmap sem write locked have unbound latencies which are completely unrelated to mmap sem. Other R/W sem users like graphics drivers are not suitable for RT tasks either. So there is little risk to hurt RT tasks when the RT rwsem implementation is changed in the following way: - Allow concurrent readers - Make writers block until the last reader left the critical section. This blocking is not subject to priority/budget inheritance. - Readers blocked on a writer inherit their priority/budget in the normal way. There is a drawback with this scheme. R/W semaphores become writer unfair though the applications which have triggered writer starvation (mostly on mmap_sem) in the past are not really the typical workloads running on a RT system. So while it's unlikely to hit writer starvation, it's possible. If there are unexpected workloads on RT systems triggering it, we need to rethink the approach. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 79/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: add rwlock implementation based on rtmutex Date: Thu, 12 Oct 2017 17:18:06 +0200 The implementation is bias-based, similar to the rwsem implementation. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 80/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: locking/rtmutex: wire up RT's locking Date: Thu, 12 Oct 2017 17:31:14 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 81/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: add ww_mutex addon for mutex-rt Date: Thu, 12 Oct 2017 17:34:38 +0200 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 82/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking/rtmutex: Use custom scheduling function for spin-schedule() Date: Tue, 6 Oct 2020 13:07:17 +0200 PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on top of a rtmutex lock. While blocked task->pi_blocked_on is set (tsk_is_pi_blocked()) and task needs to schedule away while waiting. The schedule process must distinguish between blocking on a regular sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock and rwlock): - rwsem and mutex must flush block requests (blk_schedule_flush_plug()) even if blocked on a lock. This can not deadlock because this also happens for non-RT. There should be a warning if the scheduling point is within a RCU read section. - spinlock and rwlock must not flush block requests. This will deadlock if the callback attempts to acquire a lock which is already acquired. Similarly to being preempted, there should be no warning if the scheduling point is within a RCU read section. Add preempt_schedule_lock() which is invoked if scheduling is required while blocking on a PREEMPT_RT-only sleeping lock. Remove tsk_is_pi_blocked() from the scheduler path which is no longer needed with the additional scheduler entry point. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 83/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: signal: Revert ptrace preempt magic Date: Wed, 21 Sep 2011 19:57:12 +0200 Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more than a bandaid around the ptrace design trainwreck. It's not a correctness issue, it's merily a cosmetic bandaid. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 84/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: preempt: Provide preempt_*_(no)rt variants Date: Fri, 24 Jul 2009 12:38:56 +0200 RT needs a few preempt_disable/enable points which are not necessary otherwise. Implement variants to avoid #ifdeffery. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 85/191 [ Author: Ingo Molnar Email: mingo@elte.hu Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT Date: Fri, 3 Jul 2009 08:30:13 -0500 Disable preemption on -RT for the vmstat code. On vanila the code runs in IRQ-off regions while on -RT it is not. "preempt_disable" ensures that the same ressources is not updated in parallel due to preemption. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 86/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm/memcontrol: Disable preemption in __mod_memcg_lruvec_state() Date: Wed, 28 Oct 2020 18:15:32 +0100 The callers expect disabled preemption/interrupts while invoking __mod_memcg_lruvec_state(). This works mainline because a lock of somekind is acquired. 
Use preempt_disable_rt() where per-CPU variables are accessed and a stable pointer is expected. This is also done in __mod_zone_page_state() for the same reason. Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 87/191 [ Author: Ahmed S. Darwish Email: a.darwish@linutronix.de Subject: xfrm: Use sequence counter with associated spinlock Date: Wed, 10 Jun 2020 12:53:22 +0200 A sequence counter write side critical section must be protected by some form of locking to serialize writers. A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_spinlock_t data type, which allows associating a spinlock with the sequence counter. This enables lockdep to verify that the spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Upstream-status: The xfrm locking used for seqcount writer serialization appears to be broken. If that's the case, a proper fix will need to be submitted upstream. (e.g. make the seqcount per network namespace?) Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 88/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: u64_stats: Disable preemption on 32bit-UP/SMP with RT during updates Date: Mon, 17 Aug 2020 12:28:10 +0200 On RT the seqcount_t is required even on UP because the softirq can be preempted. The IRQ handler is threaded so it is also preemptible. Disable preemption on 32bit-RT during value updates. There is no need to disable interrupts on RT because the handler is run threaded. Therefore disabling preemption is enough to guarantee that the update is not interrupted. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 89/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: fs/dcache: use swait_queue instead of waitqueue Date: Wed, 14 Sep 2016 14:35:49 +0200 __d_lookup_done() invokes wake_up_all() while holding an hlist_bl_lock() which disables preemption. As a workaround convert it to swait. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 90/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: fs/dcache: disable preemption on i_dir_seq's write side Date: Fri, 20 Oct 2017 11:29:53 +0200 i_dir_seq is an opencoded seqcounter. Based on the code it looks like we could have two writers in parallel despite the fact that the d_lock is held. The problem is that during the write process on RT the preemption is still enabled and if this process is interrupted by a reader with RT priority then we lock up. To avoid that lock up I am disabling the preemption during the update. The rename of i_dir_seq is here to ensure that new write sides are caught in the future. Cc: stable-rt@vger.kernel.org Reported-by: Oleg.Karfich@wago.com Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 91/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net/Qdisc: use a seqlock instead seqcount Date: Wed, 14 Sep 2016 17:36:35 +0200 The seqcount disables preemption on -RT while it is held, which we can't remove. Also we don't want the reader to spin for ages if the writer is scheduled out. The seqlock on the other hand will serialize / sleep on the lock while the writer is active.
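[ editor's note: a minimal sketch of the seqlock_t pattern described above; the struct and names are illustrative, not the actual Qdisc code:

#include <linux/seqlock.h>

struct qstats { u64 packets; };			/* hypothetical counter */

static DEFINE_SEQLOCK(qstats_lock);		/* seqlock_t embeds a spinlock */

static void qstats_update(struct qstats *s)
{
	/* The writer takes the embedded lock. On RT that lock is a
	 * sleeping lock, which is what the patch above relies on so a
	 * reader is not left spinning behind a preempted writer. */
	write_seqlock(&qstats_lock);
	s->packets++;
	write_sequnlock(&qstats_lock);
}

static u64 qstats_read(struct qstats *s)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqbegin(&qstats_lock);
		val = s->packets;
	} while (read_seqretry(&qstats_lock, seq));

	return val;
}

]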
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 92/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net: Properly annotate the try-lock for the seqlock Date: Tue, 8 Sep 2020 16:57:11 +0200 In patch ("net/Qdisc: use a seqlock instead seqcount") the seqcount has been replaced with a seqlock to allow the reader to boost the preempted writer. The try_write_seqlock() acquired the lock with a try-lock but the seqcount annotation was "lock". Opencode write_seqcount_t_begin() and use the try-lock annotation for lockdep. Reported-by: Mike Galbraith <efault@gmx.de> Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 93/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: kconfig: Disable config options which are not RT compatible Date: Sun, 24 Jul 2011 12:11:43 +0200 Disable stuff which is known to have issues on RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 94/191 [ Author: Ingo Molnar Email: mingo@elte.hu Subject: mm: Allow only SLUB on RT Date: Fri, 3 Jul 2009 08:44:03 -0500 Memory allocation disables interrupts as part of the allocation and freeing process. For -RT it is important that this section remains short and doesn't depend on the size of the request or an internal state of the memory allocator. At the beginning the SLAB memory allocator was adopted for RT's needs and it required substantial changes. Later, with the addition of the SLUB memory allocator we adopted this one as well and the changes were smaller. More importantly, due to the design of the SLUB allocator it performs better and its worst case latency was smaller. In the end only SLUB remained supported. Disable SLAB and SLOB on -RT. Only SLUB is adapted to -RT's needs. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 95/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT Date: Mon, 18 Jul 2011 17:03:52 +0200 Carsten reported problems when running: taskset 01 chrt -f 1 sleep 1 from within rc.local on an F15 machine. The task stays running and never gets on the run queue because some of the run queues have rt_throttled=1 which does not go away. Works nicely from an ssh login shell. Disabling CONFIG_RT_GROUP_SCHED solves that as well. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 96/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net/core: disable NET_RX_BUSY_POLL on RT Date: Sat, 27 May 2017 19:02:06 +0200 napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire sleeping locks with disabled preemption so we would have to work around this and add explicit locking for synchronisation against ksoftirqd. Without explicit synchronisation a low priority process would "own" the NAPI state (by setting NAPIF_STATE_SCHED) and could be scheduled out (no preempt_disable() and BH is preemptible on RT). In case a network packet arrives, the interrupt handler would set NAPIF_STATE_MISSED and the system would wait until the task owning the NAPI would be scheduled in again. Should a task with RT priority busy poll then it would consume the CPU instead of allowing tasks with lower priority to run. NET_RX_BUSY_POLL is disabled by default (the system wide sysctls for poll/read are set to zero) so disable NET_RX_BUSY_POLL on RT to avoid the wrong locking context.
Should this feature be considered useful on RT systems then it could be enabled again with proper locking and synchronisation. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 97/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: efi: Disable runtime services on RT Date: Thu, 26 Jul 2018 15:03:16 +0200 Based on measurements the EFI functions get_variable / get_next_variable take up to 2us which looks okay. The functions get_time, set_time take around 10ms. Those 10ms are too much. Even one ms would be too much. Ard mentioned that SetVariable might even trigger larger latencies if the firmware will erase flash blocks on NOR. The time-functions are used by efi-rtc and can be triggered during runtime (either via explicit read/write or ntp sync). The variable write could be used by pstore. These functions can be disabled without much of a loss. The poweroff / reboot hooks may be provided by PSCI. Disable EFI's runtime wrappers. This was observed on "EFI v2.60 by SoftIron Overdrive 1000". Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 98/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: efi: Allow efi=runtime Date: Thu, 26 Jul 2018 15:06:10 +0200 In case the command line option "efi=noruntime" is the default at build time, the user can override it with `efi=runtime' and allow it again. Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 99/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: rt: Add local irq locks Date: Mon, 20 Jun 2011 09:03:47 +0200 Introduce locallock. For !RT this maps to preempt_disable()/ local_irq_disable() so there is not much that changes. For RT this will map to a spinlock. This makes preemption possible and the locked "resource" gets the lockdep annotation it wouldn't have otherwise. The locks are recursive for owner == current. Also, all locks use migrate_disable(), which ensures that the task is not migrated to another CPU while the lock is held and the owner is preempted. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 100/191 [ Author: Oleg Nesterov Email: oleg@redhat.com Subject: signal/x86: Delay calling signals in atomic Date: Tue, 14 Jul 2015 14:26:34 +0200 On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using a per CPU debug stack defined by the IST. If we schedule out, another task can come in and use the same stack and cause the stack to be corrupted and crash the kernel on return. When CONFIG_PREEMPT_RT is enabled, spin_locks become mutexes, and one of these is the spin lock used in signal handling. Some of the debug code (int3) causes do_trap() to send a signal. This function takes a spinlock that has been converted to a mutex and may sleep. If this happens, the above issues with the corrupted stack are possible. Instead of calling the signal right away, for PREEMPT_RT and x86_64, the signal information is stored in the task's task_struct and TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume code will send the signal when preemption is enabled. [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code.
] Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: also needed on 32bit as per Yang Shi <yang.shi@linaro.org>] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 101/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: kernel/sched: add {put|get}_cpu_light() Date: Sat, 27 May 2017 19:02:06 +0200 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 102/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: trace: Add migrate-disabled counter to tracing output Date: Sun, 17 Jul 2011 21:56:42 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 103/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking: don't check for __LINUX_SPINLOCK_TYPES_H on -RT archs Date: Fri, 4 Aug 2017 17:40:42 +0200 Upstream uses arch_spinlock_t within spinlock_t and requests that the spinlock_types.h header file is included first. On -RT we have the rt_mutex with its raw_lock wait_lock which needs architectures' spinlock_types.h header file for its definition. However we need rt_mutex first because it is used to build the spinlock_t so that check does not work for us. Therefore I am dropping that check. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 104/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: mm: sl[au]b: Change list_lock to raw_spinlock_t Date: Mon, 28 May 2018 15:24:22 +0200 The list_lock is used with IRQs off on PREEMPT_RT. Make it a raw_spinlock_t, otherwise interrupts won't be disabled on PREEMPT_RT. The locking rules remain unchanged. The lock is updated for SLAB and SLUB since both share the same header file for the struct kmem_cache_node definition. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 105/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: slub: Make object_map_lock a raw_spinlock_t Date: Thu, 16 Jul 2020 18:47:50 +0200 The variable object_map is protected by object_map_lock. The lock is always acquired in debug code and within an already atomic context. Make object_map_lock a raw_spinlock_t. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 106/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: mm: slub: Enable irqs for __GFP_WAIT Date: Wed, 9 Jan 2013 12:08:15 +0100 SYSTEM_RUNNING might be too late for enabling interrupts. Allocations with GFP_WAIT can happen before that. So use this as an indicator. [bigeasy: Add warning on RT for allocations in atomic context. Don't enable interrupts on allocations during SYSTEM_SUSPEND. This is done during suspend by ACPI, noticed by Liwei Song <liwei.song@windriver.com> ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 107/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: slub: Move discard_slab() invocations out of IRQ-off sections Date: Fri, 26 Feb 2021 15:14:15 +0100 discard_slab() gives the memory back to the page-allocator. Some of its invocations occur from IRQ-disabled sections which were disabled by SLUB. An example is the deactivate_slab() invocation from within ___slab_alloc() or put_cpu_partial(). Instead of giving the memory back directly, put the pages on a list and process them once the caller is out of the known IRQ-off region.
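[ editor's note: a minimal sketch of the deferral pattern described above; the helper names are hypothetical and this is not the actual SLUB code:

#include <linux/mm.h>
#include <linux/list.h>

/* While IRQs are off, queue the page instead of calling the allocator. */
static void defer_free_page(struct page *page, struct list_head *delayed)
{
	list_add(&page->lru, delayed);
}

/* Called by the same caller once it has left the IRQ-off region. */
static void process_delayed_frees(struct list_head *delayed)
{
	struct page *page, *tmp;

	list_for_each_entry_safe(page, tmp, delayed, lru) {
		list_del(&page->lru);
		__free_pages(page, 0);
	}
}

]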
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 108/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: slub: Move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context Date: Fri, 26 Feb 2021 17:11:55 +0100 flush_all() flushes a specific SLAB cache on each CPU (where the cache is present). The discard_delayed()/__free_slab() invocation happens within an IPI handler and is problematic for PREEMPT_RT. The flush operation is not a frequent operation or a hot path. The per-CPU flush operation can be moved into a workqueue. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 109/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: slub: Don't resize the location tracking cache on PREEMPT_RT Date: Fri, 26 Feb 2021 17:26:04 +0100 The location tracking cache has a size of a page and is resized if its current size is too small. This allocation happens with disabled interrupts and can't happen on PREEMPT_RT. Should one page be too small, then we have to allocate more at the beginning. The only downside is that fewer callers will be visible. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 110/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: page_alloc: Use migrate_disable() in drain_local_pages_wq() Date: Thu, 2 Jul 2020 14:27:23 +0200 drain_local_pages_wq() disables preemption to avoid CPU migration during CPU hotplug and can't use cpus_read_lock(). Using migrate_disable() works here, too. The scheduler won't take the CPU offline until the task has left the migrate-disable section. Use migrate_disable() in drain_local_pages_wq(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 111/191 [ Author: Ingo Molnar Email: mingo@elte.hu Subject: mm: page_alloc: Use a local_lock instead of explicit local_irq_save(). Date: Fri, 3 Jul 2009 08:29:37 -0500 The page-allocator disables interrupts for a few reasons: - Decouple the irqsave operation from spin_lock() so it can be extended over the actual lock region and cover other areas, like counter increments where the preemptible version can be avoided. - Access to the per-CPU pcp from struct zone. Replace the irqsave with a local-lock. The counters are expected to always be modified with disabled preemption and no access from interrupt context. Contains fixes from: Peter Zijlstra <a.p.zijlstra@chello.nl> Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 112/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: slub: Don't enable partial CPU caches on PREEMPT_RT by default Date: Tue, 2 Mar 2021 18:58:04 +0100 SLUB's partial CPU caches lead to higher latencies in a hackbench benchmark. Don't enable partial CPU caches by default on PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 113/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm: memcontrol: Provide a local_lock for per-CPU memcg_stock Date: Tue, 18 Aug 2020 10:30:00 +0200 Interrupts are disabled to ensure CPU-local access to the per-CPU variable `memcg_stock'. As the code inside the interrupt disabled section acquires regular spinlocks, which are converted to 'sleeping' spinlocks on a PREEMPT_RT kernel, this conflicts with the RT semantics.
Convert it to a local_lock which allows RT kernels to substitute them with a real per CPU lock. On non RT kernels this maps to local_irq_save() as before, but provides also lockdep coverage of the critical region. No functional change. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 114/191 [ Author: Yang Shi Email: yang.shi@windriver.com Subject: mm/memcontrol: Don't call schedule_work_on in preemption disabled context Date: Wed, 30 Oct 2013 11:48:33 -0700 The following trace is triggered when running ltp oom test cases: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 17188, name: oom03 Preemption disabled at:[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0 CPU: 2 PID: 17188 Comm: oom03 Not tainted 3.10.10-rt3 #2 Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010 ffff88007684d730 ffff880070df9b58 ffffffff8169918d ffff880070df9b70 ffffffff8106db31 ffff88007688b4a0 ffff880070df9b88 ffffffff8169d9c0 ffff88007688b4a0 ffff880070df9bc8 ffffffff81059da1 0000000170df9bb0 Call Trace: [<ffffffff8169918d>] dump_stack+0x19/0x1b [<ffffffff8106db31>] __might_sleep+0xf1/0x170 [<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50 [<ffffffff81059da1>] queue_work_on+0x61/0x100 [<ffffffff8112b361>] drain_all_stock+0xe1/0x1c0 [<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0 [<ffffffff8112beda>] __mem_cgroup_try_charge+0x41a/0xc40 [<ffffffff810f1c91>] ? release_pages+0x1b1/0x1f0 [<ffffffff8106f200>] ? sched_exec+0x40/0xb0 [<ffffffff8112cc87>] mem_cgroup_charge_common+0x37/0x70 [<ffffffff8112e2c6>] mem_cgroup_newpage_charge+0x26/0x30 [<ffffffff8110af68>] handle_pte_fault+0x618/0x840 [<ffffffff8103ecf6>] ? unpin_current_cpu+0x16/0x70 [<ffffffff81070f94>] ? migrate_enable+0xd4/0x200 [<ffffffff8110cde5>] handle_mm_fault+0x145/0x1e0 [<ffffffff810301e1>] __do_page_fault+0x1a1/0x4c0 [<ffffffff8169c9eb>] ? preempt_schedule_irq+0x4b/0x70 [<ffffffff8169e3b7>] ? retint_kernel+0x37/0x40 [<ffffffff8103053e>] do_page_fault+0xe/0x10 [<ffffffff8169e4c2>] page_fault+0x22/0x30 So, to prevent schedule_work_on from being called in preempt disabled context, replace the pair of get/put_cpu() to get/put_cpu_light(). Signed-off-by: Yang Shi <yang.shi@windriver.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 115/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: mm/memcontrol: Replace local_irq_disable with local locks Date: Wed, 28 Jan 2015 17:14:16 +0100 There are a few local_irq_disable() which then take sleeping locks. This patch converts them local locks. [bigeasy: Move unlock after memcg_check_events() in mem_cgroup_swapout(), pointed out by Matt Fleming <matt@codeblueprint.co.uk>] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 116/191 [ Author: Mike Galbraith Email: umgwanakikbuti@gmail.com Subject: mm/zsmalloc: copy with get_cpu_var() and locking Date: Tue, 22 Mar 2016 11:16:09 +0100 get_cpu_var() disables preemption and triggers a might_sleep() splat later. This is replaced with get_locked_var(). This bitspinlocks are replaced with a proper mutex which requires a slightly larger struct to allocate. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> [bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). 
Mike then fixed the size magic] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 117/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: x86: kvm Require const tsc for RT Date: Sun, 6 Nov 2011 12:26:18 +0100 Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds are horrible latency wise. That's also a preliminary for running RT in a guest on top of a RT host. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 118/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: wait.h: include atomic.h Date: Mon, 28 Oct 2013 12:19:57 +0100 | CC init/main.o |In file included from include/linux/mmzone.h:9:0, | from include/linux/gfp.h:4, | from include/linux/kmod.h:22, | from include/linux/module.h:13, | from init/main.c:15: |include/linux/wait.h: In function ‘wait_on_atomic_t’: |include/linux/wait.h:982:2: error: implicit declaration of function ‘atomic_read’ [-Werror=implicit-function-declaration] | if (atomic_read(val) == 0) | ^ This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 119/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Limit the number of task migrations per batch Date: Mon, 6 Jun 2011 12:12:51 +0200 Put an upper limit on the number of tasks which are migrated per batch to avoid large latencies. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 120/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Move mmdrop to RCU on RT Date: Mon, 6 Jun 2011 12:20:33 +0200 Takes sleeping locks and calls into the memory allocator, so nothing we want to do in task switch and oder atomic contexts. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 121/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: kernel/sched: move stack + kprobe clean up to __put_task_struct() Date: Mon, 21 Nov 2016 19:31:08 +0100 There is no need to free the stack before the task struct (except for reasons mentioned in commit 68f24b08ee89 ("sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK")). This also comes handy on -RT because we can't free memory in preempt disabled region. vfree_atomic() delays the memory cleanup to a worker. Since we move everything to the RCU callback, we can also free it immediately. Cc: stable-rt@vger.kernel.org #for kprobe_flush_task() Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 122/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Do not account rcu_preempt_depth on RT in might_sleep() Date: Tue, 7 Jun 2011 09:19:06 +0200 RT changes the rcu_preempt_depth semantics, so we cannot check for it in might_sleep(). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 123/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Disable TTWU_QUEUE on RT Date: Tue, 13 Sep 2011 16:42:35 +0200 The queued remote wakeup mechanism can introduce rather large latencies if the number of migrated tasks is high. Disable it for RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 124/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: softirq: Check preemption after reenabling interrupts Date: Sun, 13 Nov 2011 17:17:09 +0100 raise_softirq_irqoff() disables interrupts and wakes the softirq daemon, but after reenabling interrupts there is no preemption check, so the execution of the softirq thread might be delayed arbitrarily. 
In principle we could add that check to local_irq_enable/restore, but that's overkill as the rasie_softirq_irqoff() sections are the only ones which show this behaviour. Reported-by: Carsten Emde <cbe@osadl.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 125/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: softirq: Disable softirq stacks for RT Date: Mon, 18 Jul 2011 13:59:17 +0200 Disable extra stacks for softirqs. We want to preempt softirqs and having them on special IRQ-stack does not make this easier. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 126/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net/core: use local_bh_disable() in netif_rx_ni() Date: Fri, 16 Jun 2017 19:03:16 +0200 In 2004 netif_rx_ni() gained a preempt_disable() section around netif_rx() and its do_softirq() + testing for it. The do_softirq() part is required because netif_rx() raises the softirq but does not invoke it. The preempt_disable() is required to remain on the same CPU which added the skb to the per-CPU list. All this can be avoided be putting this into a local_bh_disable()ed section. The local_bh_enable() part will invoke do_softirq() if required. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 127/191 [ Author: Grygorii Strashko Email: Grygorii.Strashko@linaro.org Subject: pid.h: include atomic.h Date: Tue, 21 Jul 2015 19:43:56 +0300 This patch fixes build error: CC kernel/pid_namespace.o In file included from kernel/pid_namespace.c:11:0: include/linux/pid.h: In function 'get_pid': include/linux/pid.h:78:3: error: implicit declaration of function 'atomic_inc' [-Werror=implicit-function-declaration] atomic_inc(&pid->count); ^ which happens when CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_SPINLOCK=n CONFIG_DEBUG_MUTEXES=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PID_NS=y Vanilla gets this via spinlock.h. Signed-off-by: Grygorii Strashko <Grygorii.Strashko@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 128/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: ptrace: fix ptrace vs tasklist_lock race Date: Thu, 29 Aug 2013 18:21:04 +0200 As explained by Alexander Fyodorov <halcy@yandex.ru>: |read_lock(&tasklist_lock) in ptrace_stop() is converted to mutex on RT kernel, |and it can remove __TASK_TRACED from task->state (by moving it to |task->saved_state). If parent does wait() on child followed by a sys_ptrace |call, the following race can happen: | |- child sets __TASK_TRACED in ptrace_stop() |- parent does wait() which eventually calls wait_task_stopped() and returns | child's pid |- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves | __TASK_TRACED flag to saved_state |- parent calls sys_ptrace, which calls ptrace_check_attach() and wait_task_inactive() The patch is based on his initial patch where an additional check is added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is taken in case the caller is interrupted between looking into ->state and ->saved_state. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 129/191 [ Author: Oleg Nesterov Email: oleg@redhat.com Subject: ptrace: fix ptrace_unfreeze_traced() race with rt-lock Date: Tue, 3 Nov 2020 12:39:01 +0100 The patch "ptrace: fix ptrace vs tasklist_lock race" changed ptrace_freeze_traced() to take task->saved_state into account, but ptrace_unfreeze_traced() has the same problem and needs a similar fix: it should check/update both ->state and ->saved_state. 
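[ editor's note: a rough sketch of the idea; ->saved_state only exists on PREEMPT_RT kernels and the real fix is more involved (it also has to cope with pending fatal signals):

#include <linux/sched.h>

static void unfreeze_traced_sketch(struct task_struct *task)
{
	unsigned long flags;

	/* Check both fields: on RT the traced task may have parked
	 * __TASK_TRACED in ->saved_state while blocking on the
	 * converted tasklist_lock. */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	if (task->state == __TASK_TRACED)
		task->state = TASK_TRACED;
	else if (task->saved_state == __TASK_TRACED)
		task->saved_state = TASK_TRACED;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}

]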
Reported-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Fixes: "ptrace: fix ptrace vs tasklist_lock race" Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: stable-rt@vger.kernel.org ] 130/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: rcu: Delay RCU-selftests Date: Wed, 10 Mar 2021 15:09:02 +0100 Delay RCU-selftests until ksoftirqd is up and running. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 131/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: locking: Make spinlock_t and rwlock_t a RCU section on RT Date: Tue, 19 Nov 2019 09:25:04 +0100 On !RT a locked spinlock_t and rwlock_t disables preemption which implies a RCU read section. There is code that relies on that behaviour. Add an explicit RCU read section on RT while a sleeping lock (a lock which would disables preemption on !RT) acquired. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 132/191 [ Author: Scott Wood Email: swood@redhat.com Subject: rcutorture: Avoid problematic critical section nesting on RT Date: Wed, 11 Sep 2019 17:57:29 +0100 rcutorture was generating some nesting scenarios that are not reasonable. Constrain the state selection to avoid them. Example #1: 1. preempt_disable() 2. local_bh_disable() 3. preempt_enable() 4. local_bh_enable() On PREEMPT_RT, BH disabling takes a local lock only when called in non-atomic context. Thus, atomic context must be retained until after BH is re-enabled. Likewise, if BH is initially disabled in non-atomic context, it cannot be re-enabled in atomic context. Example #2: 1. rcu_read_lock() 2. local_irq_disable() 3. rcu_read_unlock() 4. local_irq_enable() If the thread is preempted between steps 1 and 2, rcu_read_unlock_special.b.blocked will be set, but it won't be acted on in step 3 because IRQs are disabled. Thus, reporting of the quiescent state will be delayed beyond the local_irq_enable(). For now, these scenarios will continue to be tested on non-PREEMPT_RT kernels, until debug checks are added to ensure that they are not happening elsewhere. Signed-off-by: Scott Wood <swood@redhat.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 133/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: mm/vmalloc: Another preempt disable region which sucks Date: Tue, 12 Jul 2011 11:39:36 +0200 Avoid the preempt disable version of get_cpu_var(). The inner-lock should provide enough serialisation. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 134/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: block/mq: do not invoke preempt_disable() Date: Tue, 14 Jul 2015 14:26:34 +0200 preempt_disable() and get_cpu() don't play well together with the sleeping locks it tries to allocate later. It seems to be enough to replace it with get_cpu_light() and migrate_disable(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 135/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: md: raid5: Make raid5_percpu handling RT aware Date: Tue, 6 Apr 2010 16:51:31 +0200 __raid_run_ops() disables preemption with get_cpu() around the access to the raid5_percpu variables. That causes scheduling while atomic spews on RT. Serialize the access to the percpu data with a lock and keep the code preemptible. 
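[ editor's note: a sketch of the general pattern just described, per-CPU data guarded by a per-CPU lock instead of preempt_disable(); the struct is hypothetical, not the md/raid5 code:

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct scratch_percpu {
	spinlock_t lock;	/* spin_lock_init() for each CPU at setup */
	void *scratch;
};

static DEFINE_PER_CPU(struct scratch_percpu, scratch_pcpu);

static void use_percpu_scratch(void)
{
	struct scratch_percpu *p;

	/* Pin the task to the CPU instead of disabling preemption, then
	 * serialize with the per-CPU lock (a sleeping lock on RT). */
	migrate_disable();
	p = this_cpu_ptr(&scratch_pcpu);
	spin_lock(&p->lock);
	/* ... operate on p->scratch ... */
	spin_unlock(&p->lock);
	migrate_enable();
}

]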
Reported-by: Udo van den Heuvel <udovdh@xs4all.nl> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> ] 136/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: scsi/fcoe: Make RT aware. Date: Sat, 12 Nov 2011 14:00:48 +0100 Do not disable preemption while taking sleeping locks. All user look safe for migrate_diable() only. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 137/191 [ Author: Mike Galbraith Email: umgwanakikbuti@gmail.com Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light() Date: Wed, 18 Feb 2015 16:05:28 +0100 |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 3194, name: rpc.nfsd |Preemption disabled at:[<ffffffffa06bf0bb>] svc_xprt_received+0x4b/0xc0 [sunrpc] |CPU: 6 PID: 3194 Comm: rpc.nfsd Not tainted 3.18.7-rt1 #9 |Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.404 11/06/2014 | ffff880409630000 ffff8800d9a33c78 ffffffff815bdeb5 0000000000000002 | 0000000000000000 ffff8800d9a33c98 ffffffff81073c86 ffff880408dd6008 | ffff880408dd6000 ffff8800d9a33cb8 ffffffff815c3d84 ffff88040b3ac000 |Call Trace: | [<ffffffff815bdeb5>] dump_stack+0x4f/0x9e | [<ffffffff81073c86>] __might_sleep+0xe6/0x150 | [<ffffffff815c3d84>] rt_spin_lock+0x24/0x50 | [<ffffffffa06beec0>] svc_xprt_do_enqueue+0x80/0x230 [sunrpc] | [<ffffffffa06bf0bb>] svc_xprt_received+0x4b/0xc0 [sunrpc] | [<ffffffffa06c03ed>] svc_add_new_perm_xprt+0x6d/0x80 [sunrpc] | [<ffffffffa06b2693>] svc_addsock+0x143/0x200 [sunrpc] | [<ffffffffa072e69c>] write_ports+0x28c/0x340 [nfsd] | [<ffffffffa072d2ac>] nfsctl_transaction_write+0x4c/0x80 [nfsd] | [<ffffffff8117ee83>] vfs_write+0xb3/0x1d0 | [<ffffffff8117f889>] SyS_write+0x49/0xb0 | [<ffffffff815c4556>] system_call_fastpath+0x16/0x1b Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 138/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: rt: Introduce cpu_chill() Date: Wed, 7 Mar 2012 20:51:03 +0100 Retry loops on RT might loop forever when the modifying side was preempted. Add cpu_chill() to replace cpu_relax(). cpu_chill() defaults to cpu_relax() for non RT. On RT it puts the looping task to sleep for a tick so the preempted task can make progress. Steven Rostedt changed it to use a hrtimer instead of msleep(): | |Ulrich Obergfell pointed out that cpu_chill() calls msleep() which is woken |up by the ksoftirqd running the TIMER softirq. But as the cpu_chill() is |called from softirq context, it may block the ksoftirqd() from running, in |which case, it may never wake up the msleep() causing the deadlock. + bigeasy later changed to schedule_hrtimeout() |If a task calls cpu_chill() and gets woken up by a regular or spurious |wakeup and has a signal pending, then it exits the sleep loop in |do_nanosleep() and sets up the restart block. If restart->nanosleep.type is |not TI_NONE then this results in accessing a stale user pointer from a |previously interrupted syscall and a copy to user based on the stale |pointer or a BUG() when 'type' is not supported in nanosleep_copyout(). + bigeasy: add PF_NOFREEZE: | [....] Waiting for /dev to be fully populated... | ===================================== | [ BUG: udevd/229 still has locks held! 
] | 3.12.11-rt17 #23 Not tainted | ------------------------------------- | 1 lock held by udevd/229: | #0: (&type->i_mutex_dir_key#2){+.+.+.}, at: lookup_slow+0x28/0x98 | | stack backtrace: | CPU: 0 PID: 229 Comm: udevd Not tainted 3.12.11-rt17 #23 | (unwind_backtrace+0x0/0xf8) from (show_stack+0x10/0x14) | (show_stack+0x10/0x14) from (dump_stack+0x74/0xbc) | (dump_stack+0x74/0xbc) from (do_nanosleep+0x120/0x160) | (do_nanosleep+0x120/0x160) from (hrtimer_nanosleep+0x90/0x110) | (hrtimer_nanosleep+0x90/0x110) from (cpu_chill+0x30/0x38) | (cpu_chill+0x30/0x38) from (dentry_kill+0x158/0x1ec) | (dentry_kill+0x158/0x1ec) from (dput+0x74/0x15c) | (dput+0x74/0x15c) from (lookup_real+0x4c/0x50) | (lookup_real+0x4c/0x50) from (__lookup_hash+0x34/0x44) | (__lookup_hash+0x34/0x44) from (lookup_slow+0x38/0x98) | (lookup_slow+0x38/0x98) from (path_lookupat+0x208/0x7fc) | (path_lookupat+0x208/0x7fc) from (filename_lookup+0x20/0x60) | (filename_lookup+0x20/0x60) from (user_path_at_empty+0x50/0x7c) | (user_path_at_empty+0x50/0x7c) from (user_path_at+0x14/0x1c) | (user_path_at+0x14/0x1c) from (vfs_fstatat+0x48/0x94) | (vfs_fstatat+0x48/0x94) from (SyS_stat64+0x14/0x30) | (SyS_stat64+0x14/0x30) from (ret_fast_syscall+0x0/0x48) Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 139/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: fs: namespace: Use cpu_chill() in trylock loops Date: Wed, 7 Mar 2012 21:00:34 +0100 Retry loops on RT might loop forever when the modifying side was preempted. Use cpu_chill() instead of cpu_relax() to let the system make progress. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 140/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: debugobjects: Make RT aware Date: Sun, 17 Jul 2011 21:41:35 +0200 Avoid filling the pool / allocating memory with irqs off(). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 141/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: net: Use skbufhead with raw lock Date: Tue, 12 Jul 2011 15:38:34 +0200 Use the rps lock as rawlock so we can keep irq-off regions. It looks low latency. However we can't kfree() from this context therefore we defer this to the softirq and use the tofree_queue list for it (similar to process_queue). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 142/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net: Dequeue in dev_cpu_dead() without the lock Date: Wed, 16 Sep 2020 16:15:39 +0200 Upstream uses skb_dequeue() to acquire lock of `input_pkt_queue'. The reason is to synchronize against a remote CPU which still thinks that the CPU is online enqueues packets to this CPU. There are no guarantees that the packet is enqueued before the callback is run, it just hope. RT however complains about an not initialized lock because it uses another lock for `input_pkt_queue' due to the IRQ-off nature of the context. Use the unlocked dequeue version for `input_pkt_queue'. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 143/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: net: dev: always take qdisc's busylock in __dev_xmit_skb() Date: Wed, 30 Mar 2016 13:36:29 +0200 The root-lock is dropped before dev_hard_start_xmit() is invoked and after setting the __QDISC___STATE_RUNNING bit. 
If this task is now pushed away by a task with a higher priority then the task with the higher priority won't be able to submit packets to the NIC directly; instead they will be enqueued into the Qdisc. The NIC will remain idle until the task(s) with higher priority leave the CPU and the task with lower priority gets back and finishes the job. If we always take the busylock we ensure that the RT task can boost the low-prio task and submit the packet. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 144/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: irqwork: push most work into softirq context Date: Tue, 23 Jun 2015 15:32:51 +0200 Initially we deferred all irqwork into softirq because we didn't want the latency spikes if perf or another user was busy and delayed the RT task. The NOHZ trigger (nohz_full_kick_work) was the first user that did not work as expected if it did not run in the original irqwork context so we had to bring it back somehow for it. push_irq_work_func is the second one that requires this. This patch adds the IRQ_WORK_HARD_IRQ which makes sure the callback runs in raw-irq context. Everything else is deferred into softirq context. Without -RT we have the original behavior. This patch incorporates tglx's original work, reworked a little to bring back arch_irq_work_raise() where possible, and a few fixes from Steven Rostedt and Mike Galbraith, [bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a hard and soft variant] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 145/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: crypto: limit more FPU-enabled sections Date: Thu, 30 Nov 2017 13:40:10 +0100 Those crypto drivers use SSE/AVX/… for their crypto work and in order to do so in the kernel they need to enable the "FPU" in kernel mode, which disables preemption. There are two problems with the way they are used: - the while loop which processes X bytes may create latency spikes and should be avoided or limited. - the cipher-walk-next part may allocate/free memory and may use kmap_atomic(). The whole kernel_fpu_begin()/end() processing probably isn't that cheap. It most likely makes sense to process as much of those as possible in one go. The new *_fpu_sched_rt() schedules only if an RT task is pending. Probably we should measure the performance of those ciphers in pure SW mode and with these optimisations to see if it makes sense to keep them for RT. This kernel_fpu_resched() makes the code more preemptible, which might hurt performance. Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 146/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: crypto: cryptd - add a lock instead preempt_disable/local_bh_disable Date: Thu, 26 Jul 2018 18:52:00 +0200 cryptd has a per-CPU lock which is protected with local_bh_disable() and preempt_disable(). Add an explicit spin_lock to make the locking context more obvious and visible to lockdep. Since it is a per-CPU lock, there should be no lock contention on the actual spinlock. There is a small race-window where we could be migrated to another CPU after the cpu_queue has been obtained. This is not a problem because the actual resource is protected by the spinlock.
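[ editor's note: a sketch of the kind of conversion described above, with hypothetical names rather than the cryptd code:

#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/list.h>

struct cpu_work_queue {
	spinlock_t qlock;		/* explicit lock, visible to lockdep */
	struct list_head items;		/* INIT_LIST_HEAD() per CPU at setup */
};

static DEFINE_PER_CPU(struct cpu_work_queue, work_queues);

static void queue_item(struct list_head *item)
{
	struct cpu_work_queue *q;

	/* A migration after picking the queue is harmless: the data is
	 * protected by the spinlock, not by staying on this CPU. */
	q = raw_cpu_ptr(&work_queues);
	spin_lock_bh(&q->qlock);
	list_add_tail(item, &q->items);
	spin_unlock_bh(&q->qlock);
}

]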
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 147/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: panic: skip get_random_bytes for RT_FULL in init_oops_id Date: Tue, 14 Jul 2015 14:26:34 +0200 Disable on -RT. If this is invoked from irq-context we will have problems acquiring the sleeping lock. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 148/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: x86: stackprotector: Avoid random pool on rt Date: Thu, 16 Dec 2010 14:25:18 +0100 CPU bringup calls into the random pool to initialize the stack canary. During boot that works nicely even on RT as the might sleep checks are disabled. During CPU hotplug the might sleep checks trigger. Making the locks in random raw is a major PITA, so avoiding the call on RT is the only sensible solution. This is basically the same randomness which we get during boot where the random pool has no entropy and we rely on the TSC randomness. Reported-by: Carsten Emde <carsten.emde@osadl.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 149/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: random: Make it work on rt Date: Tue, 21 Aug 2012 20:38:50 +0200 Delegate the random insertion to the forced threaded interrupt handler. Store the return IP of the hard interrupt handler in the irq descriptor and feed it into the random generator as a source of entropy. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 150/191 [ Author: Priyanka Jain Email: Priyanka.Jain@freescale.com Subject: net: Remove preemption disabling in netif_rx() Date: Thu, 17 May 2012 09:35:11 +0530 1) enqueue_to_backlog() (called from netif_rx) should be bound to a particular CPU. This can be achieved by disabling migration. No need to disable preemption. 2) Fixes crash "BUG: scheduling while atomic: ksoftirqd" in case of RT. If preemption is disabled, enqueue_to_backlog() is called in atomic context. And if backlog exceeds its count, kfree_skb() is called. But in RT, kfree_skb() might get scheduled out, so it expects non atomic context. - Replace preempt_enable(), preempt_disable() with migrate_enable(), migrate_disable() respectively. - Replace get_cpu(), put_cpu() with get_cpu_light(), put_cpu_light() respectively. Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com> Acked-by: Rajan Srivastava <Rajan.Srivastava@freescale.com> Cc: <rostedt@goodmis.orgn> Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@freescale.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: Remove assumption about migrate_disable() from the description.] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 151/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: lockdep: Make it RT aware Date: Sun, 17 Jul 2011 18:51:23 +0200 Teach lockdep that we don't really do softirqs on -RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 152/191 [ Author: Yong Zhang Email: yong.zhang@windriver.com Subject: lockdep: selftest: Only do hardirq context test for raw spinlock Date: Mon, 16 Apr 2012 15:01:56 +0800 On -rt there is no softirq context any more and rwlock is sleepable, so disable the softirq context test and the rwlock+irq test.
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com> Cc: Yong Zhang <yong.zhang@windriver.com> Link: http://lkml.kernel.org/r/1334559716-18447-3-git-send-email-yong.zhang0@gmail.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 153/191 [ Author: Josh Cartwright Email: josh.cartwright@ni.com Subject: lockdep: selftest: fix warnings due to missing PREEMPT_RT conditionals Date: Wed, 28 Jan 2015 13:08:45 -0600 "lockdep: Selftest: Only do hardirq context test for raw spinlock" disabled the execution of certain tests with PREEMPT_RT, but did not prevent the tests from still being defined. This leads to warnings like: ./linux/lib/locking-selftest.c:574:1: warning: 'irqsafe1_hard_rlock_12' defined but not used [-Wunused-function] ./linux/lib/locking-selftest.c:574:1: warning: 'irqsafe1_hard_rlock_21' defined but not used [-Wunused-function] ./linux/lib/locking-selftest.c:577:1: warning: 'irqsafe1_hard_wlock_12' defined but not used [-Wunused-function] ./linux/lib/locking-selftest.c:577:1: warning: 'irqsafe1_hard_wlock_21' defined but not used [-Wunused-function] ./linux/lib/locking-selftest.c:580:1: warning: 'irqsafe1_soft_spin_12' defined but not used [-Wunused-function] ... Fixed by wrapping the test definitions in #ifndef CONFIG_PREEMPT_RT conditionals. Signed-off-by: Josh Cartwright <josh.cartwright@ni.com> Signed-off-by: Xander Huff <xander.huff@ni.com> Acked-by: Gratian Crisan <gratian.crisan@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 154/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: lockdep: disable self-test Date: Tue, 17 Oct 2017 16:36:18 +0200 The self-test wasn't always 100% accurate for RT. We disabled a few tests which failed because they had a different semantic for RT. Some still reported false positives. Now the selftest locks up the system during boot and it needs to be investigated… Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 155/191 [ Author: Mike Galbraith Email: umgwanakikbuti@gmail.com Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended Date: Sat, 27 Feb 2016 08:09:11 +0100 DRM folks identified the spots, so use them. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: linux-rt-users <linux-rt-users@vger.kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 156/191 [ Author: Mike Galbraith Email: umgwanakikbuti@gmail.com Subject: drm/i915: Don't disable interrupts on PREEMPT_RT during atomic updates Date: Sat, 27 Feb 2016 09:01:42 +0100 Commit 8d7849db3eab7 ("drm/i915: Make sprite updates atomic") started disabling interrupts across atomic updates. This breaks on PREEMPT_RT because within this section the code attempt to acquire spinlock_t locks which are sleeping locks on PREEMPT_RT. According to the comment the interrupts are disabled to avoid random delays and not required for protection or synchronisation. Don't disable interrupts on PREEMPT_RT during atomic updates. 
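[ editor's note: a sketch of the pattern such a change typically uses; illustrative only, not the exact i915 diff:

#include <linux/kconfig.h>
#include <linux/irqflags.h>

static void atomic_update_section(void)
{
	unsigned long flags = 0;

	/* Interrupts are only disabled to keep the update window short;
	 * on PREEMPT_RT the section takes sleeping locks, so skip it. */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_save(flags);

	/* ... perform the atomic (all-or-nothing) plane update ... */

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_restore(flags);
}

]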
[bigeasy: drop local locks, commit message] Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 157/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: drm/i915: disable tracing on -RT Date: Thu, 6 Dec 2018 09:52:20 +0100 Luca Abeni reported this: | BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 | CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10 | Call Trace: | rt_spin_lock+0x3f/0x50 | gen6_read32+0x45/0x1d0 [i915] | g4x_get_vblank_counter+0x36/0x40 [i915] | trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915] The tracing events use trace_i915_pipe_update_start() among other events use functions acquire spin locks. A few trace points use intel_get_crtc_scanline(), others use ->get_vblank_counter() wich also might acquire a sleeping lock. Based on this I don't see any other way than disable trace points on RT. Cc: stable-rt@vger.kernel.org Reported-by: Luca Abeni <lucabe72@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 158/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with NOTRACE Date: Wed, 19 Dec 2018 10:47:02 +0100 The order of the header files is important. If this header file is included after tracepoint.h was included then the NOTRACE here becomes a nop. Currently this happens for two .c files which use the tracepoitns behind DRM_I915_LOW_LEVEL_TRACEPOINTS. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 159/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: drm/i915/gt: Only disable interrupts for the timeline lock on !force-threaded Date: Tue, 7 Jul 2020 12:25:11 +0200 According to commit d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe") the intrrupts are disabled the code may be called from an interrupt handler and from preemptible context. With `force_irqthreads' set the timeline mutex is never observed in IRQ context so it is not neede to disable interrupts. Disable only interrupts if not in `force_irqthreads' mode. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 160/191 [ Author: Mike Galbraith Email: efault@gmx.de Subject: cpuset: Convert callback_lock to raw_spinlock_t Date: Sun, 8 Jan 2017 09:32:25 +0100 The two commits below add up to a cpuset might_sleep() splat for RT: 8447a0fee974 cpuset: convert callback_mutex to a spinlock 344736f29b35 cpuset: simplify cpuset_node_allowed API BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:995 in_atomic(): 0, irqs_disabled(): 1, pid: 11718, name: cset CPU: 135 PID: 11718 Comm: cset Tainted: G E 4.10.0-rt1-rt #4 Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014 Call Trace: ? dump_stack+0x5c/0x81 ? ___might_sleep+0xf4/0x170 ? rt_spin_lock+0x1c/0x50 ? __cpuset_node_allowed+0x66/0xc0 ? ___slab_alloc+0x390/0x570 <disables IRQs> ? anon_vma_fork+0x8f/0x140 ? copy_page_range+0x6cf/0xb00 ? anon_vma_fork+0x8f/0x140 ? __slab_alloc.isra.74+0x5a/0x81 ? anon_vma_fork+0x8f/0x140 ? kmem_cache_alloc+0x1b5/0x1f0 ? anon_vma_fork+0x8f/0x140 ? copy_process.part.35+0x1670/0x1ee0 ? _do_fork+0xdd/0x3f0 ? _do_fork+0xdd/0x3f0 ? do_syscall_64+0x61/0x170 ? 
entry_SYSCALL64_slow_path+0x25/0x25 The latter ensured that a NUMA box WILL take callback_lock in atomic context by removing the allocator and reclaim path __GFP_HARDWALL usage which prevented such contexts from taking callback_mutex. One option would be to reinstate __GFP_HARDWALL protections for RT, however, as the 8447a0fee974 changelog states: The callback_mutex is only used to synchronize reads/updates of cpusets' flags and cpu/node masks. These operations should always proceed fast so there's no reason why we can't use a spinlock instead of the mutex. Cc: stable-rt@vger.kernel.org Signed-off-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 161/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: x86: Allow to enable RT Date: Wed, 7 Aug 2019 18:15:38 +0200 Allow selecting RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 162/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: mm/scatterlist: Do not disable irqs on RT Date: Fri, 3 Jul 2009 08:44:34 -0500 For -RT it is enough to keep pagefault disabled (which is currently handled by kmap_atomic()). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 163/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: sched: Add support for lazy preemption Date: Fri, 26 Oct 2012 18:50:54 +0100 It has become an obsession to mitigate the determinism vs. throughput loss of RT. Looking at the mainline semantics of preemption points gives a hint why RT sucks throughput-wise for ordinary SCHED_OTHER tasks. One major issue is the wakeup of tasks which are right away preempting the waking task while the waking task holds a lock on which the woken task will block right after having preempted the wakee. In mainline this is prevented due to the implicit preemption disable of spin/rw_lock held regions. On RT this is not possible due to the fully preemptible nature of sleeping spinlocks. Though for a SCHED_OTHER task preempting another SCHED_OTHER task this is really not a correctness issue. RT folks are concerned about SCHED_FIFO/RR tasks preemption and not about the purely fairness driven SCHED_OTHER preemption latencies. So I introduced a lazy preemption mechanism which only applies to SCHED_OTHER tasks preempting another SCHED_OTHER task. Aside from the existing preempt_count, each task now sports a preempt_lazy_count which is manipulated on lock acquisition and release. This is slightly incorrect as for laziness reasons I coupled this to migrate_disable/enable so some other mechanisms get the same treatment (e.g. get_cpu_light). Now on the scheduler side instead of setting NEED_RESCHED this sets NEED_RESCHED_LAZY in case of a SCHED_OTHER/SCHED_OTHER preemption and therefore allows the waking task to exit the lock-held region before the woken task preempts it. That also works better for cross CPU wakeups as the other side can stay in the adaptive spinning loop. For RT class preemption there is no change. This simply sets NEED_RESCHED and forgoes the lazy preemption counter.
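[ editor's note: a condensed sketch of the wakeup decision described above; TIF_NEED_RESCHED_LAZY and CONFIG_PREEMPT_LAZY are introduced by this patch set, not mainline symbols:

#include <linux/kconfig.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>

static void resched_curr_sketch(struct task_struct *curr,
				struct task_struct *waking)
{
	if (rt_task(waking) || !IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
		/* RT class (or lazy preemption disabled): preempt now. */
		set_tsk_need_resched(curr);
	} else {
		/* SCHED_OTHER preempting SCHED_OTHER: defer until the
		 * waking task has left its lock-held region. */
		set_tsk_thread_flag(curr, TIF_NEED_RESCHED_LAZY);
	}
}

]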
Initial tests do not expose any observable latency increase, but history shows that I've been proven wrong before :) The lazy preemption mode is on by default, but with CONFIG_SCHED_DEBUG enabled it can be disabled via: # echo NO_PREEMPT_LAZY >/sys/kernel/debug/sched_features and reenabled via # echo PREEMPT_LAZY >/sys/kernel/debug/sched_features The test results so far are very machine and workload dependent, but there is a clear trend that it enhances the non-RT workload performance. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 164/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: x86/entry: Use should_resched() in idtentry_exit_cond_resched() Date: Tue, 30 Jun 2020 11:45:14 +0200 The TIF_NEED_RESCHED bit is inlined on x86 into the preemption counter. By using should_resched(0) instead of need_resched() the same check can be performed which uses the same variable as `preempt_count()` which was used before. Use should_resched(0) instead of need_resched(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 165/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: x86: Support for lazy preemption Date: Thu, 1 Nov 2012 11:03:47 +0100 Implement the x86 pieces for lazy preempt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 166/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: arm: Add support for lazy preemption Date: Wed, 31 Oct 2012 12:04:11 +0100 Implement the arm pieces for lazy preempt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 167/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: powerpc: Add support for lazy preemption Date: Thu, 1 Nov 2012 10:14:11 +0100 Implement the powerpc pieces for lazy preempt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 168/191 [ Author: Anders Roxell Email: anders.roxell@linaro.org Subject: arch/arm64: Add lazy preempt support Date: Thu, 14 May 2015 17:52:17 +0200 arm64 is missing support for PREEMPT_RT. The main feature which is lacking is support for lazy preemption. The arch-specific entry code, thread information structure definitions, and associated data tables have to be extended to provide this support. Then the Kconfig file has to be extended to indicate the support is available, and also to indicate that support for full RT preemption is now available. Signed-off-by: Anders Roxell <anders.roxell@linaro.org> ] 169/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: jump-label: disable if stop_machine() is used Date: Wed, 8 Jul 2015 17:14:48 +0200 Some architectures are using stop_machine() while switching the opcode which leads to latency spikes.
The architectures which use stop_machine() atm: - ARM stop machine - s390 stop machine The architecures which use other sorcery: - MIPS - X86 - powerpc - sparc - arm64 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: only ARM for now] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 170/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: leds: trigger: disable CPU trigger on -RT Date: Thu, 23 Jan 2014 14:45:59 +0100 as it triggers: |CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141 |[<c0014aa4>] (unwind_backtrace+0x0/0xf8) from [<c0012788>] (show_stack+0x1c/0x20) |[<c0012788>] (show_stack+0x1c/0x20) from [<c043c8dc>] (dump_stack+0x20/0x2c) |[<c043c8dc>] (dump_stack+0x20/0x2c) from [<c004c5e8>] (__might_sleep+0x13c/0x170) |[<c004c5e8>] (__might_sleep+0x13c/0x170) from [<c043f270>] (__rt_spin_lock+0x28/0x38) |[<c043f270>] (__rt_spin_lock+0x28/0x38) from [<c043fa00>] (rt_read_lock+0x68/0x7c) |[<c043fa00>] (rt_read_lock+0x68/0x7c) from [<c036cf74>] (led_trigger_event+0x2c/0x5c) |[<c036cf74>] (led_trigger_event+0x2c/0x5c) from [<c036e0bc>] (ledtrig_cpu+0x54/0x5c) |[<c036e0bc>] (ledtrig_cpu+0x54/0x5c) from [<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c) |[<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c) from [<c00590b8>] (cpu_startup_entry+0xa8/0x234) |[<c00590b8>] (cpu_startup_entry+0xa8/0x234) from [<c043b2cc>] (rest_init+0xb8/0xe0) |[<c043b2cc>] (rest_init+0xb8/0xe0) from [<c061ebe0>] (start_kernel+0x2c4/0x380) Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 171/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tty/serial/omap: Make the locking RT aware Date: Thu, 28 Jul 2011 13:32:57 +0200 The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 172/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: tty/serial/pl011: Make the locking work on RT Date: Tue, 8 Jan 2013 21:36:51 +0100 The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 173/191 [ Author: Yadi.hu Email: yadi.hu@windriver.com Subject: ARM: enable irq in translation/section permission fault handlers Date: Wed, 10 Dec 2014 10:32:09 +0800 Probably happens on all ARM, with CONFIG_PREEMPT_RT CONFIG_DEBUG_ATOMIC_SLEEP This simple program.... int main() { *((char*)0xc0001000) = 0; }; [ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658 [ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a [ 512.743217] INFO: lockdep is turned off. 
[ 512.743360] irq event stamp: 0 [ 512.743482] hardirqs last enabled at (0): [< (null)>] (null) [ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0 [ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0 [ 512.744303] softirqs last disabled at (0): [< (null)>] (null) [ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104) [ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24) [ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0) [ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c) [ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0) [ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c) [ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8) [ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94) [ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48) [ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0) [ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8) 0xc0000000 belongs to the kernel address space; a user task cannot be allowed to access it. For the above condition, the correct result is that the test case receives a "segmentation fault" and exits, rather than producing the stack dump above. The root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in prefetch/data abort handlers"): it deletes the irq enable block in the Data abort assembly code and moves it into the page/breakpoint/alignment fault handlers instead, but the author does not enable irq in the translation/section permission fault handlers. ARM disables irq when it enters exception/interrupt mode; if the kernel doesn't enable irq, it remains disabled during translation/section permission faults. We see the above splat because do_force_sig_info is still called with IRQs off, and that code eventually does a: spin_lock_irqsave(&t->sighand->siglock, flags); As this is architecture independent code, and we've not seen any other need for another arch to have the siglock converted to a raw lock, we can conclude that we should enable irq for the ARM translation/section permission exceptions. Signed-off-by: Yadi.hu <yadi.hu@windriver.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 174/191 [ Author: Josh Cartwright Email: joshc@ni.com Subject: genirq: update irq_set_irqchip_state documentation Date: Thu, 11 Feb 2016 11:54:00 -0600 On -rt kernels, the use of migrate_disable()/migrate_enable() is sufficient to guarantee a task isn't moved to another CPU. Update the irq_set_irqchip_state() documentation to reflect this. Signed-off-by: Josh Cartwright <joshc@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 175/191 [ Author: Josh Cartwright Email: joshc@ni.com Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable() Date: Thu, 11 Feb 2016 11:54:01 -0600 kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating the vgic and timer states to prevent the calling task from migrating to another CPU. It does so to prevent the task from writing to the incorrect per-CPU GIC distributor registers. On -rt kernels, it's possible to maintain the same guarantee with the use of migrate_{disable,enable}(), with the added benefit that the migrate-disabled region is preemptible. Update kvm_arch_vcpu_ioctl_run() to do so.
Cc: Christoffer Dall <christoffer.dall@linaro.org> Reported-by: Manish Jaggi <Manish.Jaggi@caviumnetworks.com> Signed-off-by: Josh Cartwright <joshc@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 176/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: arm64: fpsimd: Delay freeing memory in fpsimd_flush_thread() Date: Wed, 25 Jul 2018 14:02:38 +0200 fpsimd_flush_thread() invokes kfree() via sve_free() within a preempt-disabled section, which does not work on -RT. Delay freeing of memory until preemption is enabled again. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 177/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: x86: Enable RT also on 32bit Date: Thu, 7 Nov 2019 17:49:20 +0100 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 178/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: ARM: Allow to enable RT Date: Fri, 11 Oct 2019 13:14:29 +0200 Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 179/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: ARM64: Allow to enable RT Date: Fri, 11 Oct 2019 13:14:35 +0200 Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 180/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: powerpc: traps: Use PREEMPT_RT Date: Fri, 26 Jul 2019 11:30:49 +0200 Add PREEMPT_RT to the backtrace if enabled. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 181/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: powerpc/pseries/iommu: Use a locallock instead local_irq_save() Date: Tue, 26 Mar 2019 18:31:54 +0100 The locallock protects the per-CPU variable tce_page. The function attempts to allocate memory while tce_page is protected (by disabling interrupts). Use local_irq_save() instead of local_irq_disable(). Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 182/191 [ Author: Bogdan Purcareata Email: bogdan.purcareata@freescale.com Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT Date: Fri, 24 Apr 2015 15:53:13 +0000 While converting the openpic emulation code to use a raw_spinlock_t enables guests to run on RT, there's still a performance issue. For interrupts sent in directed delivery mode with a multiple CPU mask, the emulated openpic will loop through all of the VCPUs, and for each VCPU it calls IRQ_check, which will loop through all the pending interrupts for that VCPU. This is done while holding the raw_lock, meaning that in all this time the interrupts and preemption are disabled on the host Linux. A malicious user app can max out both of these numbers and cause a DoS. This temporary fix is sent for two reasons. First is so that users who want to use the in-kernel MPIC emulation are aware of the potential latencies, thus making sure that the hardware MPIC and their usage scenario does not involve interrupts sent in directed delivery mode, and the number of possible pending interrupts is kept small. Secondly, this should incentivize the development of a proper openpic emulation that would be better suited for RT.
Acked-by: Scott Wood <scottwood@freescale.com> Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 183/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: powerpc/stackprotector: work around stack-guard init from atomic Date: Tue, 26 Mar 2019 18:31:29 +0100 This is invoked from the secondary CPU in atomic context. On x86 we use tsc instead. On Power we XOR it against mftb(), so let's use the stack address as the initial value. Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 184/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: powerpc: Avoid recursive header includes Date: Fri, 8 Jan 2021 19:48:21 +0100 - The include of bug.h leads to an include of printk.h which gets back to spinlock.h and then complains about missing xchg(). Remove bug.h and add bits.h which is needed for BITS_PER_BYTE. - Avoid the "please don't include this file directly" error from rwlock-rt. Allow an include from/with rtmutex.h. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 185/191 [ Author: Sebastian Andrzej Siewior Email: bigeasy@linutronix.de Subject: POWERPC: Allow to enable RT Date: Fri, 11 Oct 2019 13:14:41 +0200 Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 186/191 [ Author: Mike Galbraith Email: umgwanakikbuti@gmail.com Subject: drivers/block/zram: Replace bit spinlocks with rtmutex for -rt Date: Thu, 31 Mar 2016 04:08:28 +0200 They're nondeterministic, and lead to ___might_sleep() splats in -rt. OTOH, they're a lot less wasteful than an rtmutex per page. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 187/191 [ Author: Haris Okanovic Email: haris.okanovic@ni.com Subject: tpm_tis: fix stall after iowrite*()s Date: Tue, 15 Aug 2017 15:13:08 -0500 ioread8() operations to TPM MMIO addresses can stall the CPU when immediately following a sequence of iowrite*()'s to the same region. For example, cyclictest measures ~400us latency spikes when a non-RT usermode application communicates with an SPI-based TPM chip (Intel Atom E3940 system, PREEMPT_RT kernel). The spikes are caused by a stalling ioread8() operation following a sequence of 30+ iowrite8()s to the same address. I believe this happens because the write sequence is buffered (in the CPU or somewhere along the bus), and gets flushed on the first LOAD instruction (ioread*()) that follows. The enclosed change appears to fix this issue: read the TPM chip's access register (status code) after every iowrite*() operation to amortize the cost of flushing data to the chip across multiple instructions. Signed-off-by: Haris Okanovic <haris.okanovic@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 188/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: signals: Allow RT tasks to cache one sigqueue struct Date: Fri, 3 Jul 2009 08:44:56 -0500 Allow realtime tasks to cache one sigqueue in the task struct. This avoids an allocation which can cause latencies or fail. Ideally the sigqueue is cached after the first successful delivery and will be available for the next signal delivery. This works under the assumption that the RT task never has an unprocessed signal while a new one is about to be queued. The caching is not used for SIGQUEUE_PREALLOC because this kind of sigqueue is handled differently (and not used for regular signal delivery).
[bigeasy: With a fix from Matt Fleming <matt@codeblueprint.co.uk>] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ] 189/191 [ Author: Ingo Molnar Email: mingo@elte.hu Subject: genirq: Disable irqpoll on -rt Date: Fri, 3 Jul 2009 08:29:57 -0500 Creates long latencies for no value. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] 190/191 [ Author: Clark Williams Email: williams@redhat.com Subject: sysfs: Add /sys/kernel/realtime entry Date: Sat, 30 Jul 2011 21:55:53 -0500 Add a /sys/kernel entry to indicate that the kernel is a realtime kernel. Clark says that he needs this for udev rules; udev needs to evaluate if it's a PREEMPT_RT kernel a few thousand times, and parsing uname output is too slow. Are there better solutions? Should it exist and return 0 on !-rt? Signed-off-by: Clark Williams <williams@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> ] 191/191 [ Author: Thomas Gleixner Email: tglx@linutronix.de Subject: Add localversion for -RT release Date: Fri, 8 Jul 2011 20:25:16 +0200 Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ] Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
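As a quick illustration of the /sys/kernel/realtime entry described in 190/191 above, a userspace check could look like the sketch below. It assumes the file reads as "1" on a realtime kernel and treats a missing file as "not realtime"; whether the entry exists at all on !-rt kernels is exactly the open question raised in the changelog, so the fallback here is an assumption.

/* Sketch only: probe the /sys/kernel/realtime entry added by 190/191.
 * Assumption: the file contains "1" on a PREEMPT_RT kernel and is absent
 * or reads differently otherwise.  Build: cc -o rtcheck rtcheck.c
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (f) {
		if (fscanf(f, "%d", &rt) != 1)
			rt = 0;
		fclose(f);
	}
	printf("PREEMPT_RT kernel: %s\n", rt == 1 ? "yes" : "no");
	return 0;
}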
-rw-r--r--features/rt/ARM-Allow-to-enable-RT.patch35
-rw-r--r--features/rt/ARM-enable-irq-in-translation-section-permission-fau.patch94
-rw-r--r--features/rt/ARM64-Allow-to-enable-RT.patch35
-rw-r--r--features/rt/Add-localversion-for-RT-release.patch21
-rw-r--r--features/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch58
-rw-r--r--features/rt/PCI-hv-Use-tasklet_disable_in_atomic.patch45
-rw-r--r--features/rt/POWERPC-Allow-to-enable-RT.patch35
-rw-r--r--features/rt/arch-arm64-Add-lazy-preempt-support.patch170
-rw-r--r--features/rt/arm-Add-support-for-lazy-preemption.patch136
-rw-r--r--features/rt/arm64-fpsimd-Delay-freeing-memory-in-fpsimd_flush_th.patch65
-rw-r--r--features/rt/ath9k-Use-tasklet_disable_in_atomic.patch47
-rw-r--r--features/rt/atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch41
-rw-r--r--features/rt/block-mq-do-not-invoke-preempt_disable.patch39
-rw-r--r--features/rt/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch49
-rw-r--r--features/rt/console-add-write_atomic-interface.patch162
-rw-r--r--features/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch328
-rw-r--r--features/rt/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch84
-rw-r--r--features/rt/crypto-limit-more-FPU-enabled-sections.patch73
-rw-r--r--features/rt/debugobjects-Make-RT-aware.patch31
-rw-r--r--features/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch92
-rw-r--r--features/rt/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch79
-rw-r--r--features/rt/drm-i915-disable-tracing-on-RT.patch46
-rw-r--r--features/rt/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch51
-rw-r--r--features/rt/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch32
-rw-r--r--features/rt/drm-radeon-i915-Use-preempt_disable-enable_rt-where-.patch60
-rw-r--r--features/rt/efi-Allow-efi-runtime.patch31
-rw-r--r--features/rt/efi-Disable-runtime-services-on-RT.patch45
-rw-r--r--features/rt/firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch60
-rw-r--r--features/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch98
-rw-r--r--features/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch262
-rw-r--r--features/rt/fs-namespace-Use-cpu_chill-in-trylock-loops.patch43
-rw-r--r--features/rt/futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch117
-rw-r--r--features/rt/genirq-Disable-irqpoll-on-rt.patch42
-rw-r--r--features/rt/genirq-Move-prio-assignment-into-the-newly-created-t.patch61
-rw-r--r--features/rt/genirq-update-irq_set_irqchip_state-documentation.patch31
-rw-r--r--features/rt/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch84
-rw-r--r--features/rt/irqtime-Make-accounting-correct-on-RT.patch53
-rw-r--r--features/rt/irqwork-push-most-work-into-softirq-context.patch200
-rw-r--r--features/rt/jump-label-disable-if-stop_machine-is-used.patch41
-rw-r--r--features/rt/kconfig-Disable-config-options-which-are-not-RT-comp.patch29
-rw-r--r--features/rt/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch109
-rw-r--r--features/rt/kernel-sched-add-put-get-_cpu_light.patch27
-rw-r--r--features/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch80
-rw-r--r--features/rt/kthread-Move-prio-affinite-change-into-the-newly-cre.patch85
-rw-r--r--features/rt/leds-trigger-disable-CPU-trigger-on-RT.patch39
-rw-r--r--features/rt/lockdep-Make-it-RT-aware.patch77
-rw-r--r--features/rt/lockdep-Reduce-header-files-in-debug_locks.h.patch32
-rw-r--r--features/rt/lockdep-disable-self-test.patch34
-rw-r--r--features/rt/lockdep-selftest-Only-do-hardirq-context-test-for-ra.patch61
-rw-r--r--features/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch148
-rw-r--r--features/rt/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch125
-rw-r--r--features/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch165
-rw-r--r--features/rt/locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch36
-rw-r--r--features/rt/locking-rtmutex-Avoid-include-hell.patch29
-rw-r--r--features/rt/locking-rtmutex-Handle-the-various-new-futex-race-co.patch255
-rw-r--r--features/rt/locking-rtmutex-Make-lock_killable-work.patch49
-rw-r--r--features/rt/locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch59
-rw-r--r--features/rt/locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch144
-rw-r--r--features/rt/locking-rtmutex-Remove-cruft.patch98
-rw-r--r--features/rt/locking-rtmutex-Remove-output-from-deadlock-detector.patch311
-rw-r--r--features/rt/locking-rtmutex-Remove-rt_mutex_timed_lock.patch97
-rw-r--r--features/rt/locking-rtmutex-Use-custom-scheduling-function-for-s.patch242
-rw-r--r--features/rt/locking-rtmutex-add-mutex-implementation-based-on-rt.patch384
-rw-r--r--features/rt/locking-rtmutex-add-rwlock-implementation-based-on-r.patch557
-rw-r--r--features/rt/locking-rtmutex-add-rwsem-implementation-based-on-rt.patch454
-rw-r--r--features/rt/locking-rtmutex-add-sleeping-lock-implementation.patch1232
-rw-r--r--features/rt/locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch455
-rw-r--r--features/rt/locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch129
-rw-r--r--features/rt/locking-rtmutex-wire-up-RT-s-locking.patch346
-rw-r--r--features/rt/locking-spinlock-Split-the-lock-types-header.patch252
-rw-r--r--features/rt/locking-split-out-the-rbtree-definition.patch119
-rw-r--r--features/rt/md-raid5-Make-raid5_percpu-handling-RT-aware.patch68
-rw-r--r--features/rt/mm-Allow-only-SLUB-on-RT.patch46
-rw-r--r--features/rt/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch43
-rw-r--r--features/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch73
-rw-r--r--features/rt/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch143
-rw-r--r--features/rt/mm-memcontrol-Replace-local_irq_disable-with-local-l.patch122
-rw-r--r--features/rt/mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch210
-rw-r--r--features/rt/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch38
-rw-r--r--features/rt/mm-scatterlist-Do-not-disable-irqs-on-RT.patch29
-rw-r--r--features/rt/mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch602
-rw-r--r--features/rt/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch32
-rw-r--r--features/rt/mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch35
-rw-r--r--features/rt/mm-slub-Enable-irqs-for-__GFP_WAIT.patch76
-rw-r--r--features/rt/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch49
-rw-r--r--features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch416
-rw-r--r--features/rt/mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch120
-rw-r--r--features/rt/mm-vmalloc-Another-preempt-disable-region-which-suck.patch72
-rw-r--r--features/rt/mm-vmstat-Protect-per-cpu-variables-with-preempt-dis.patch144
-rw-r--r--features/rt/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch48
-rw-r--r--features/rt/mm-zsmalloc-copy-with-get_cpu_var-and-locking.patch211
-rw-r--r--features/rt/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch36
-rw-r--r--features/rt/net-Move-lockdep-where-it-belongs.patch46
-rw-r--r--features/rt/net-Properly-annotate-the-try-lock-for-the-seqlock.patch69
-rw-r--r--features/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch298
-rw-r--r--features/rt/net-Remove-preemption-disabling-in-netif_rx.patch67
-rw-r--r--features/rt/net-Use-skbufhead-with-raw-lock.patch73
-rw-r--r--features/rt/net-core-disable-NET_RX_BUSY_POLL-on-RT.patch43
-rw-r--r--features/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch40
-rw-r--r--features/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch41
-rw-r--r--features/rt/net-jme-Replace-link-change-tasklet-with-work.patch87
-rw-r--r--features/rt/net-sundance-Use-tasklet_disable_in_atomic.patch38
-rw-r--r--features/rt/notifier-Make-atomic_notifiers-use-raw_spinlock.patch131
-rw-r--r--features/rt/panic-skip-get_random_bytes-for-RT_FULL-in-init_oops.patch33
-rw-r--r--features/rt/pid.h-include-atomic.h.patch42
-rw-r--r--features/rt/powerpc-Add-support-for-lazy-preemption.patch215
-rw-r--r--features/rt/powerpc-Avoid-recursive-header-includes.patch47
-rw-r--r--features/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch44
-rw-r--r--features/rt/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch43
-rw-r--r--features/rt/powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch116
-rw-r--r--features/rt/powerpc-stackprotector-work-around-stack-guard-init-.patch35
-rw-r--r--features/rt/powerpc-traps-Use-PREEMPT_RT.patch38
-rw-r--r--features/rt/preempt-Provide-preempt_-_-no-rt-variants.patch52
-rw-r--r--features/rt/printk-add-console-handover.patch75
-rw-r--r--features/rt/printk-add-pr_flush.patch210
-rw-r--r--features/rt/printk-add-syslog_lock.patch158
-rw-r--r--features/rt/printk-change-console_seq-to-atomic64_t.patch131
-rw-r--r--features/rt/printk-combine-boot_delay_msec-into-printk_delay.patch43
-rw-r--r--features/rt/printk-console-remove-unnecessary-safe-buffer-usage.patch47
-rw-r--r--features/rt/printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch146
-rw-r--r--features/rt/printk-convert-syslog_lock-to-spin_lock.patch118
-rw-r--r--features/rt/printk-introduce-CONSOLE_LOG_MAX-for-improved-multi-.patch94
-rw-r--r--features/rt/printk-introduce-a-kmsg_dump-iterator.patch560
-rw-r--r--features/rt/printk-introduce-kernel-sync-mode.patch308
-rw-r--r--features/rt/printk-kmsg_dump-remove-_nolock-variants.patch225
-rw-r--r--features/rt/printk-kmsg_dump-remove-unused-fields.patch42
-rw-r--r--features/rt/printk-kmsg_dump-use-kmsg_dump_rewind.patch41
-rw-r--r--features/rt/printk-limit-second-loop-of-syslog_print_all.patch55
-rw-r--r--features/rt/printk-move-console-printing-to-kthreads.patch846
-rw-r--r--features/rt/printk-refactor-kmsg_dump_get_buffer.patch144
-rw-r--r--features/rt/printk-relocate-printk_delay-and-vprintk_default.patch88
-rw-r--r--features/rt/printk-remove-deferred-printing.patch432
-rw-r--r--features/rt/printk-remove-logbuf_lock.patch485
-rw-r--r--features/rt/printk-remove-safe-buffers.patch875
-rw-r--r--features/rt/printk-track-limit-recursion.patch142
-rw-r--r--features/rt/printk-use-atomic64_t-for-devkmsg_user.seq.patch111
-rw-r--r--features/rt/printk-use-seqcount_latch-for-clear_seq.patch146
-rw-r--r--features/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch165
-rw-r--r--features/rt/ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch64
-rw-r--r--features/rt/random-Make-it-work-on-rt.patch185
-rw-r--r--features/rt/rcu-Delay-RCU-selftests.patch75
-rw-r--r--features/rt/rcu-Prevent-false-positive-softirq-warning-on-RT.patch34
-rw-r--r--features/rt/rcutorture-Avoid-problematic-critical-section-nestin.patch195
-rw-r--r--features/rt/rt-Add-local-irq-locks.patch210
-rw-r--r--features/rt/rt-Introduce-cpu_chill.patch121
-rw-r--r--features/rt/rt.scc191
-rw-r--r--features/rt/sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch115
-rw-r--r--features/rt/sched-Add-support-for-lazy-preemption.patch690
-rw-r--r--features/rt/sched-Disable-CONFIG_RT_GROUP_SCHED-on-RT.patch34
-rw-r--r--features/rt/sched-Disable-TTWU_QUEUE-on-RT.patch37
-rw-r--r--features/rt/sched-Do-not-account-rcu_preempt_depth-on-RT-in-migh.patch56
-rw-r--r--features/rt/sched-Limit-the-number-of-task-migrations-per-batch.patch32
-rw-r--r--features/rt/sched-Move-mmdrop-to-RCU-on-RT.patch114
-rw-r--r--features/rt/scsi-fcoe-Make-RT-aware.patch115
-rw-r--r--features/rt/serial-8250-implement-write_atomic.patch499
-rw-r--r--features/rt/shmem-Use-raw_spinlock_t-for-stat_lock.patch146
-rw-r--r--features/rt/signal-Revert-ptrace-preempt-magic.patch38
-rw-r--r--features/rt/signal-x86-Delay-calling-signals-in-atomic.patch139
-rw-r--r--features/rt/signals-Allow-RT-tasks-to-cache-one-sigqueue-struct.patch216
-rw-r--r--features/rt/smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch47
-rw-r--r--features/rt/softirq-Add-RT-specific-softirq-accounting.patch74
-rw-r--r--features/rt/softirq-Check-preemption-after-reenabling-interrupts.patch150
-rw-r--r--features/rt/softirq-Disable-softirq-stacks-for-RT.patch174
-rw-r--r--features/rt/softirq-Make-softirq-control-and-processing-RT-aware.patch266
-rw-r--r--features/rt/softirq-Move-various-protections-into-inline-helpers.patch107
-rw-r--r--features/rt/sunrpc-Make-svc_xprt_do_enqueue-use-get_cpu_light.patch59
-rw-r--r--features/rt/sysfs-Add-sys-kernel-realtime-entry.patch53
-rw-r--r--features/rt/tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch108
-rw-r--r--features/rt/tasklets-Provide-tasklet_disable_in_atomic.patch67
-rw-r--r--features/rt/tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch34
-rw-r--r--features/rt/tasklets-Replace-spin-wait-in-tasklet_kill.patch73
-rw-r--r--features/rt/tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch89
-rw-r--r--features/rt/tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch34
-rw-r--r--features/rt/tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch32
-rw-r--r--features/rt/tasklets-Use-static-inlines-for-stub-implementations.patch34
-rw-r--r--features/rt/tcp-Remove-superfluous-BH-disable-around-listening_h.patch108
-rw-r--r--features/rt/tick-sched-Prevent-false-positive-softirq-pending-wa.patch83
-rw-r--r--features/rt/timers-Move-clearing-of-base-timer_running-under-bas.patch62
-rw-r--r--features/rt/tpm_tis-fix-stall-after-iowrite-s.patch83
-rw-r--r--features/rt/trace-Add-migrate-disabled-counter-to-tracing-output.patch122
-rw-r--r--features/rt/tty-serial-omap-Make-the-locking-RT-aware.patch48
-rw-r--r--features/rt/tty-serial-pl011-Make-the-locking-work-on-RT.patch59
-rw-r--r--features/rt/u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch151
-rw-r--r--features/rt/um-synchronize-kmsg_dumper.patch60
-rw-r--r--features/rt/wait.h-include-atomic.h.patch41
-rw-r--r--features/rt/x86-Allow-to-enable-RT.patch27
-rw-r--r--features/rt/x86-Enable-RT-also-on-32bit.patch33
-rw-r--r--features/rt/x86-Support-for-lazy-preemption.patch155
-rw-r--r--features/rt/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch34
-rw-r--r--features/rt/x86-kvm-Require-const-tsc-for-RT.patch37
-rw-r--r--features/rt/x86-stackprotector-Avoid-random-pool-on-rt.patch50
-rw-r--r--features/rt/xfrm-Use-sequence-counter-with-associated-spinlock.patch65
192 files changed, 25158 insertions, 0 deletions
diff --git a/features/rt/ARM-Allow-to-enable-RT.patch b/features/rt/ARM-Allow-to-enable-RT.patch
new file mode 100644
index 00000000..cf78c0cb
--- /dev/null
+++ b/features/rt/ARM-Allow-to-enable-RT.patch
@@ -0,0 +1,35 @@
+From b543f40eada157accc05e9e419a6ffc0168647d6 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 11 Oct 2019 13:14:29 +0200
+Subject: [PATCH 178/191] ARM: Allow to enable RT
+
+Allow to select RT.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm/Kconfig | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index fe49c7cb76bd..876e4d478e57 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -31,6 +31,7 @@ config ARM
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
+ select ARCH_SUPPORTS_ATOMIC_RMW
++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
+@@ -123,6 +124,7 @@ config ARM
+ select OLD_SIGSUSPEND3
+ select PCI_SYSCALL if PCI
+ select PERF_USE_VMALLOC
++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ select RTC_LIB
+ select SET_FS
+ select SYS_SUPPORTS_APM_EMULATION
+--
+2.19.1
+
diff --git a/features/rt/ARM-enable-irq-in-translation-section-permission-fau.patch b/features/rt/ARM-enable-irq-in-translation-section-permission-fau.patch
new file mode 100644
index 00000000..47dd0be4
--- /dev/null
+++ b/features/rt/ARM-enable-irq-in-translation-section-permission-fau.patch
@@ -0,0 +1,94 @@
+From ced2a927997124c1cceecd5da04917ad03992490 Mon Sep 17 00:00:00 2001
+From: "Yadi.hu" <yadi.hu@windriver.com>
+Date: Wed, 10 Dec 2014 10:32:09 +0800
+Subject: [PATCH 173/191] ARM: enable irq in translation/section permission
+ fault handlers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Probably happens on all ARM, with
+CONFIG_PREEMPT_RT
+CONFIG_DEBUG_ATOMIC_SLEEP
+
+This simple program....
+
+int main() {
+ *((char*)0xc0001000) = 0;
+};
+
+[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
+[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a
+[ 512.743217] INFO: lockdep is turned off.
+[ 512.743360] irq event stamp: 0
+[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null)
+[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
+[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
+[ 512.744303] softirqs last disabled at (0): [< (null)>] (null)
+[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104)
+[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24)
+[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0)
+[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c)
+[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0)
+[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c)
+[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8)
+[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94)
+[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48)
+[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0)
+[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8)
+
+Oxc0000000 belongs to kernel address space, user task can not be
+allowed to access it. For above condition, correct result is that
+test case should receive a “segment fault” and exits but not stacks.
+
+the root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in
+prefetch/data abort handlers"),it deletes irq enable block in Data
+abort assemble code and move them into page/breakpiont/alignment fault
+handlers instead. But author does not enable irq in translation/section
+permission fault handlers. ARM disables irq when it enters exception/
+interrupt mode, if kernel doesn't enable irq, it would be still disabled
+during translation/section permission fault.
+
+We see the above splat because do_force_sig_info is still called with
+IRQs off, and that code eventually does a:
+
+ spin_lock_irqsave(&t->sighand->siglock, flags);
+
+As this is architecture independent code, and we've not seen any other
+need for other arch to have the siglock converted to raw lock, we can
+conclude that we should enable irq for ARM translation/section
+permission exception.
+
+Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm/mm/fault.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index da78740faf7b..07f79e533a29 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ if (addr < TASK_SIZE)
+ return do_page_fault(addr, fsr, regs);
+
++ if (interrupts_enabled(regs))
++ local_irq_enable();
++
+ if (user_mode(regs))
+ goto bad_area;
+
+@@ -479,6 +482,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ static int
+ do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
++ if (interrupts_enabled(regs))
++ local_irq_enable();
++
+ do_bad_area(addr, fsr, regs);
+ return 0;
+ }
+--
+2.19.1
+
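For reference, the one-line reproducer quoted in the changelog above can be fleshed out into a complete, compilable program. On a kernel with this patch applied the process should simply be killed with SIGSEGV instead of triggering the might_sleep() splat; the printf() wrapping is an illustrative addition, not part of the original test case, and the 0xc0001000 address assumes the usual 32-bit ARM 3G/1G split.

/* Expanded version of the reproducer from the changelog above.
 * Touching 0xc0001000 (a kernel address on 32-bit ARM with the common
 * 3G/1G split) must deliver SIGSEGV to this process rather than
 * producing a kernel stack dump.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	printf("about to write to a kernel address...\n");
	fflush(stdout);
	*((volatile char *)0xc0001000) = 0;	/* expected to fault here */
	printf("unexpected: the store did not fault\n");
	return EXIT_FAILURE;
}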
diff --git a/features/rt/ARM64-Allow-to-enable-RT.patch b/features/rt/ARM64-Allow-to-enable-RT.patch
new file mode 100644
index 00000000..3f265dc0
--- /dev/null
+++ b/features/rt/ARM64-Allow-to-enable-RT.patch
@@ -0,0 +1,35 @@
+From 49cc082d1b5e913dd79529b9fbed93801c11af68 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 11 Oct 2019 13:14:35 +0200
+Subject: [PATCH 179/191] ARM64: Allow to enable RT
+
+Allow to select RT.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm64/Kconfig | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 93a4dd767825..9cc92459812c 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -78,6 +78,7 @@ config ARM64
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
+ select ARCH_SUPPORTS_NUMA_BALANCING
++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+ select ARCH_WANT_DEFAULT_BPF_JIT
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+@@ -203,6 +204,7 @@ config ARM64
+ select PCI_DOMAINS_GENERIC if PCI
+ select PCI_ECAM if (ACPI && PCI)
+ select PCI_SYSCALL if PCI
++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ select POWER_RESET
+ select POWER_SUPPLY
+ select SPARSE_IRQ
+--
+2.19.1
+
diff --git a/features/rt/Add-localversion-for-RT-release.patch b/features/rt/Add-localversion-for-RT-release.patch
new file mode 100644
index 00000000..f18bce5a
--- /dev/null
+++ b/features/rt/Add-localversion-for-RT-release.patch
@@ -0,0 +1,21 @@
+From 98e2d66a6a93107c8552dfa1023d83c9f757dfd5 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 8 Jul 2011 20:25:16 +0200
+Subject: [PATCH 191/191] Add localversion for -RT release
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ localversion-rt | 1 +
+ 1 file changed, 1 insertion(+)
+ create mode 100644 localversion-rt
+
+diff --git a/localversion-rt b/localversion-rt
+new file mode 100644
+index 000000000000..6f206be67cd2
+--- /dev/null
++++ b/localversion-rt
+@@ -0,0 +1 @@
++-rt1
+--
+2.19.1
+
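The series description above mentions that udev historically had to parse uname output to detect an RT kernel; with the localversion-rt file added here, that check boils down to looking for an "-rt" substring in the kernel release string. A minimal sketch follows; the substring match is a heuristic rather than an official interface, which is why the /sys/kernel/realtime entry from 190/191 exists as the faster alternative.

/* Heuristic sketch: detect an RT kernel by the "-rt" tag that the
 * localversion-rt file above appends to the kernel release string.
 * This is the slow-path check that /sys/kernel/realtime is meant to avoid.
 */
#include <stdio.h>
#include <string.h>
#include <sys/utsname.h>

int main(void)
{
	struct utsname u;

	if (uname(&u) != 0) {
		perror("uname");
		return 1;
	}
	printf("release: %s (%s)\n", u.release,
	       strstr(u.release, "-rt") ? "looks like an RT kernel"
					: "no -rt tag found");
	return 0;
}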
diff --git a/features/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/features/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch
new file mode 100644
index 00000000..b42375b5
--- /dev/null
+++ b/features/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch
@@ -0,0 +1,58 @@
+From 67396da80e5247521bb3a295c0a17ea970b143d6 Mon Sep 17 00:00:00 2001
+From: Josh Cartwright <joshc@ni.com>
+Date: Thu, 11 Feb 2016 11:54:01 -0600
+Subject: [PATCH 175/191] KVM: arm/arm64: downgrade preempt_disable()d region
+ to migrate_disable()
+
+kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating
+the vgic and timer states to prevent the calling task from migrating to
+another CPU. It does so to prevent the task from writing to the
+incorrect per-CPU GIC distributor registers.
+
+On -rt kernels, it's possible to maintain the same guarantee with the
+use of migrate_{disable,enable}(), with the added benefit that the
+migrate-disabled region is preemptible. Update
+kvm_arch_vcpu_ioctl_run() to do so.
+
+Cc: Christoffer Dall <christoffer.dall@linaro.org>
+Reported-by: Manish Jaggi <Manish.Jaggi@caviumnetworks.com>
+Signed-off-by: Josh Cartwright <joshc@ni.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm64/kvm/arm.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
+index 7f06ba76698d..f9318256d6fb 100644
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -737,7 +737,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ * involves poking the GIC, which must be done in a
+ * non-preemptible context.
+ */
+- preempt_disable();
++ migrate_disable();
+
+ kvm_pmu_flush_hwstate(vcpu);
+
+@@ -786,7 +786,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ kvm_timer_sync_user(vcpu);
+ kvm_vgic_sync_hwstate(vcpu);
+ local_irq_enable();
+- preempt_enable();
++ migrate_enable();
+ continue;
+ }
+
+@@ -858,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ /* Exit types that need handling before we can be preempted */
+ handle_exit_early(vcpu, ret);
+
+- preempt_enable();
++ migrate_enable();
+
+ /*
+ * The ARMv8 architecture doesn't give the hypervisor
+--
+2.19.1
+
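The substitution above is one instance of a general PREEMPT_RT pattern: when per-CPU state only needs protection against migration, migrate_disable()/migrate_enable() keep the task on its CPU while leaving the section preemptible. Below is a minimal, self-contained module sketch of that pattern; the per-CPU counter and module name are invented for illustration and are not part of the patch, and it assumes a kernel tree where migrate_disable() is available to modules, as it is in this series.

/* Illustrative module only: the migrate_disable()/migrate_enable() pattern
 * used above, applied to an invented per-CPU counter.
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, demo_hits);

static int __init migrate_demo_init(void)
{
	unsigned long hits;
	int cpu;

	migrate_disable();	/* pinned to this CPU, but still preemptible on RT */
	cpu = raw_smp_processor_id();
	this_cpu_inc(demo_hits);
	hits = this_cpu_read(demo_hits);
	migrate_enable();

	pr_info("migrate_demo: cpu %d, hits %lu\n", cpu, hits);
	return 0;
}

static void __exit migrate_demo_exit(void)
{
}

module_init(migrate_demo_init);
module_exit(migrate_demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("migrate_disable() pattern sketch");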
diff --git a/features/rt/PCI-hv-Use-tasklet_disable_in_atomic.patch b/features/rt/PCI-hv-Use-tasklet_disable_in_atomic.patch
new file mode 100644
index 00000000..497e17e0
--- /dev/null
+++ b/features/rt/PCI-hv-Use-tasklet_disable_in_atomic.patch
@@ -0,0 +1,45 @@
+From 9fc84c65954200bfb07bc299f48f8fed7905ba92 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:15 +0100
+Subject: [PATCH 052/191] PCI: hv: Use tasklet_disable_in_atomic()
+
+The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is
+invoked via irq_chip_compose_msi_msg(), which itself is always invoked from
+atomic contexts from the guts of the interrupt core code.
+
+There is no way to change this w/o rewriting the whole driver, so use
+tasklet_disable_in_atomic() which allows to make tasklet_disable()
+sleepable once the remaining atomic users are addressed.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: "K. Y. Srinivasan" <kys@microsoft.com>
+Cc: Haiyang Zhang <haiyangz@microsoft.com>
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
+Cc: Wei Liu <wei.liu@kernel.org>
+Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Cc: Rob Herring <robh@kernel.org>
+Cc: Bjorn Helgaas <bhelgaas@google.com>
+Cc: linux-hyperv@vger.kernel.org
+Cc: linux-pci@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/pci/controller/pci-hyperv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
+index 27a17a1e4a7c..a313708bcf75 100644
+--- a/drivers/pci/controller/pci-hyperv.c
++++ b/drivers/pci/controller/pci-hyperv.c
+@@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ * Prevents hv_pci_onchannelcallback() from running concurrently
+ * in the tasklet.
+ */
+- tasklet_disable(&channel->callback_event);
++ tasklet_disable_in_atomic(&channel->callback_event);
+
+ /*
+ * Since this function is called with IRQ locks held, can't
+--
+2.19.1
+
diff --git a/features/rt/POWERPC-Allow-to-enable-RT.patch b/features/rt/POWERPC-Allow-to-enable-RT.patch
new file mode 100644
index 00000000..b0222f0a
--- /dev/null
+++ b/features/rt/POWERPC-Allow-to-enable-RT.patch
@@ -0,0 +1,35 @@
+From 76fabe2ab5dd6e98cc51f7289944552db0f5480e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 11 Oct 2019 13:14:41 +0200
+Subject: [PATCH 185/191] POWERPC: Allow to enable RT
+
+Allow to select RT.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/Kconfig | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index bbee9b2f2bc7..a4fe480b4e9d 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -147,6 +147,7 @@ config PPC
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64
++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+ select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
+@@ -240,6 +241,7 @@ config PPC
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_IRQ_TIME_ACCOUNTING
++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ select HAVE_RSEQ
+ select IOMMU_HELPER if PPC64
+ select IRQ_DOMAIN
+--
+2.19.1
+
diff --git a/features/rt/arch-arm64-Add-lazy-preempt-support.patch b/features/rt/arch-arm64-Add-lazy-preempt-support.patch
new file mode 100644
index 00000000..cc7a56d7
--- /dev/null
+++ b/features/rt/arch-arm64-Add-lazy-preempt-support.patch
@@ -0,0 +1,170 @@
+From f4aa6daf2286c2e06200d7909bb812c7ceb09e69 Mon Sep 17 00:00:00 2001
+From: Anders Roxell <anders.roxell@linaro.org>
+Date: Thu, 14 May 2015 17:52:17 +0200
+Subject: [PATCH 168/191] arch/arm64: Add lazy preempt support
+
+arm64 is missing support for PREEMPT_RT. The main feature which is
+lacking is support for lazy preemption. The arch-specific entry code,
+thread information structure definitions, and associated data tables
+have to be extended to provide this support. Then the Kconfig file has
+to be extended to indicate the support is available, and also to
+indicate that support for full RT preemption is now available.
+
+Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
+---
+ arch/arm64/Kconfig | 1 +
+ arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++-
+ arch/arm64/include/asm/thread_info.h | 8 +++++++-
+ arch/arm64/kernel/asm-offsets.c | 1 +
+ arch/arm64/kernel/entry.S | 13 +++++++++++--
+ arch/arm64/kernel/signal.c | 2 +-
+ 6 files changed, 45 insertions(+), 5 deletions(-)
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 5656e7aacd69..93a4dd767825 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -182,6 +182,7 @@ config ARM64
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_REGS_AND_STACK_ACCESS_API
++ select HAVE_PREEMPT_LAZY
+ select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_FUTEX_CMPXCHG if FUTEX
+ select MMU_GATHER_RCU_TABLE_FREE
+diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
+index f06a23898540..994f997b1572 100644
+--- a/arch/arm64/include/asm/preempt.h
++++ b/arch/arm64/include/asm/preempt.h
+@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void)
+ * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+ * pair.
+ */
+- return !pc || !READ_ONCE(ti->preempt_count);
++ if (!pc || !READ_ONCE(ti->preempt_count))
++ return true;
++#ifdef CONFIG_PREEMPT_LAZY
++ if ((pc & ~PREEMPT_NEED_RESCHED))
++ return false;
++ if (current_thread_info()->preempt_lazy_count)
++ return false;
++ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
++ return false;
++#endif
+ }
+
+ static inline bool should_resched(int preempt_offset)
+ {
++#ifdef CONFIG_PREEMPT_LAZY
++ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
++ if (pc == preempt_offset)
++ return true;
++
++ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
++ return false;
++
++ if (current_thread_info()->preempt_lazy_count)
++ return false;
++ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ return pc == preempt_offset;
++#endif
+ }
+
+ #ifdef CONFIG_PREEMPTION
+diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
+index 9f4e3b266f21..d3fa570c7235 100644
+--- a/arch/arm64/include/asm/thread_info.h
++++ b/arch/arm64/include/asm/thread_info.h
+@@ -26,6 +26,7 @@ struct thread_info {
+ #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ u64 ttbr0; /* saved TTBR0_EL1 */
+ #endif
++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
+ union {
+ u64 preempt_count; /* 0 => preemptible, <0 => bug */
+ struct {
+@@ -65,6 +66,7 @@ void arch_release_task_struct(struct task_struct *tsk);
+ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
+ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
+ #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
++#define TIF_NEED_RESCHED_LAZY 7
+ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */
+ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
+ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
+@@ -95,8 +97,10 @@ void arch_release_task_struct(struct task_struct *tsk);
+ #define _TIF_SVE (1 << TIF_SVE)
+ #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
+ #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
+
+-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
++ _TIF_SIGPENDING | \
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
+ _TIF_NOTIFY_SIGNAL)
+@@ -105,6 +109,8 @@ void arch_release_task_struct(struct task_struct *tsk);
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_EMU)
+
++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
++
+ #ifdef CONFIG_SHADOW_CALL_STACK
+ #define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index a36e2fc330d4..b94354a3af96 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -30,6 +30,7 @@ int main(void)
+ BLANK();
+ DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
+ DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
++ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
+ #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+ #endif
+diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
+index a31a0a713c85..63e1ad8c2ea8 100644
+--- a/arch/arm64/kernel/entry.S
++++ b/arch/arm64/kernel/entry.S
+@@ -678,9 +678,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ mrs x0, daif
+ orr x24, x24, x0
+ alternative_else_nop_endif
+- cbnz x24, 1f // preempt count != 0 || NMI return path
+- bl arm64_preempt_schedule_irq // irq en/disable is done inside
++
++ cbz x24, 1f // (need_resched + count) == 0
++ cbnz w24, 2f // count != 0
++
++ ldr w24, [tsk, #TSK_TI_PREEMPT_LAZY] // get preempt lazy count
++ cbnz w24, 2f // preempt lazy count != 0
++
++ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
++ tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
+ 1:
++ bl arm64_preempt_schedule_irq // irq en/disable is done inside
++2:
+ #endif
+
+ mov x0, sp
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index 6237486ff6bb..ab411f336c39 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -915,7 +915,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
+ unsigned long thread_flags)
+ {
+ do {
+- if (thread_flags & _TIF_NEED_RESCHED) {
++ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
+ /* Unmask Debug and SError for the next task */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+--
+2.19.1
+
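The lazy preemption mode that this and the other arch patches wire up is runtime-controllable through /sys/kernel/debug/sched_features, as noted in the series description at the top of this commit. The sketch below checks the current state from userspace; it assumes CONFIG_SCHED_DEBUG is enabled, debugfs is mounted at /sys/kernel/debug (typically root-only), and that the flag is reported as PREEMPT_LAZY / NO_PREEMPT_LAZY as in the quoted echo commands.

/* Sketch: report whether the PREEMPT_LAZY scheduler feature is currently
 * enabled.  Assumes CONFIG_SCHED_DEBUG and debugfs mounted at
 * /sys/kernel/debug; usually needs root.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen("/sys/kernel/debug/sched_features", "r");

	if (!f) {
		perror("sched_features");
		return 1;
	}
	n = fread(buf, 1, sizeof(buf) - 1, f);
	fclose(f);
	buf[n] = '\0';

	if (strstr(buf, "NO_PREEMPT_LAZY"))	/* check the NO_ form first */
		puts("PREEMPT_LAZY: disabled");
	else if (strstr(buf, "PREEMPT_LAZY"))
		puts("PREEMPT_LAZY: enabled");
	else
		puts("PREEMPT_LAZY: flag not present (kernel without lazy preempt?)");
	return 0;
}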
diff --git a/features/rt/arm-Add-support-for-lazy-preemption.patch b/features/rt/arm-Add-support-for-lazy-preemption.patch
new file mode 100644
index 00000000..b6e2718c
--- /dev/null
+++ b/features/rt/arm-Add-support-for-lazy-preemption.patch
@@ -0,0 +1,136 @@
+From 71f7a0c901c01218f119d2e16cb22e1069a61900 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 31 Oct 2012 12:04:11 +0100
+Subject: [PATCH 166/191] arm: Add support for lazy preemption
+
+Implement the arm pieces for lazy preempt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig | 1 +
+ arch/arm/include/asm/thread_info.h | 6 +++++-
+ arch/arm/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++---
+ arch/arm/kernel/signal.c | 3 ++-
+ 5 files changed, 25 insertions(+), 5 deletions(-)
+
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index 5da96f5df48f..5179eb3a35d5 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -107,6 +107,7 @@ config ARM
+ select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
++ select HAVE_PREEMPT_LAZY
+ select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RSEQ
+diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
+index 70d4cbc49ae1..b86418b4dfef 100644
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -54,6 +54,7 @@ struct cpu_context_save {
+ struct thread_info {
+ unsigned long flags; /* low level flags */
+ int preempt_count; /* 0 => preemptable, <0 => bug */
++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
+ mm_segment_t addr_limit; /* address limit */
+ struct task_struct *task; /* main task structure */
+ __u32 cpu; /* cpu */
+@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */
+ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
++#define TIF_NEED_RESCHED_LAZY 9
+
+ #define TIF_USING_IWMMXT 17
+ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */
+@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
+ #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
+
+ /* Checks for any syscall work in entry-common.S */
+@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ /*
+ * Change these and you break ASM code in entry-common.S
+ */
+-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
++ _TIF_SIGPENDING | \
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ _TIF_NOTIFY_SIGNAL)
+
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index be8050b0c3df..884e40a525ce 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -42,6 +42,7 @@ int main(void)
+ BLANK();
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
+ DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
+ DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
+index 0ea8529a4872..fa0d155d21b3 100644
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -206,11 +206,18 @@ __irq_svc:
+
+ #ifdef CONFIG_PREEMPTION
+ ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
+- ldr r0, [tsk, #TI_FLAGS] @ get flags
+ teq r8, #0 @ if preempt count != 0
++ bne 1f @ return from exeption
++ ldr r0, [tsk, #TI_FLAGS] @ get flags
++ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
++ blne svc_preempt @ preempt!
++
++ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
++ teq r8, #0 @ if preempt lazy count != 0
+ movne r0, #0 @ force flags to 0
+- tst r0, #_TIF_NEED_RESCHED
++ tst r0, #_TIF_NEED_RESCHED_LAZY
+ blne svc_preempt
++1:
+ #endif
+
+ svc_exit r5, irq = 1 @ return from exception
+@@ -225,8 +232,14 @@ svc_preempt:
+ 1: bl preempt_schedule_irq @ irq en/disable is done inside
+ ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
+ tst r0, #_TIF_NEED_RESCHED
++ bne 1b
++ tst r0, #_TIF_NEED_RESCHED_LAZY
+ reteq r8 @ go again
+- b 1b
++ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
++ teq r0, #0 @ if preempt lazy count != 0
++ beq 1b
++ ret r8 @ go again
++
+ #endif
+
+ __und_fault:
+diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
+index a3a38d0a4c85..f04ccf19ab1f 100644
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+ */
+ trace_hardirqs_off();
+ do {
+- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
++ if (likely(thread_flags & (_TIF_NEED_RESCHED |
++ _TIF_NEED_RESCHED_LAZY))) {
+ schedule();
+ } else {
+ if (unlikely(!user_mode(regs)))
+--
+2.19.1
+
diff --git a/features/rt/arm64-fpsimd-Delay-freeing-memory-in-fpsimd_flush_th.patch b/features/rt/arm64-fpsimd-Delay-freeing-memory-in-fpsimd_flush_th.patch
new file mode 100644
index 00000000..75e9f204
--- /dev/null
+++ b/features/rt/arm64-fpsimd-Delay-freeing-memory-in-fpsimd_flush_th.patch
@@ -0,0 +1,65 @@
+From c17705510939368390130106b37160bb2ff08e1c Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 25 Jul 2018 14:02:38 +0200
+Subject: [PATCH 176/191] arm64: fpsimd: Delay freeing memory in
+ fpsimd_flush_thread()
+
+fpsimd_flush_thread() invokes kfree() via sve_free() within a preempt disabled
+section which is not working on -RT.
+
+Delay freeing of memory until preemption is enabled again.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm64/kernel/fpsimd.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index 062b21f30f94..0ea2df6554e5 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -226,6 +226,16 @@ static void sve_free(struct task_struct *task)
+ __sve_free(task);
+ }
+
++static void *sve_free_atomic(struct task_struct *task)
++{
++ void *sve_state = task->thread.sve_state;
++
++ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
++
++ task->thread.sve_state = NULL;
++ return sve_state;
++}
++
+ /*
+ * TIF_SVE controls whether a task can use SVE without trapping while
+ * in userspace, and also the way a task's FPSIMD/SVE state is stored
+@@ -1022,6 +1032,7 @@ void fpsimd_thread_switch(struct task_struct *next)
+ void fpsimd_flush_thread(void)
+ {
+ int vl, supported_vl;
++ void *mem = NULL;
+
+ if (!system_supports_fpsimd())
+ return;
+@@ -1034,7 +1045,7 @@ void fpsimd_flush_thread(void)
+
+ if (system_supports_sve()) {
+ clear_thread_flag(TIF_SVE);
+- sve_free(current);
++ mem = sve_free_atomic(current);
+
+ /*
+ * Reset the task vector length as required.
+@@ -1068,6 +1079,7 @@ void fpsimd_flush_thread(void)
+ }
+
+ put_cpu_fpsimd_context();
++ kfree(mem);
+ }
+
+ /*
+--
+2.19.1
+
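The change above follows a recurring RT pattern: detach the memory while preemption is disabled and only kfree() it once preemption is enabled again, because kfree() can take sleeping locks on PREEMPT_RT. A contrived but self-contained module sketch of the same pattern is below; the buffer and all names are invented for illustration, and the preempt_disable() section simply stands in for get_cpu_fpsimd_context().

/* Illustrative module only: the "detach now, free later" pattern used in
 * fpsimd_flush_thread() above, shown on an invented buffer.
 */
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/slab.h>

static int __init deferred_free_demo_init(void)
{
	void *buf = kzalloc(64, GFP_KERNEL);
	void *to_free;

	if (!buf)
		return -ENOMEM;

	preempt_disable();
	/* ... work that must not be preempted would go here ... */
	to_free = buf;		/* only detach; kfree() may sleep on RT */
	buf = NULL;
	preempt_enable();

	kfree(to_free);		/* safe again: preemption is enabled */
	return 0;
}

static void __exit deferred_free_demo_exit(void)
{
}

module_init(deferred_free_demo_init);
module_exit(deferred_free_demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deferred kfree() pattern sketch");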
diff --git a/features/rt/ath9k-Use-tasklet_disable_in_atomic.patch b/features/rt/ath9k-Use-tasklet_disable_in_atomic.patch
new file mode 100644
index 00000000..d14560b6
--- /dev/null
+++ b/features/rt/ath9k-Use-tasklet_disable_in_atomic.patch
@@ -0,0 +1,47 @@
+From 0fdf02e085f667a14612fe8555ffffcdc7b3044e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:13 +0100
+Subject: [PATCH 050/191] ath9k: Use tasklet_disable_in_atomic()
+
+All callers of ath9k_beacon_ensure_primary_slot() are preemptible /
+acquire a mutex except for this callchain:
+
+ spin_lock_bh(&sc->sc_pcu_lock);
+ ath_complete_reset()
+ -> ath9k_calculate_summary_state()
+ -> ath9k_beacon_ensure_primary_slot()
+
+It's unclear how that can be distangled, so use tasklet_disable_in_atomic()
+for now. This allows tasklet_disable() to become sleepable once the
+remaining atomic users are cleaned up.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: ath9k-devel@qca.qualcomm.com
+Cc: Kalle Valo <kvalo@codeaurora.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: linux-wireless@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Acked-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/net/wireless/ath/ath9k/beacon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
+index 71e2ada86793..72e2e71aac0e 100644
+--- a/drivers/net/wireless/ath/ath9k/beacon.c
++++ b/drivers/net/wireless/ath/ath9k/beacon.c
+@@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(struct ath_softc *sc)
+ int first_slot = ATH_BCBUF;
+ int slot;
+
+- tasklet_disable(&sc->bcon_tasklet);
++ tasklet_disable_in_atomic(&sc->bcon_tasklet);
+
+ /* Find first taken slot. */
+ for (slot = 0; slot < ATH_BCBUF; slot++) {
+--
+2.19.1
+
diff --git a/features/rt/atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch b/features/rt/atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch
new file mode 100644
index 00000000..475b8b02
--- /dev/null
+++ b/features/rt/atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch
@@ -0,0 +1,41 @@
+From 3a6d1acf6c1ff722cb9479fba99a7ea0c6c50ee7 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:14 +0100
+Subject: [PATCH 051/191] atm: eni: Use tasklet_disable_in_atomic() in the
+ send() callback
+
+The atmdev_ops::send callback which calls tasklet_disable() is invoked with
+bottom halfs disabled from net_device_ops::ndo_start_xmit(). All other
+invocations of tasklet_disable() in this driver happen in preemptible
+context.
+
+Change the send() call to use tasklet_disable_in_atomic() which allows
+tasklet_disable() to be made sleepable once the remaining atomic context
+usage sites are cleaned up.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Chas Williams <3chas3@gmail.com>
+Cc: linux-atm-general@lists.sourceforge.net
+Cc: netdev@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/atm/eni.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
+index b574cce98dc3..422753d52244 100644
+--- a/drivers/atm/eni.c
++++ b/drivers/atm/eni.c
+@@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc,struct sk_buff *skb)
+ }
+ submitted++;
+ ATM_SKB(skb)->vcc = vcc;
+- tasklet_disable(&ENI_DEV(vcc->dev)->task);
++ tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task);
+ res = do_tx(skb);
+ tasklet_enable(&ENI_DEV(vcc->dev)->task);
+ if (res == enq_ok) return 0;
+--
+2.19.1
+
diff --git a/features/rt/block-mq-do-not-invoke-preempt_disable.patch b/features/rt/block-mq-do-not-invoke-preempt_disable.patch
new file mode 100644
index 00000000..c869f8a1
--- /dev/null
+++ b/features/rt/block-mq-do-not-invoke-preempt_disable.patch
@@ -0,0 +1,39 @@
+From daef692486de5a4273e1fdff8121fca46992bc5b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 14 Jul 2015 14:26:34 +0200
+Subject: [PATCH 134/191] block/mq: do not invoke preempt_disable()
+
+preempt_disable() and get_cpu() don't play well together with the sleeping
+locks the code tries to acquire later.
+It seems to be enough to replace them with get_cpu_light() and migrate_disable().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ block/blk-mq.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index d4d7c1caa439..2003865f725b 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -1560,14 +1560,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
+ return;
+
+ if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
+- int cpu = get_cpu();
++ int cpu = get_cpu_light();
+ if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ __blk_mq_run_hw_queue(hctx);
+- put_cpu();
++ put_cpu_light();
+ return;
+ }
+
+- put_cpu();
++ put_cpu_light();
+ }
+
+ kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
+--
+2.19.1
+
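For context, a sketch of the idea behind the get_cpu_light()/put_cpu_light() helpers used above. This is an assumption about how the RT tree defines them, not a copy of the actual definition: instead of disabling preemption as get_cpu() does, only migration is disabled, so the task stays on its CPU yet remains preemptible and may take sleeping locks:

	/* Sketch only; the RT tree's actual definition may differ. */
	#define get_cpu_light()		({ migrate_disable(); smp_processor_id(); })
	#define put_cpu_light()		migrate_enable()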
diff --git a/features/rt/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch b/features/rt/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch
new file mode 100644
index 00000000..a215e848
--- /dev/null
+++ b/features/rt/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch
@@ -0,0 +1,49 @@
+From 79683d22afc9a06abcf0b901d55a36b471da6a43 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 3 Jul 2018 18:19:48 +0200
+Subject: [PATCH 035/191] cgroup: use irqsave in cgroup_rstat_flush_locked()
+
+All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock
+either with spin_lock_irq() or spin_lock_irqsave().
+cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which
+is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated()
+in IRQ context and therefore requires the _irqsave() locking suffix in
+cgroup_rstat_flush_locked().
+Since there is no difference between spinlock_t and raw_spinlock_t
+on !RT, lockdep does not complain here. On RT, lockdep complains because
+the interrupts were not disabled here and a deadlock is possible.
+
+Acquire the raw_spin_lock_t with disabled interrupts.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/cgroup/rstat.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
+index d51175cedfca..b424f3157b34 100644
+--- a/kernel/cgroup/rstat.c
++++ b/kernel/cgroup/rstat.c
+@@ -149,8 +149,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
+ raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
+ cpu);
+ struct cgroup *pos = NULL;
++ unsigned long flags;
+
+- raw_spin_lock(cpu_lock);
++ raw_spin_lock_irqsave(cpu_lock, flags);
+ while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
+ struct cgroup_subsys_state *css;
+
+@@ -162,7 +163,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
+ css->ss->css_rstat_flush(css, cpu);
+ rcu_read_unlock();
+ }
+- raw_spin_unlock(cpu_lock);
++ raw_spin_unlock_irqrestore(cpu_lock, flags);
+
+ /* if @may_sleep, play nice and yield if necessary */
+ if (may_sleep && (need_resched() ||
+--
+2.19.1
+
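The general rule this patch follows, reduced to a stand-alone sketch (the lock and function names are illustrative only): a raw_spinlock_t that is also taken from IRQ context must disable interrupts on every acquisition, otherwise an interrupt arriving while the lock is held can deadlock on the same CPU:

	static DEFINE_RAW_SPINLOCK(example_lock);	/* hypothetical lock */

	static void update_stats_from_task_context(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&example_lock, flags);
		/* ... touch data that the IRQ path also updates ... */
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}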
diff --git a/features/rt/console-add-write_atomic-interface.patch b/features/rt/console-add-write_atomic-interface.patch
new file mode 100644
index 00000000..3495b552
--- /dev/null
+++ b/features/rt/console-add-write_atomic-interface.patch
@@ -0,0 +1,162 @@
+From 92323b9fa8b3aad4e8ba5e08e23fdf18e9ad1545 Mon Sep 17 00:00:00 2001
+From: John Ogness <john.ogness@linutronix.de>
+Date: Mon, 30 Nov 2020 01:42:01 +0106
+Subject: [PATCH 024/191] console: add write_atomic interface
+
+Add a write_atomic() callback to the console. This is an optional
+function for console drivers. The function must be atomic (including
+NMI safe) for writing to the console.
+
+Console drivers must still implement the write() callback. The
+write_atomic() callback will only be used in special situations,
+such as when the kernel panics.
+
+Creating an NMI safe write_atomic() that must synchronize with
+write() requires a careful implementation of the console driver. To
+aid with the implementation, a set of console_atomic_*() functions
+is provided:
+
+ void console_atomic_lock(unsigned int *flags);
+ void console_atomic_unlock(unsigned int flags);
+
+These functions synchronize using a processor-reentrant spinlock
+(called a cpulock).
+
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/console.h | 4 ++
+ kernel/printk/printk.c | 100 ++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 104 insertions(+)
+
+diff --git a/include/linux/console.h b/include/linux/console.h
+index 20874db50bc8..919c8d43d986 100644
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -140,6 +140,7 @@ static inline int con_debug_leave(void)
+ struct console {
+ char name[16];
+ void (*write)(struct console *, const char *, unsigned);
++ void (*write_atomic)(struct console *co, const char *s, unsigned int count);
+ int (*read)(struct console *, char *, unsigned);
+ struct tty_driver *(*device)(struct console *, int *);
+ void (*unblank)(void);
+@@ -229,4 +230,7 @@ extern void console_init(void);
+ void dummycon_register_output_notifier(struct notifier_block *nb);
+ void dummycon_unregister_output_notifier(struct notifier_block *nb);
+
++extern void console_atomic_lock(unsigned int *flags);
++extern void console_atomic_unlock(unsigned int flags);
++
+ #endif /* _LINUX_CONSOLE_H */
+diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
+index 3e0b9bf28e12..5af6f757818f 100644
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -3546,3 +3546,103 @@ void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
+ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
+
+ #endif
++
++struct prb_cpulock {
++ atomic_t owner;
++ unsigned long __percpu *irqflags;
++};
++
++#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
++static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
++static struct prb_cpulock name = { \
++ .owner = ATOMIC_INIT(-1), \
++ .irqflags = &_##name##_percpu_irqflags, \
++}
++
++static bool __prb_trylock(struct prb_cpulock *cpu_lock,
++ unsigned int *cpu_store)
++{
++ unsigned long *flags;
++ unsigned int cpu;
++
++ cpu = get_cpu();
++
++ *cpu_store = atomic_read(&cpu_lock->owner);
++ /* memory barrier to ensure the current lock owner is visible */
++ smp_rmb();
++ if (*cpu_store == -1) {
++ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
++ local_irq_save(*flags);
++ if (atomic_try_cmpxchg_acquire(&cpu_lock->owner,
++ cpu_store, cpu)) {
++ return true;
++ }
++ local_irq_restore(*flags);
++ } else if (*cpu_store == cpu) {
++ return true;
++ }
++
++ put_cpu();
++ return false;
++}
++
++/*
++ * prb_lock: Perform a processor-reentrant spin lock.
++ * @cpu_lock: A pointer to the lock object.
++ * @cpu_store: A "flags" pointer to store lock status information.
++ *
++ * If no processor has the lock, the calling processor takes the lock and
++ * becomes the owner. If the calling processor is already the owner of the
++ * lock, this function succeeds immediately. If lock is locked by another
++ * processor, this function spins until the calling processor becomes the
++ * owner.
++ *
++ * It is safe to call this function from any context and state.
++ */
++static void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store)
++{
++ for (;;) {
++ if (__prb_trylock(cpu_lock, cpu_store))
++ break;
++ cpu_relax();
++ }
++}
++
++/*
++ * prb_unlock: Perform a processor-reentrant spin unlock.
++ * @cpu_lock: A pointer to the lock object.
++ * @cpu_store: A "flags" object storing lock status information.
++ *
++ * Release the lock. The calling processor must be the owner of the lock.
++ *
++ * It is safe to call this function from any context and state.
++ */
++static void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store)
++{
++ unsigned long *flags;
++ unsigned int cpu;
++
++ cpu = atomic_read(&cpu_lock->owner);
++ atomic_set_release(&cpu_lock->owner, cpu_store);
++
++ if (cpu_store == -1) {
++ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
++ local_irq_restore(*flags);
++ }
++
++ put_cpu();
++}
++
++DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
++
++void console_atomic_lock(unsigned int *flags)
++{
++ prb_lock(&printk_cpulock, flags);
++}
++EXPORT_SYMBOL(console_atomic_lock);
++
++void console_atomic_unlock(unsigned int flags)
++{
++ prb_unlock(&printk_cpulock, flags);
++}
++EXPORT_SYMBOL(console_atomic_unlock);
+--
+2.19.1
+
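As a usage illustration of the interface added above, a hedged sketch of how a console driver might implement write_atomic() on top of console_atomic_lock()/console_atomic_unlock(); the driver and its my_uart_putc() helper are hypothetical:

	static void my_uart_write_atomic(struct console *co, const char *s,
					 unsigned int count)
	{
		unsigned int flags;
		unsigned int i;

		console_atomic_lock(&flags);	/* processor-reentrant, NMI safe */
		for (i = 0; i < count; i++)
			my_uart_putc(co, s[i]);	/* hypothetical polled output */
		console_atomic_unlock(flags);
	}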
diff --git a/features/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch b/features/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
new file mode 100644
index 00000000..115f4072
--- /dev/null
+++ b/features/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
@@ -0,0 +1,328 @@
+From f2031242ce940802f8703826b47dfd2e1290f97c Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <efault@gmx.de>
+Date: Sun, 8 Jan 2017 09:32:25 +0100
+Subject: [PATCH 160/191] cpuset: Convert callback_lock to raw_spinlock_t
+
+The two commits below add up to a cpuset might_sleep() splat for RT:
+
+8447a0fee974 cpuset: convert callback_mutex to a spinlock
+344736f29b35 cpuset: simplify cpuset_node_allowed API
+
+BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:995
+in_atomic(): 0, irqs_disabled(): 1, pid: 11718, name: cset
+CPU: 135 PID: 11718 Comm: cset Tainted: G E 4.10.0-rt1-rt #4
+Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014
+Call Trace:
+ ? dump_stack+0x5c/0x81
+ ? ___might_sleep+0xf4/0x170
+ ? rt_spin_lock+0x1c/0x50
+ ? __cpuset_node_allowed+0x66/0xc0
+ ? ___slab_alloc+0x390/0x570 <disables IRQs>
+ ? anon_vma_fork+0x8f/0x140
+ ? copy_page_range+0x6cf/0xb00
+ ? anon_vma_fork+0x8f/0x140
+ ? __slab_alloc.isra.74+0x5a/0x81
+ ? anon_vma_fork+0x8f/0x140
+ ? kmem_cache_alloc+0x1b5/0x1f0
+ ? anon_vma_fork+0x8f/0x140
+ ? copy_process.part.35+0x1670/0x1ee0
+ ? _do_fork+0xdd/0x3f0
+ ? _do_fork+0xdd/0x3f0
+ ? do_syscall_64+0x61/0x170
+ ? entry_SYSCALL64_slow_path+0x25/0x25
+
+The latter ensured that a NUMA box WILL take callback_lock in atomic
+context by removing the allocator and reclaim path __GFP_HARDWALL
+usage which prevented such contexts from taking callback_mutex.
+
+One option would be to reinstate __GFP_HARDWALL protections for
+RT; however, as the 8447a0fee974 changelog states:
+
+The callback_mutex is only used to synchronize reads/updates of cpusets'
+flags and cpu/node masks. These operations should always proceed fast so
+there's no reason why we can't use a spinlock instead of the mutex.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/cgroup/cpuset.c | 70 +++++++++++++++++++++---------------------
+ 1 file changed, 35 insertions(+), 35 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 5258b68153e0..fb8715c1edfc 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -345,7 +345,7 @@ void cpuset_read_unlock(void)
+ percpu_up_read(&cpuset_rwsem);
+ }
+
+-static DEFINE_SPINLOCK(callback_lock);
++static DEFINE_RAW_SPINLOCK(callback_lock);
+
+ static struct workqueue_struct *cpuset_migrate_mm_wq;
+
+@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
+ * Newly added CPUs will be removed from effective_cpus and
+ * newly deleted ones will be added back to effective_cpus.
+ */
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ if (adding) {
+ cpumask_or(parent->subparts_cpus,
+ parent->subparts_cpus, tmp->addmask);
+@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
+ }
+
+ parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ return cmd == partcmd_update;
+ }
+@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
+ continue;
+ rcu_read_unlock();
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+
+ cpumask_copy(cp->effective_cpus, tmp->new_cpus);
+ if (cp->nr_subparts_cpus &&
+@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
+ = cpumask_weight(cp->subparts_cpus);
+ }
+ }
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ WARN_ON(!is_in_v2_mode() &&
+ !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
+@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+ return -EINVAL;
+ }
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+
+ /*
+@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+ cs->cpus_allowed);
+ cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+ }
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ update_cpumasks_hier(cs, &tmp);
+
+@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
+ continue;
+ rcu_read_unlock();
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cp->effective_mems = *new_mems;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ WARN_ON(!is_in_v2_mode() &&
+ !nodes_equal(cp->mems_allowed, cp->effective_mems));
+@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+ if (retval < 0)
+ goto done;
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cs->mems_allowed = trialcs->mems_allowed;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ /* use trialcs->mems_allowed as a temp variable */
+ update_nodemasks_hier(cs, &trialcs->mems_allowed);
+@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+ spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
+ || (is_spread_page(cs) != is_spread_page(trialcs)));
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cs->flags = trialcs->flags;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+ rebuild_sched_domains_locked();
+@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
+ cpuset_filetype_t type = seq_cft(sf)->private;
+ int ret = 0;
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+
+ switch (type) {
+ case FILE_CPULIST:
+@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
+ ret = -EINVAL;
+ }
+
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+ return ret;
+ }
+
+@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+
+ cpuset_inc();
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ if (is_in_v2_mode()) {
+ cpumask_copy(cs->effective_cpus, parent->effective_cpus);
+ cs->effective_mems = parent->effective_mems;
+ cs->use_parent_ecpus = true;
+ parent->child_ecpus_count++;
+ }
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
+ goto out_unlock;
+@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+ }
+ rcu_read_unlock();
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cs->mems_allowed = parent->mems_allowed;
+ cs->effective_mems = parent->mems_allowed;
+ cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+ cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+ out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ put_online_cpus();
+@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
+ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ {
+ percpu_down_write(&cpuset_rwsem);
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+
+ if (is_in_v2_mode()) {
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
+@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ top_cpuset.mems_allowed = top_cpuset.effective_mems;
+ }
+
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+ percpu_up_write(&cpuset_rwsem);
+ }
+
+@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
+ {
+ bool is_empty;
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cpumask_copy(cs->cpus_allowed, new_cpus);
+ cpumask_copy(cs->effective_cpus, new_cpus);
+ cs->mems_allowed = *new_mems;
+ cs->effective_mems = *new_mems;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ /*
+ * Don't call update_tasks_cpumask() if the cpuset becomes empty,
+@@ -3012,10 +3012,10 @@ hotplug_update_tasks(struct cpuset *cs,
+ if (nodes_empty(*new_mems))
+ *new_mems = parent_cs(cs)->effective_mems;
+
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ cpumask_copy(cs->effective_cpus, new_cpus);
+ cs->effective_mems = *new_mems;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+
+ if (cpus_updated)
+ update_tasks_cpumask(cs);
+@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
+
+ /* synchronize cpus_allowed to cpu_active_mask */
+ if (cpus_updated) {
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ if (!on_dfl)
+ cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
+ /*
+@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
+ }
+ }
+ cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+ /* we don't mess with cpumasks of tasks in top_cpuset */
+ }
+
+ /* synchronize mems_allowed to N_MEMORY */
+ if (mems_updated) {
+- spin_lock_irq(&callback_lock);
++ raw_spin_lock_irq(&callback_lock);
+ if (!on_dfl)
+ top_cpuset.mems_allowed = new_mems;
+ top_cpuset.effective_mems = new_mems;
+- spin_unlock_irq(&callback_lock);
++ raw_spin_unlock_irq(&callback_lock);
+ update_tasks_nodemask(&top_cpuset);
+ }
+
+@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&callback_lock, flags);
++ raw_spin_lock_irqsave(&callback_lock, flags);
+ rcu_read_lock();
+ guarantee_online_cpus(task_cs(tsk), pmask);
+ rcu_read_unlock();
+- spin_unlock_irqrestore(&callback_lock, flags);
++ raw_spin_unlock_irqrestore(&callback_lock, flags);
+ }
+
+ /**
+@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
+ nodemask_t mask;
+ unsigned long flags;
+
+- spin_lock_irqsave(&callback_lock, flags);
++ raw_spin_lock_irqsave(&callback_lock, flags);
+ rcu_read_lock();
+ guarantee_online_mems(task_cs(tsk), &mask);
+ rcu_read_unlock();
+- spin_unlock_irqrestore(&callback_lock, flags);
++ raw_spin_unlock_irqrestore(&callback_lock, flags);
+
+ return mask;
+ }
+@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
+ return true;
+
+ /* Not hardwall and node outside mems_allowed: scan up cpusets */
+- spin_lock_irqsave(&callback_lock, flags);
++ raw_spin_lock_irqsave(&callback_lock, flags);
+
+ rcu_read_lock();
+ cs = nearest_hardwall_ancestor(task_cs(current));
+ allowed = node_isset(node, cs->mems_allowed);
+ rcu_read_unlock();
+
+- spin_unlock_irqrestore(&callback_lock, flags);
++ raw_spin_unlock_irqrestore(&callback_lock, flags);
+ return allowed;
+ }
+
+--
+2.19.1
+
diff --git a/features/rt/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch b/features/rt/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch
new file mode 100644
index 00000000..68cc1e5a
--- /dev/null
+++ b/features/rt/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch
@@ -0,0 +1,84 @@
+From bdce470739405260aab0f96686cd9b15cecde177 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 26 Jul 2018 18:52:00 +0200
+Subject: [PATCH 146/191] crypto: cryptd - add a lock instead
+ preempt_disable/local_bh_disable
+
+cryptd has a per-CPU lock which is protected with local_bh_disable() and
+preempt_disable().
+Add an explicit spin_lock to make the locking context more obvious and
+visible to lockdep. Since it is a per-CPU lock, there should be no lock
+contention on the actual spinlock.
+There is a small race window where we could be migrated to another CPU
+after the cpu_queue has been obtained. This is not a problem because the
+actual resource is protected by the spinlock.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ crypto/cryptd.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/crypto/cryptd.c b/crypto/cryptd.c
+index a1bea0f4baa8..5f8ca8c1f59c 100644
+--- a/crypto/cryptd.c
++++ b/crypto/cryptd.c
+@@ -36,6 +36,7 @@ static struct workqueue_struct *cryptd_wq;
+ struct cryptd_cpu_queue {
+ struct crypto_queue queue;
+ struct work_struct work;
++ spinlock_t qlock;
+ };
+
+ struct cryptd_queue {
+@@ -105,6 +106,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue,
+ cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+ crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
+ INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
++ spin_lock_init(&cpu_queue->qlock);
+ }
+ pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
+ return 0;
+@@ -129,8 +131,10 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
+ struct cryptd_cpu_queue *cpu_queue;
+ refcount_t *refcnt;
+
+- cpu = get_cpu();
+- cpu_queue = this_cpu_ptr(queue->cpu_queue);
++ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
++ spin_lock_bh(&cpu_queue->qlock);
++ cpu = smp_processor_id();
++
+ err = crypto_enqueue_request(&cpu_queue->queue, request);
+
+ refcnt = crypto_tfm_ctx(request->tfm);
+@@ -146,7 +150,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
+ refcount_inc(refcnt);
+
+ out_put_cpu:
+- put_cpu();
++ spin_unlock_bh(&cpu_queue->qlock);
+
+ return err;
+ }
+@@ -162,16 +166,11 @@ static void cryptd_queue_worker(struct work_struct *work)
+ cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
+ /*
+ * Only handle one request at a time to avoid hogging crypto workqueue.
+- * preempt_disable/enable is used to prevent being preempted by
+- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+- * cryptd_enqueue_request() being accessed from software interrupts.
+ */
+- local_bh_disable();
+- preempt_disable();
++ spin_lock_bh(&cpu_queue->qlock);
+ backlog = crypto_get_backlog(&cpu_queue->queue);
+ req = crypto_dequeue_request(&cpu_queue->queue);
+- preempt_enable();
+- local_bh_enable();
++ spin_unlock_bh(&cpu_queue->qlock);
+
+ if (!req)
+ return;
+--
+2.19.1
+
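The locking pattern introduced above, shown as a stand-alone sketch (structure and function names are illustrative): a per-CPU queue is protected by a per-CPU spinlock taken with BH disabled, and a migration after raw_cpu_ptr() is harmless because the spinlock, not CPU pinning, protects the queue:

	struct my_cpu_queue {
		spinlock_t		qlock;
		struct list_head	items;
	};
	static DEFINE_PER_CPU(struct my_cpu_queue, my_queues);

	static void enqueue_item(struct list_head *item)
	{
		struct my_cpu_queue *q = raw_cpu_ptr(&my_queues);

		spin_lock_bh(&q->qlock);
		list_add_tail(item, &q->items);
		spin_unlock_bh(&q->qlock);
	}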
diff --git a/features/rt/crypto-limit-more-FPU-enabled-sections.patch b/features/rt/crypto-limit-more-FPU-enabled-sections.patch
new file mode 100644
index 00000000..b807de7f
--- /dev/null
+++ b/features/rt/crypto-limit-more-FPU-enabled-sections.patch
@@ -0,0 +1,73 @@
+From 7773b9611cf2e303a1744ba642f49ba967c24ad0 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 30 Nov 2017 13:40:10 +0100
+Subject: [PATCH 145/191] crypto: limit more FPU-enabled sections
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Those crypto drivers use SSE/AVX/… for their crypto work and in order to
+do so in the kernel they need to enable the "FPU" in kernel mode which
+disables preemption.
+There are two problems with the way they are used:
+- the while loop which processes X bytes may create latency spikes and
+ should be avoided or limited.
+- the cipher-walk-next part may allocate/free memory and may use
+ kmap_atomic().
+
+The whole kernel_fpu_begin()/end() processing probably isn't that cheap.
+It most likely makes sense to process as much of those as possible in one
+go. The new *_fpu_sched_rt() schedules only if an RT task is pending.
+
+Probably we should measure the performance of those ciphers in pure SW
+mode and with these optimisations to see if it makes sense to keep them
+for RT.
+
+This kernel_fpu_resched() makes the code more preemptible, which might hurt
+performance.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/x86/include/asm/fpu/api.h | 1 +
+ arch/x86/kernel/fpu/core.c | 12 ++++++++++++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
+index ed33a14188f6..716727dfd1af 100644
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -28,6 +28,7 @@ extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);
+ extern void kernel_fpu_end(void);
+ extern bool irq_fpu_usable(void);
+ extern void fpregs_mark_activate(void);
++extern void kernel_fpu_resched(void);
+
+ /* Code that is unaware of kernel_fpu_begin_mask() can use this */
+ static inline void kernel_fpu_begin(void)
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index 571220ac8bea..d315d45b64fa 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -159,6 +159,18 @@ void kernel_fpu_end(void)
+ }
+ EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
++void kernel_fpu_resched(void)
++{
++ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
++
++ if (should_resched(PREEMPT_OFFSET)) {
++ kernel_fpu_end();
++ cond_resched();
++ kernel_fpu_begin();
++ }
++}
++EXPORT_SYMBOL_GPL(kernel_fpu_resched);
++
+ /*
+ * Save the FPU state (mark it for reload if necessary):
+ *
+--
+2.19.1
+
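A hedged sketch of how the kernel_fpu_resched() helper added above could be used in a cipher loop (the block-processing helper is hypothetical): the FPU section is briefly dropped between blocks only when a reschedule is actually pending, instead of holding preemption off for the whole walk:

	static void process_blocks(const u8 *src, u8 *dst, unsigned int nblocks)
	{
		kernel_fpu_begin();
		while (nblocks--) {
			encrypt_one_block_simd(src, dst);	/* hypothetical SIMD helper */
			src += 16;
			dst += 16;
			kernel_fpu_resched();	/* end/begin only if a resched is due */
		}
		kernel_fpu_end();
	}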
diff --git a/features/rt/debugobjects-Make-RT-aware.patch b/features/rt/debugobjects-Make-RT-aware.patch
new file mode 100644
index 00000000..59a4e281
--- /dev/null
+++ b/features/rt/debugobjects-Make-RT-aware.patch
@@ -0,0 +1,31 @@
+From e4cdd4565da1b21cf7b71738d85a794dab064aa5 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:41:35 +0200
+Subject: [PATCH 140/191] debugobjects: Make RT aware
+
+Avoid filling the pool / allocating memory with irqs off.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/debugobjects.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 9e14ae02306b..083882a3cf2f 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -557,7 +557,10 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
+ struct debug_obj *obj;
+ unsigned long flags;
+
+- fill_pool();
++#ifdef CONFIG_PREEMPT_RT
++ if (preempt_count() == 0 && !irqs_disabled())
++#endif
++ fill_pool();
+
+ db = get_bucket((unsigned long) addr);
+
+--
+2.19.1
+
diff --git a/features/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/features/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch
new file mode 100644
index 00000000..4ed6aa46
--- /dev/null
+++ b/features/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch
@@ -0,0 +1,92 @@
+From 66e6d402c94d47b29c4a7f3f5872fee8d2be6ebb Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Thu, 31 Mar 2016 04:08:28 +0200
+Subject: [PATCH 186/191] drivers/block/zram: Replace bit spinlocks with
+ rtmutex for -rt
+
+They're nondeterministic, and lead to ___might_sleep() splats in -rt.
+OTOH, they're a lot less wasteful than an rtmutex per page.
+
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/block/zram/zram_drv.c | 36 +++++++++++++++++++++++++++++++++++
+ drivers/block/zram/zram_drv.h | 1 +
+ 2 files changed, 37 insertions(+)
+
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index cf8deecc39ef..5c7999ebc4e0 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
+ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset, struct bio *bio);
+
++#ifdef CONFIG_PREEMPT_RT
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
++{
++ size_t index;
++
++ for (index = 0; index < num_pages; index++)
++ spin_lock_init(&zram->table[index].lock);
++}
++
++static int zram_slot_trylock(struct zram *zram, u32 index)
++{
++ int ret;
++
++ ret = spin_trylock(&zram->table[index].lock);
++ if (ret)
++ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
++ return ret;
++}
++
++static void zram_slot_lock(struct zram *zram, u32 index)
++{
++ spin_lock(&zram->table[index].lock);
++ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
++}
++
++static void zram_slot_unlock(struct zram *zram, u32 index)
++{
++ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
++ spin_unlock(&zram->table[index].lock);
++}
++
++#else
++
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
+
+ static int zram_slot_trylock(struct zram *zram, u32 index)
+ {
+@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
+ {
+ bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+ }
++#endif
+
+ static inline bool init_done(struct zram *zram)
+ {
+@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
+
+ if (!huge_class_size)
+ huge_class_size = zs_huge_class_size(zram->mem_pool);
++ zram_meta_init_table_locks(zram, num_pages);
+ return true;
+ }
+
+diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
+index 419a7e8281ee..561c7ba1421f 100644
+--- a/drivers/block/zram/zram_drv.h
++++ b/drivers/block/zram/zram_drv.h
+@@ -63,6 +63,7 @@ struct zram_table_entry {
+ unsigned long element;
+ };
+ unsigned long flags;
++ spinlock_t lock;
+ #ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ ktime_t ac_time;
+ #endif
+--
+2.19.1
+
diff --git a/features/rt/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch b/features/rt/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
new file mode 100644
index 00000000..2c1a9fa8
--- /dev/null
+++ b/features/rt/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
@@ -0,0 +1,79 @@
+From bd74da1edb71b84b90b8f68b9a8d28b0f580b1f0 Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Sat, 27 Feb 2016 09:01:42 +0100
+Subject: [PATCH 156/191] drm/i915: Don't disable interrupts on PREEMPT_RT
+ during atomic updates
+
+Commit
+ 8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
+
+started disabling interrupts across atomic updates. This breaks on PREEMPT_RT
+because within this section the code attempts to acquire spinlock_t locks which
+are sleeping locks on PREEMPT_RT.
+
+According to the comment, the interrupts are disabled to avoid random delays,
+not because they are required for protection or synchronisation.
+
+Don't disable interrupts on PREEMPT_RT during atomic updates.
+
+[bigeasy: drop local locks, commit message]
+
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/display/intel_sprite.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
+index 993543334a1e..cc0dd8a1e420 100644
+--- a/drivers/gpu/drm/i915/display/intel_sprite.c
++++ b/drivers/gpu/drm/i915/display/intel_sprite.c
+@@ -127,7 +127,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ "PSR idle timed out 0x%x, atomic update may fail\n",
+ psr_status);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+
+ crtc->debug.min_vbl = min;
+ crtc->debug.max_vbl = max;
+@@ -152,11 +153,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ break;
+ }
+
+- local_irq_enable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_enable();
+
+ timeout = schedule_timeout(timeout);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+ }
+
+ finish_wait(wq, &wait);
+@@ -189,7 +192,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ return;
+
+ irq_disable:
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+ }
+
+ #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
+@@ -268,7 +272,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
+ new_crtc_state->uapi.event = NULL;
+ }
+
+- local_irq_enable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_enable();
+
+ /* Send VRR Push to terminate Vblank */
+ intel_vrr_send_push(new_crtc_state);
+--
+2.19.1
+
diff --git a/features/rt/drm-i915-disable-tracing-on-RT.patch b/features/rt/drm-i915-disable-tracing-on-RT.patch
new file mode 100644
index 00000000..41898919
--- /dev/null
+++ b/features/rt/drm-i915-disable-tracing-on-RT.patch
@@ -0,0 +1,46 @@
+From 5e250a1f215a8a39bc712c70a8a6215f570e3a9c Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 6 Dec 2018 09:52:20 +0100
+Subject: [PATCH 157/191] drm/i915: disable tracing on -RT
+
+Luca Abeni reported this:
+| BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003
+| CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10
+| Call Trace:
+| rt_spin_lock+0x3f/0x50
+| gen6_read32+0x45/0x1d0 [i915]
+| g4x_get_vblank_counter+0x36/0x40 [i915]
+| trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915]
+
+Tracing events such as trace_i915_pipe_update_start(), among others, use
+functions which acquire spin locks. A few trace points use
+intel_get_crtc_scanline(), others use ->get_vblank_counter() which also
+might acquire a sleeping lock.
+
+Based on this I don't see any other way than to disable trace points on RT.
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: Luca Abeni <lucabe72@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/i915_trace.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index a4addcc64978..0ba5a0a0fd25 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -2,6 +2,10 @@
+ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+ #define _I915_TRACE_H_
+
++#ifdef CONFIG_PREEMPT_RT
++#define NOTRACE
++#endif
++
+ #include <linux/stringify.h>
+ #include <linux/types.h>
+ #include <linux/tracepoint.h>
+--
+2.19.1
+
diff --git a/features/rt/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch b/features/rt/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch
new file mode 100644
index 00000000..e8de6823
--- /dev/null
+++ b/features/rt/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch
@@ -0,0 +1,51 @@
+From 0dfb9c223f7310e28f5a5384a96aa99b563e219b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 7 Jul 2020 12:25:11 +0200
+Subject: [PATCH 159/191] drm/i915/gt: Only disable interrupts for the timeline
+ lock on !force-threaded
+
+According to commit
+ d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe")
+
+the interrupts are disabled because the code may be called from an interrupt
+handler and from preemptible context.
+With `force_irqthreads' set the timeline mutex is never observed in IRQ
+context, so there is no need to disable interrupts.
+
+Only disable interrupts if not in `force_irqthreads' mode.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+index e67d09259dd0..2a480b47dac2 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -81,9 +81,10 @@ static int __engine_unpark(struct intel_wakeref *wf)
+
+ static unsigned long __timeline_mark_lock(struct intel_context *ce)
+ {
+- unsigned long flags;
++ unsigned long flags = 0;
+
+- local_irq_save(flags);
++ if (!force_irqthreads)
++ local_irq_save(flags);
+ mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+
+ return flags;
+@@ -93,7 +94,8 @@ static void __timeline_mark_unlock(struct intel_context *ce,
+ unsigned long flags)
+ {
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
+- local_irq_restore(flags);
++ if (!force_irqthreads)
++ local_irq_restore(flags);
+ }
+
+ #else
+--
+2.19.1
+
diff --git a/features/rt/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch b/features/rt/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
new file mode 100644
index 00000000..16b96b72
--- /dev/null
+++ b/features/rt/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
@@ -0,0 +1,32 @@
+From 90efb159f7ef762ff40fc78ca4a4aad63dcb530c Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 19 Dec 2018 10:47:02 +0100
+Subject: [PATCH 158/191] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with
+ NOTRACE
+
+The order of the header files is important. If this header file is
+included after tracepoint.h was included then the NOTRACE here becomes a
+nop. Currently this happens for two .c files which use the tracepoints
+behind DRM_I915_LOW_LEVEL_TRACEPOINTS.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/i915_trace.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index 0ba5a0a0fd25..396b6598694d 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -782,7 +782,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
+ TP_ARGS(rq)
+ );
+
+-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
+ DEFINE_EVENT(i915_request, i915_request_submit,
+ TP_PROTO(struct i915_request *rq),
+ TP_ARGS(rq)
+--
+2.19.1
+
diff --git a/features/rt/drm-radeon-i915-Use-preempt_disable-enable_rt-where-.patch b/features/rt/drm-radeon-i915-Use-preempt_disable-enable_rt-where-.patch
new file mode 100644
index 00000000..5535ee9e
--- /dev/null
+++ b/features/rt/drm-radeon-i915-Use-preempt_disable-enable_rt-where-.patch
@@ -0,0 +1,60 @@
+From bcd7c523e0e70fe91424916891e64bc520ea1262 Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Sat, 27 Feb 2016 08:09:11 +0100
+Subject: [PATCH 155/191] drm,radeon,i915: Use preempt_disable/enable_rt()
+ where recommended
+
+DRM folks identified the spots, so use them.
+
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/gpu/drm/i915/i915_irq.c | 2 ++
+ drivers/gpu/drm/radeon/radeon_display.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
+index 1a701367a718..32db99bead7d 100644
+--- a/drivers/gpu/drm/i915/i915_irq.c
++++ b/drivers/gpu/drm/i915/i915_irq.c
+@@ -888,6 +888,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++ preempt_disable_rt();
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+@@ -952,6 +953,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ *etime = ktime_get();
+
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++ preempt_enable_rt();
+
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
+index 652af7a134bd..a2f5a4c0134f 100644
+--- a/drivers/gpu/drm/radeon/radeon_display.c
++++ b/drivers/gpu/drm/radeon/radeon_display.c
+@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
+ struct radeon_device *rdev = dev->dev_private;
+
+ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++ preempt_disable_rt();
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
+ *etime = ktime_get();
+
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++ preempt_enable_rt();
+
+ /* Decode into vertical and horizontal scanout position. */
+ *vpos = position & 0x1fff;
+--
+2.19.1
+
diff --git a/features/rt/efi-Allow-efi-runtime.patch b/features/rt/efi-Allow-efi-runtime.patch
new file mode 100644
index 00000000..84be1d7e
--- /dev/null
+++ b/features/rt/efi-Allow-efi-runtime.patch
@@ -0,0 +1,31 @@
+From 2b3184121c862ea3b57dc901f864eed390eed7c8 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 26 Jul 2018 15:06:10 +0200
+Subject: [PATCH 098/191] efi: Allow efi=runtime
+
+In case the command line option "efi=noruntime" is the default at build time, the user
+could override it with `efi=runtime' and allow it again.
+
+Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/firmware/efi/efi.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index 1819da1c9fec..709c65c0a816 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str)
+ if (parse_option_str(str, "noruntime"))
+ disable_runtime = true;
+
++ if (parse_option_str(str, "runtime"))
++ disable_runtime = false;
++
+ if (parse_option_str(str, "nosoftreserve"))
+ set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
+
+--
+2.19.1
+
diff --git a/features/rt/efi-Disable-runtime-services-on-RT.patch b/features/rt/efi-Disable-runtime-services-on-RT.patch
new file mode 100644
index 00000000..fbc4df1d
--- /dev/null
+++ b/features/rt/efi-Disable-runtime-services-on-RT.patch
@@ -0,0 +1,45 @@
+From 8ff328d63beab0f17efde608bcaa09ff553a8e94 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 26 Jul 2018 15:03:16 +0200
+Subject: [PATCH 097/191] efi: Disable runtime services on RT
+
+Based on measurements the EFI functions get_variable /
+get_next_variable take up to 2us which looks okay.
+The functions get_time, set_time take around 10ms. Those 10ms are too
+much. Even one ms would be too much.
+Ard mentioned that SetVariable might even trigger larger latencies if
+the firmware erases flash blocks on NOR.
+
+The time functions are used by efi-rtc and can be triggered at
+runtime (either via explicit read/write or NTP sync).
+
+The variable write could be used by pstore.
+These functions can be disabled without much of a loss. The poweroff /
+reboot hooks may be provided by PSCI.
+
+Disable EFI's runtime wrappers.
+
+This was observed on "EFI v2.60 by SoftIron Overdrive 1000".
+
+Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/firmware/efi/efi.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index df3f9bcab581..1819da1c9fec 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -66,7 +66,7 @@ struct mm_struct efi_mm = {
+
+ struct workqueue_struct *efi_rts_wq;
+
+-static bool disable_runtime;
++static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+ static int __init setup_noefi(char *arg)
+ {
+ disable_runtime = true;
+--
+2.19.1
+
diff --git a/features/rt/firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch b/features/rt/firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch
new file mode 100644
index 00000000..60edded7
--- /dev/null
+++ b/features/rt/firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch
@@ -0,0 +1,60 @@
+From 784277e7d29b56d2fc367068d909e1219c082315 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:16 +0100
+Subject: [PATCH 053/191] firewire: ohci: Use tasklet_disable_in_atomic() where
+ required
+
+tasklet_disable() is invoked in several places. Some of them are in atomic
+context which prevents a conversion of tasklet_disable() to a sleepable
+function.
+
+The atomic callchains are:
+
+ ar_context_tasklet()
+ ohci_cancel_packet()
+ tasklet_disable()
+
+ ...
+ ohci_flush_iso_completions()
+ tasklet_disable()
+
+The invocation of tasklet_disable() from at_context_flush() is always in
+preemptible context.
+
+Use tasklet_disable_in_atomic() for the two invocations in
+ohci_cancel_packet() and ohci_flush_iso_completions().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
+Cc: linux1394-devel@lists.sourceforge.net
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/firewire/ohci.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
+index 9811c40956e5..17c9d825188b 100644
+--- a/drivers/firewire/ohci.c
++++ b/drivers/firewire/ohci.c
+@@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
+ struct driver_data *driver_data = packet->driver_data;
+ int ret = -ENOENT;
+
+- tasklet_disable(&ctx->tasklet);
++ tasklet_disable_in_atomic(&ctx->tasklet);
+
+ if (packet->ack != 0)
+ goto out;
+@@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(struct fw_iso_context *base)
+ struct iso_context *ctx = container_of(base, struct iso_context, base);
+ int ret = 0;
+
+- tasklet_disable(&ctx->context.tasklet);
++ tasklet_disable_in_atomic(&ctx->context.tasklet);
+
+ if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) {
+ context_tasklet((unsigned long)&ctx->context);
+--
+2.19.1
+
diff --git a/features/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/features/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch
new file mode 100644
index 00000000..0e3c58f2
--- /dev/null
+++ b/features/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch
@@ -0,0 +1,98 @@
+From 3280c5c16fc06b062dce646186bf6d9449b03a5e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 20 Oct 2017 11:29:53 +0200
+Subject: [PATCH 090/191] fs/dcache: disable preemption on i_dir_seq's write
+ side
+
+i_dir_seq is an open-coded seqcount. Based on the code it looks like we
+could have two writers in parallel despite the fact that the d_lock is
+held. The problem is that on RT preemption is still enabled during the
+write process, and if this process is interrupted by a reader with RT
+priority then we lock up.
+To avoid that lockup, preemption is disabled during the update.
+The rename of i_dir_seq is there to ensure that new write sides are
+caught in the future.
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: Oleg.Karfich@wago.com
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ fs/dcache.c | 12 +++++++-----
+ fs/inode.c | 2 +-
+ include/linux/fs.h | 2 +-
+ 3 files changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 2c36711a222e..cd6405fd483a 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2537,9 +2537,10 @@ EXPORT_SYMBOL(d_rehash);
+ static inline unsigned start_dir_add(struct inode *dir)
+ {
+
++ preempt_disable_rt();
+ for (;;) {
+- unsigned n = dir->i_dir_seq;
+- if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
++ unsigned n = dir->__i_dir_seq;
++ if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n)
+ return n;
+ cpu_relax();
+ }
+@@ -2547,7 +2548,8 @@ static inline unsigned start_dir_add(struct inode *dir)
+
+ static inline void end_dir_add(struct inode *dir, unsigned n)
+ {
+- smp_store_release(&dir->i_dir_seq, n + 2);
++ smp_store_release(&dir->__i_dir_seq, n + 2);
++ preempt_enable_rt();
+ }
+
+ static void d_wait_lookup(struct dentry *dentry)
+@@ -2583,7 +2585,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
+
+ retry:
+ rcu_read_lock();
+- seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
++ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq);
+ r_seq = read_seqbegin(&rename_lock);
+ dentry = __d_lookup_rcu(parent, name, &d_seq);
+ if (unlikely(dentry)) {
+@@ -2611,7 +2613,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
+ }
+
+ hlist_bl_lock(b);
+- if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
++ if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) {
+ hlist_bl_unlock(b);
+ rcu_read_unlock();
+ goto retry;
+diff --git a/fs/inode.c b/fs/inode.c
+index 0672530acf7d..70a1230062e3 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -158,7 +158,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
+ inode->i_pipe = NULL;
+ inode->i_cdev = NULL;
+ inode->i_link = NULL;
+- inode->i_dir_seq = 0;
++ inode->__i_dir_seq = 0;
+ inode->i_rdev = 0;
+ inode->dirtied_when = 0;
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 4fa4b24535fd..9ac35f563f6e 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -700,7 +700,7 @@ struct inode {
+ struct pipe_inode_info *i_pipe;
+ struct cdev *i_cdev;
+ char *i_link;
+- unsigned i_dir_seq;
++ unsigned __i_dir_seq;
+ };
+
+ __u32 i_generation;
+--
+2.19.1
+
diff --git a/features/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/features/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch
new file mode 100644
index 00000000..281405e5
--- /dev/null
+++ b/features/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch
@@ -0,0 +1,262 @@
+From 36dc3d1f121e7e2b2d5d102de86220b6eb1d827d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 14 Sep 2016 14:35:49 +0200
+Subject: [PATCH 089/191] fs/dcache: use swait_queue instead of waitqueue
+
+__d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock()
+which disables preemption. As a workaround convert it to swait.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ fs/afs/dir_silly.c | 2 +-
+ fs/cifs/readdir.c | 2 +-
+ fs/dcache.c | 27 +++++++++++++++------------
+ fs/fuse/readdir.c | 2 +-
+ fs/namei.c | 4 ++--
+ fs/nfs/dir.c | 4 ++--
+ fs/nfs/unlink.c | 4 ++--
+ fs/proc/base.c | 3 ++-
+ fs/proc/proc_sysctl.c | 2 +-
+ include/linux/dcache.h | 4 ++--
+ include/linux/nfs_xdr.h | 2 +-
+ kernel/sched/swait.c | 1 +
+ 12 files changed, 31 insertions(+), 26 deletions(-)
+
+diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
+index 04f75a44f243..60cbce1995a5 100644
+--- a/fs/afs/dir_silly.c
++++ b/fs/afs/dir_silly.c
+@@ -236,7 +236,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+ struct dentry *alias;
+ int ret;
+
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
+index 80bf4c6f4c7b..a975cfed14f5 100644
+--- a/fs/cifs/readdir.c
++++ b/fs/cifs/readdir.c
+@@ -82,7 +82,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
+ struct inode *inode;
+ struct super_block *sb = parent->d_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 00e97c9ae7c4..2c36711a222e 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2552,21 +2552,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n)
+
+ static void d_wait_lookup(struct dentry *dentry)
+ {
+- if (d_in_lookup(dentry)) {
+- DECLARE_WAITQUEUE(wait, current);
+- add_wait_queue(dentry->d_wait, &wait);
+- do {
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- spin_unlock(&dentry->d_lock);
+- schedule();
+- spin_lock(&dentry->d_lock);
+- } while (d_in_lookup(dentry));
+- }
++ struct swait_queue __wait;
++
++ if (!d_in_lookup(dentry))
++ return;
++
++ INIT_LIST_HEAD(&__wait.task_list);
++ do {
++ prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
++ spin_unlock(&dentry->d_lock);
++ schedule();
++ spin_lock(&dentry->d_lock);
++ } while (d_in_lookup(dentry));
++ finish_swait(dentry->d_wait, &__wait);
+ }
+
+ struct dentry *d_alloc_parallel(struct dentry *parent,
+ const struct qstr *name,
+- wait_queue_head_t *wq)
++ struct swait_queue_head *wq)
+ {
+ unsigned int hash = name->hash;
+ struct hlist_bl_head *b = in_lookup_hash(parent, hash);
+@@ -2681,7 +2684,7 @@ void __d_lookup_done(struct dentry *dentry)
+ hlist_bl_lock(b);
+ dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
+ __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
+- wake_up_all(dentry->d_wait);
++ swake_up_all(dentry->d_wait);
+ dentry->d_wait = NULL;
+ hlist_bl_unlock(b);
+ INIT_HLIST_NODE(&dentry->d_u.d_alias);
+diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
+index 3441ffa740f3..2fcae5cfd272 100644
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file,
+ struct inode *dir = d_inode(parent);
+ struct fuse_conn *fc;
+ struct inode *inode;
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ if (!o->nodeid) {
+ /*
+diff --git a/fs/namei.c b/fs/namei.c
+index 216f16e74351..06ffd9490a34 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1602,7 +1602,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
+ {
+ struct dentry *dentry, *old;
+ struct inode *inode = dir->d_inode;
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ /* Don't go there if it's already dead */
+ if (unlikely(IS_DEADDIR(inode)))
+@@ -3131,7 +3131,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
+ struct dentry *dentry;
+ int error, create_error = 0;
+ umode_t mode = op->mode;
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ if (unlikely(IS_DEADDIR(dir_inode)))
+ return ERR_PTR(-ENOENT);
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index fc4f490f2d78..8f00a87b7ae2 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
+ unsigned long dir_verifier)
+ {
+ struct qstr filename = QSTR_INIT(entry->name, entry->len);
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ struct dentry *dentry;
+ struct dentry *alias;
+ struct inode *inode;
+@@ -1868,7 +1868,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned open_flags,
+ umode_t mode)
+ {
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ struct nfs_open_context *ctx;
+ struct dentry *res;
+ struct iattr attr = { .ia_valid = ATTR_OPEN };
+diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
+index 5fa11e1aca4c..984f26eb888c 100644
+--- a/fs/nfs/unlink.c
++++ b/fs/nfs/unlink.c
+@@ -13,7 +13,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/sched.h>
+-#include <linux/wait.h>
++#include <linux/swait.h>
+ #include <linux/namei.h>
+ #include <linux/fsnotify.h>
+
+@@ -180,7 +180,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
+
+ data->cred = get_current_cred();
+ data->res.dir_attr = &data->dir_attr;
+- init_waitqueue_head(&data->wq);
++ init_swait_queue_head(&data->wq);
+
+ status = -EBUSY;
+ spin_lock(&dentry->d_lock);
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index d632ddd5f5ee..4ea6111fae71 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -95,6 +95,7 @@
+ #include <linux/posix-timers.h>
+ #include <linux/time_namespace.h>
+ #include <linux/resctrl.h>
++#include <linux/swait.h>
+ #include <trace/events/oom.h>
+ #include "internal.h"
+ #include "fd.h"
+@@ -2037,7 +2038,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
+
+ child = d_hash_and_lookup(dir, &qname);
+ if (!child) {
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ child = d_alloc_parallel(dir, &qname, &wq);
+ if (IS_ERR(child))
+ goto end_instantiate;
+diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
+index 984e42f8cb11..2bc8e2409308 100644
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -683,7 +683,7 @@ static bool proc_sys_fill_cache(struct file *file,
+
+ child = d_lookup(dir, &qname);
+ if (!child) {
+- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ child = d_alloc_parallel(dir, &qname, &wq);
+ if (IS_ERR(child))
+ return false;
+diff --git a/include/linux/dcache.h b/include/linux/dcache.h
+index c1e48014106f..f2dc84edbcf1 100644
+--- a/include/linux/dcache.h
++++ b/include/linux/dcache.h
+@@ -107,7 +107,7 @@ struct dentry {
+
+ union {
+ struct list_head d_lru; /* LRU list */
+- wait_queue_head_t *d_wait; /* in-lookup ones only */
++ struct swait_queue_head *d_wait; /* in-lookup ones only */
+ };
+ struct list_head d_child; /* child of parent list */
+ struct list_head d_subdirs; /* our children */
+@@ -239,7 +239,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
+ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+ extern struct dentry * d_alloc_anon(struct super_block *);
+ extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
+- wait_queue_head_t *);
++ struct swait_queue_head *);
+ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+ extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+ extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
+diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
+index 0d62b8db4d24..0278c359e3ef 100644
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata {
+ struct nfs_removeargs args;
+ struct nfs_removeres res;
+ struct dentry *dentry;
+- wait_queue_head_t wq;
++ struct swait_queue_head wq;
+ const struct cred *cred;
+ struct nfs_fattr dir_attr;
+ long timeout;
+diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
+index e1c655f928c7..f230b1ac7f91 100644
+--- a/kernel/sched/swait.c
++++ b/kernel/sched/swait.c
+@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q)
+ struct swait_queue *curr;
+ LIST_HEAD(tmp);
+
++ WARN_ON(irqs_disabled());
+ raw_spin_lock_irq(&q->lock);
+ list_splice_init(&q->task_list, &tmp);
+ while (!list_empty(&tmp)) {
+--
+2.19.1
+
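The patch above replaces the regular waitqueue used for in-lookup dentries
with a simple waitqueue (swait), whose wake side is usable from contexts that
RT treats as truly atomic. For orientation, a minimal sketch of the swait
calling convention from <linux/swait.h>, mirroring the d_wait_lookup() /
__d_lookup_done() pair touched in the hunks above; the demo_* names are
invented for illustration and are not part of the patch:

  #include <linux/swait.h>
  #include <linux/sched.h>

  static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);   /* on-stack variant:
                                                 DECLARE_SWAIT_QUEUE_HEAD_ONSTACK() */
  static bool demo_done;

  static void demo_wait_for_done(void)
  {
          DECLARE_SWAITQUEUE(wait);

          /* Sleep until demo_done is set by the waker below. */
          for (;;) {
                  prepare_to_swait_exclusive(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
                  if (READ_ONCE(demo_done))
                          break;
                  schedule();
          }
          finish_swait(&demo_wq, &wait);
  }

  static void demo_signal_done(void)
  {
          WRITE_ONCE(demo_done, true);
          swake_up_all(&demo_wq);             /* counterpart of wake_up_all() */
  }
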
diff --git a/features/rt/fs-namespace-Use-cpu_chill-in-trylock-loops.patch b/features/rt/fs-namespace-Use-cpu_chill-in-trylock-loops.patch
new file mode 100644
index 00000000..a4536630
--- /dev/null
+++ b/features/rt/fs-namespace-Use-cpu_chill-in-trylock-loops.patch
@@ -0,0 +1,43 @@
+From 411705d51a89aa7c844f7cae5379a406d720f619 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 7 Mar 2012 21:00:34 +0100
+Subject: [PATCH 139/191] fs: namespace: Use cpu_chill() in trylock loops
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/namespace.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 74957d5edaa8..a02b35e791a3 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -14,6 +14,7 @@
+ #include <linux/mnt_namespace.h>
+ #include <linux/user_namespace.h>
+ #include <linux/namei.h>
++#include <linux/hrtimer.h>
+ #include <linux/security.h>
+ #include <linux/cred.h>
+ #include <linux/idr.h>
+@@ -342,8 +343,11 @@ int __mnt_want_write(struct vfsmount *m)
+ * incremented count after it has set MNT_WRITE_HOLD.
+ */
+ smp_mb();
+- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
+- cpu_relax();
++ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
++ preempt_enable();
++ cpu_chill();
++ preempt_disable();
++ }
+ /*
+ * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+ * be set to match its requirements. So we must not load that until
+--
+2.19.1
+
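The hunk above shows the general recipe for RT-hostile retry loops: briefly
re-enable preemption and sleep via cpu_chill() instead of spinning with
cpu_relax(), so the preempted writer can make progress. A hedged sketch of
that shape outside of fs/namespace.c; cpu_chill() only exists with the RT
patch applied (declared in <linux/hrtimer.h>, as the hunk's new include
shows), and the helper and bit names here are placeholders:

  #include <linux/bitops.h>
  #include <linux/hrtimer.h>      /* cpu_chill() with the RT patch */
  #include <asm/processor.h>      /* cpu_relax() */

  static void demo_wait_for_bit_clear(unsigned long *word, int bit)
  {
          /* Caller must be preemptible; cpu_chill() sleeps briefly on RT. */
          while (test_bit(bit, word)) {
  #ifdef CONFIG_PREEMPT_RT
                  cpu_chill();    /* short sleep so the bit holder can run */
  #else
                  cpu_relax();
  #endif
          }
  }
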
diff --git a/features/rt/futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch b/features/rt/futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch
new file mode 100644
index 00000000..1e9a9e63
--- /dev/null
+++ b/features/rt/futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch
@@ -0,0 +1,117 @@
+From 1301c0809848cdcd4a0fae002d13707fa3e3db5d Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 14 Jul 2015 14:26:34 +0200
+Subject: [PATCH 066/191] futex: Fix bug on when a requeued RT task times out
+
+Requeue with timeout causes a bug with PREEMPT_RT.
+
+The bug comes from a timed out condition.
+
+ TASK 1 TASK 2
+ ------ ------
+ futex_wait_requeue_pi()
+ futex_wait_queue_me()
+ <timed out>
+
+ double_lock_hb();
+
+ raw_spin_lock(pi_lock);
+ if (current->pi_blocked_on) {
+ } else {
+ current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
+ raw_spin_unlock(pi_lock);
+ spin_lock(hb->lock); <-- blocked!
+
+ plist_for_each_entry_safe(this) {
+ rt_mutex_start_proxy_lock();
+ task_blocks_on_rt_mutex();
+ BUG_ON(task->pi_blocked_on)!!!!
+
+The BUG_ON() actually has a check for PI_WAKEUP_INPROGRESS, but the
+problem is that, after TASK 1 sets PI_WAKEUP_INPROGRESS, it then tries to
+grab the hb->lock, which it fails to do. As the hb->lock is a mutex,
+it will block and set the "pi_blocked_on" to the hb->lock.
+
+When TASK 2 goes to requeue it, the check for PI_WAKEUP_INPROGRESS fails
+because TASK 1's pi_blocked_on is no longer set to that, but is instead
+set to the hb->lock.
+
+The fix:
+
+When calling rt_mutex_start_proxy_lock(), a check is made to see
+if the proxy task's pi_blocked_on is set. If so, exit out early.
+Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
+the proxy task that it is being requeued, and will handle things
+appropriately.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 31 ++++++++++++++++++++++++++++++-
+ kernel/locking/rtmutex_common.h | 1 +
+ 2 files changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 7fff3b88b96b..4d2a57e8dcb9 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -138,7 +138,8 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
+
+ static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
+ {
+- return waiter && waiter != PI_WAKEUP_INPROGRESS;
++ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
++ waiter != PI_REQUEUE_INPROGRESS;
+ }
+
+ /*
+@@ -1707,6 +1708,34 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ if (try_to_take_rt_mutex(lock, task, NULL))
+ return 1;
+
++#ifdef CONFIG_PREEMPT_RT
++ /*
++ * In PREEMPT_RT there's an added race.
++ * If the task, that we are about to requeue, times out,
++ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
++ * to skip this task. But right after the task sets
++ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
++ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
++ * This will replace the PI_WAKEUP_INPROGRESS with the actual
++ * lock that it blocks on. We *must not* place this task
++ * on this proxy lock in that case.
++ *
++ * To prevent this race, we first take the task's pi_lock
++ * and check if it has updated its pi_blocked_on. If it has,
++ * we assume that it woke up and we return -EAGAIN.
++ * Otherwise, we set the task's pi_blocked_on to
++ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
++ * it will know that we are in the process of requeuing it.
++ */
++ raw_spin_lock(&task->pi_lock);
++ if (task->pi_blocked_on) {
++ raw_spin_unlock(&task->pi_lock);
++ return -EAGAIN;
++ }
++ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
++ raw_spin_unlock(&task->pi_lock);
++#endif
++
+ /* We enforce deadlock detection for futexes */
+ ret = task_blocks_on_rt_mutex(lock, waiter, task,
+ RT_MUTEX_FULL_CHAINWALK);
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index 096b16cfb096..37cd6b3bf6f4 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -126,6 +126,7 @@ enum rtmutex_chainwalk {
+ * PI-futex support (proxy locking functions, etc.):
+ */
+ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
++#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
+
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+--
+2.19.1
+
diff --git a/features/rt/genirq-Disable-irqpoll-on-rt.patch b/features/rt/genirq-Disable-irqpoll-on-rt.patch
new file mode 100644
index 00000000..37c2cf92
--- /dev/null
+++ b/features/rt/genirq-Disable-irqpoll-on-rt.patch
@@ -0,0 +1,42 @@
+From 838f295da60b853b16a3bfbfdd0790472ce70d94 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:57 -0500
+Subject: [PATCH 189/191] genirq: Disable irqpoll on -rt
+
+Creates long latencies for no value
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/irq/spurious.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
+index f865e5f4d382..dc7311dd74b1 100644
+--- a/kernel/irq/spurious.c
++++ b/kernel/irq/spurious.c
+@@ -443,6 +443,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
+
+ static int __init irqfixup_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT\n");
++ return 1;
++#endif
+ irqfixup = 1;
+ printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+ printk(KERN_WARNING "This may impact system performance.\n");
+@@ -455,6 +459,10 @@ module_param(irqfixup, int, 0644);
+
+ static int __init irqpoll_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT\n");
++ return 1;
++#endif
+ irqfixup = 2;
+ printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+ "enabled\n");
+--
+2.19.1
+
diff --git a/features/rt/genirq-Move-prio-assignment-into-the-newly-created-t.patch b/features/rt/genirq-Move-prio-assignment-into-the-newly-created-t.patch
new file mode 100644
index 00000000..3c62cff6
--- /dev/null
+++ b/features/rt/genirq-Move-prio-assignment-into-the-newly-created-t.patch
@@ -0,0 +1,61 @@
+From a5c8ce0345b9379be44f67042816beb0afda8176 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 9 Nov 2020 23:32:39 +0100
+Subject: [PATCH 004/191] genirq: Move prio assignment into the newly created
+ thread
+
+With enabled threaded interrupts the nouveau driver reported the
+following:
+| Chain exists of:
+| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem
+|
+| Possible unsafe locking scenario:
+|
+| CPU0 CPU1
+| ---- ----
+| lock(&cpuset_rwsem);
+| lock(&device->mutex);
+| lock(&cpuset_rwsem);
+| lock(&mm->mmap_lock#2);
+
+The device->mutex is nvkm_device::mutex.
+
+Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing
+to do.
+Move the priority assignment to the start of the newly created thread.
+
+Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()")
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bigeasy: Patch description]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de
+---
+ kernel/irq/manage.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index dec3f73e8db9..ac2c57429750 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -1225,6 +1225,8 @@ static int irq_thread(void *data)
+ irqreturn_t (*handler_fn)(struct irq_desc *desc,
+ struct irqaction *action);
+
++ sched_set_fifo(current);
++
+ if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
+ handler_fn = irq_forced_thread_fn;
+@@ -1390,8 +1392,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+- sched_set_fifo(t);
+-
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies to avoid that the interrupt code
+--
+2.19.1
+
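The same idea works for any kernel thread that needs an RT scheduling
policy: let the thread set its own policy as the first thing it does, so the
creating context never has to take cpuset_rwsem on the thread's behalf. A
small illustrative sketch; the demo_* names are invented, and sched_set_fifo()
is the helper used in the hunk above:

  #include <linux/kthread.h>
  #include <linux/sched.h>
  #include <linux/delay.h>

  static int demo_thread_fn(void *data)
  {
          /* Runs in the new thread's own context, like irq_thread() above. */
          sched_set_fifo(current);

          while (!kthread_should_stop())
                  msleep(1000);           /* placeholder work loop */
          return 0;
  }

  static struct task_struct *demo_start_thread(void)
  {
          /* The creator no longer touches the child's scheduling policy. */
          return kthread_run(demo_thread_fn, NULL, "demo-thread");
  }
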
diff --git a/features/rt/genirq-update-irq_set_irqchip_state-documentation.patch b/features/rt/genirq-update-irq_set_irqchip_state-documentation.patch
new file mode 100644
index 00000000..d203983a
--- /dev/null
+++ b/features/rt/genirq-update-irq_set_irqchip_state-documentation.patch
@@ -0,0 +1,31 @@
+From a7f1d1036ba191caf447313670c2d682537c6f51 Mon Sep 17 00:00:00 2001
+From: Josh Cartwright <joshc@ni.com>
+Date: Thu, 11 Feb 2016 11:54:00 -0600
+Subject: [PATCH 174/191] genirq: update irq_set_irqchip_state documentation
+
+On -rt kernels, the use of migrate_disable()/migrate_enable() is
+sufficient to guarantee a task isn't moved to another CPU. Update the
+irq_set_irqchip_state() documentation to reflect this.
+
+Signed-off-by: Josh Cartwright <joshc@ni.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/irq/manage.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index f4d18858d079..b279a8683c38 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -2787,7 +2787,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
+ * This call sets the internal irqchip state of an interrupt,
+ * depending on the value of @which.
+ *
+- * This function should be called with preemption disabled if the
++ * This function should be called with migration disabled if the
+ * interrupt controller has per-cpu registers.
+ */
+ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+--
+2.19.1
+
diff --git a/features/rt/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch b/features/rt/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch
new file mode 100644
index 00000000..30c5cea1
--- /dev/null
+++ b/features/rt/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch
@@ -0,0 +1,84 @@
+From a93cd2c33ab23c4dce5b28971b24ad3bb0a85d55 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 30 Oct 2020 13:59:06 +0100
+Subject: [PATCH 001/191] highmem: Don't disable preemption on RT in
+ kmap_atomic()
+
+Disabling preemption makes it impossible to acquire sleeping locks within
+kmap_atomic() section.
+For PREEMPT_RT it is sufficient to disable migration.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/highmem-internal.h | 27 ++++++++++++++++++++++-----
+ 1 file changed, 22 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
+index 7902c7d8b55f..4aa1031d3e4c 100644
+--- a/include/linux/highmem-internal.h
++++ b/include/linux/highmem-internal.h
+@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr)
+
+ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+ {
+- preempt_disable();
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ migrate_disable();
++ else
++ preempt_disable();
++
+ pagefault_disable();
+ return __kmap_local_page_prot(page, prot);
+ }
+@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page)
+
+ static inline void *kmap_atomic_pfn(unsigned long pfn)
+ {
+- preempt_disable();
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ migrate_disable();
++ else
++ preempt_disable();
++
+ pagefault_disable();
+ return __kmap_local_pfn_prot(pfn, kmap_prot);
+ }
+@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr)
+ {
+ kunmap_local_indexed(addr);
+ pagefault_enable();
+- preempt_enable();
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ migrate_enable();
++ else
++ preempt_enable();
+ }
+
+ unsigned int __nr_free_highpages(void);
+@@ -179,7 +190,10 @@ static inline void __kunmap_local(void *addr)
+
+ static inline void *kmap_atomic(struct page *page)
+ {
+- preempt_disable();
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ migrate_disable();
++ else
++ preempt_disable();
+ pagefault_disable();
+ return page_address(page);
+ }
+@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void *addr)
+ kunmap_flush_on_unmap(addr);
+ #endif
+ pagefault_enable();
+- preempt_enable();
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ migrate_enable();
++ else
++ preempt_enable();
+ }
+
+ static inline unsigned int nr_free_highpages(void) { return 0; }
+--
+2.19.1
+
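With the change above, a kmap_atomic() section on PREEMPT_RT only disables
migration and page faults, so sleeping locks may be taken inside it while the
mapping still stays CPU-local. A minimal usage sketch for orientation; the
helper name is made up, the API itself is the regular highmem one:

  #include <linux/highmem.h>
  #include <linux/string.h>

  /* Copy a buffer into a (possibly highmem) page; len must not exceed
   * PAGE_SIZE. */
  static void demo_copy_to_page(struct page *page, const void *src, size_t len)
  {
          void *vaddr = kmap_atomic(page);

          /* On RT this section may now take sleeping locks, since only
           * migration (not preemption) is disabled. */
          memcpy(vaddr, src, len);

          kunmap_atomic(vaddr);
  }
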
diff --git a/features/rt/irqtime-Make-accounting-correct-on-RT.patch b/features/rt/irqtime-Make-accounting-correct-on-RT.patch
new file mode 100644
index 00000000..bb84cb23
--- /dev/null
+++ b/features/rt/irqtime-Make-accounting-correct-on-RT.patch
@@ -0,0 +1,53 @@
+From 02a22f4cb3638326b34be9b98415ab6f2718f071 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 9 Mar 2021 09:55:54 +0100
+Subject: [PATCH 056/191] irqtime: Make accounting correct on RT
+
+vtime_account_irq() and irqtime_account_irq() base their checks on
+preempt_count(), which fails on RT because preempt_count() does not contain
+the softirq accounting, which is separate on RT.
+
+These checks do not need the full preempt count as they only operate on the
+hard and softirq sections.
+
+Use irq_count() instead, which provides the correct value on both RT and
+non-RT kernels. The compiler is clever enough to fold the masking for !RT:
+
+ 99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax
+ - 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax
+ + 9a2: 25 00 ff ff 00 and $0xffff00,%eax
+
+Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/cputime.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
+index 5f611658eeab..2c36a5fad589 100644
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
+ cpu = smp_processor_id();
+ delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
+ irqtime->irq_start_time += delta;
+- pc = preempt_count() - offset;
++ pc = irq_count() - offset;
+
+ /*
+ * We do not account for softirq time from ksoftirqd here.
+@@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struct *prev)
+
+ void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
+ {
+- unsigned int pc = preempt_count() - offset;
++ unsigned int pc = irq_count() - offset;
+
+ if (pc & HARDIRQ_OFFSET) {
+ vtime_account_hardirq(tsk);
+--
+2.19.1
+
diff --git a/features/rt/irqwork-push-most-work-into-softirq-context.patch b/features/rt/irqwork-push-most-work-into-softirq-context.patch
new file mode 100644
index 00000000..0b69634b
--- /dev/null
+++ b/features/rt/irqwork-push-most-work-into-softirq-context.patch
@@ -0,0 +1,200 @@
+From 79182fbaac5c336ea2aadecca97ad3e96ba2d2f0 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 23 Jun 2015 15:32:51 +0200
+Subject: [PATCH 144/191] irqwork: push most work into softirq context
+
+Initially we deferred all irqwork into softirq because we didn't want the
+latency spikes if perf or another user was busy and delayed the RT task.
+The NOHZ trigger (nohz_full_kick_work) was the first user that did not work
+as expected if it did not run in the original irqwork context so we had to
+bring it back somehow for it. push_irq_work_func is the second one that
+requires this.
+
+This patch adds the IRQ_WORK_HARD_IRQ flag which makes sure the callback
+runs in raw-irq context. Everything else is deferred into softirq context.
+Without -RT we keep the original behavior.
+
+This patch incorporates tglx's original work, reworked a little to bring back
+the arch_irq_work_raise() where possible, and a few fixes from Steven Rostedt
+and Mike Galbraith.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/irq_work.h | 6 ++++
+ kernel/irq_work.c | 69 ++++++++++++++++++++++++++++++++--------
+ kernel/sched/topology.c | 3 +-
+ kernel/time/timer.c | 2 ++
+ 4 files changed, 66 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
+index ec2a47a81e42..dbbef9089789 100644
+--- a/include/linux/irq_work.h
++++ b/include/linux/irq_work.h
+@@ -64,4 +64,10 @@ static inline void irq_work_run(void) { }
+ static inline void irq_work_single(void *arg) { }
+ #endif
+
++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
++void irq_work_tick_soft(void);
++#else
++static inline void irq_work_tick_soft(void) { }
++#endif
++
+ #endif /* _LINUX_IRQ_WORK_H */
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index e8da1e71583a..c3455910196f 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,6 +18,7 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/interrupt.h>
+ #include <asm/processor.h>
+
+
+@@ -52,13 +53,27 @@ void __weak arch_irq_work_raise(void)
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
+- /* If the work is "lazy", handle it from next tick if any */
+- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
+- tick_nohz_tick_stopped())
+- arch_irq_work_raise();
+- } else {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
++ struct llist_head *list;
++ bool lazy_work;
++ int work_flags;
++
++ work_flags = atomic_read(&work->node.a_flags);
++ if (work_flags & IRQ_WORK_LAZY)
++ lazy_work = true;
++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(work_flags & IRQ_WORK_HARD_IRQ))
++ lazy_work = true;
++ else
++ lazy_work = false;
++
++ if (lazy_work)
++ list = this_cpu_ptr(&lazy_list);
++ else
++ list = this_cpu_ptr(&raised_list);
++
++ if (llist_add(&work->node.llist, list)) {
++ /* If the work is "lazy", handle it from next tick if any */
++ if (!lazy_work || tick_nohz_tick_stopped())
+ arch_irq_work_raise();
+ }
+ }
+@@ -102,7 +117,14 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ if (cpu != smp_processor_id()) {
+ /* Arch remote IPI send/receive backend aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
+- __smp_call_single_queue(cpu, &work->node.llist);
++
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
++ if (llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
++ /* && tick_nohz_tick_stopped_cpu(cpu) */
++ arch_send_call_function_single_ipi(cpu);
++ } else {
++ __smp_call_single_queue(cpu, &work->node.llist);
++ }
+ } else {
+ __irq_work_queue_local(work);
+ }
+@@ -120,9 +142,8 @@ bool irq_work_needs_cpu(void)
+ raised = this_cpu_ptr(&raised_list);
+ lazy = this_cpu_ptr(&lazy_list);
+
+- if (llist_empty(raised) || arch_irq_work_has_interrupt())
+- if (llist_empty(lazy))
+- return false;
++ if (llist_empty(raised) && llist_empty(lazy))
++ return false;
+
+ /* All work should have been flushed before going offline */
+ WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+@@ -165,8 +186,12 @@ static void irq_work_run_list(struct llist_head *list)
+ struct irq_work *work, *tmp;
+ struct llist_node *llnode;
+
++#ifndef CONFIG_PREEMPT_RT
++ /*
++ * nort: On RT IRQ-work may run in SOFTIRQ context.
++ */
+ BUG_ON(!irqs_disabled());
+-
++#endif
+ if (llist_empty(list))
+ return;
+
+@@ -182,7 +207,16 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ irq_work_run_list(this_cpu_ptr(&raised_list));
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
++ /*
++ * NOTE: we raise softirq via IPI for safety,
++ * and execute in irq_work_tick() to move the
++ * overhead from hard to soft irq context.
++ */
++ if (!llist_empty(this_cpu_ptr(&lazy_list)))
++ raise_softirq(TIMER_SOFTIRQ);
++ } else
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+
+@@ -192,8 +226,17 @@ void irq_work_tick(void)
+
+ if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ irq_work_run_list(raised);
++
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
++void irq_work_tick_soft(void)
++{
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
+ }
++#endif
+
+ /*
+ * Synchronize against the irq_work @entry, ensures the entry is not
+diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
+index 09d35044bd88..6f7c0f493be3 100644
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_domain *rd)
+ #ifdef HAVE_RT_PUSH_IPI
+ rd->rto_cpu = -1;
+ raw_spin_lock_init(&rd->rto_lock);
+- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
++// init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
++ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
+ #endif
+
+ rd->visit_gen = 0;
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index a0ec4450b1d8..4f7602724f9a 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1757,6 +1757,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
+ {
+ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
++ irq_work_tick_soft();
++
+ __run_timers(base);
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
+ __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+--
+2.19.1
+
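After this split, only items flagged IRQ_WORK_HARD_IRQ still run from the raw
interrupt on RT; everything else is handled from the timer softirq via
irq_work_tick_soft(). A user that must stay in hard-irq context marks its item
at init time, as the kernel/sched/topology.c hunk does. A hedged sketch with
invented names, assuming the IRQ_WORK_INIT_HARD() initializer used in that
hunk:

  #include <linux/irq_work.h>

  static void demo_hard_work_fn(struct irq_work *work)
  {
          /* Keep this short: it runs in raw IRQ context even on RT. */
  }

  /* Marked IRQ_WORK_HARD_IRQ so RT does not defer it to the softirq. */
  static struct irq_work demo_hard_work = IRQ_WORK_INIT_HARD(demo_hard_work_fn);

  static void demo_kick(void)
  {
          irq_work_queue(&demo_hard_work);   /* raise on the local CPU */
  }
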
diff --git a/features/rt/jump-label-disable-if-stop_machine-is-used.patch b/features/rt/jump-label-disable-if-stop_machine-is-used.patch
new file mode 100644
index 00000000..7ed59db6
--- /dev/null
+++ b/features/rt/jump-label-disable-if-stop_machine-is-used.patch
@@ -0,0 +1,41 @@
+From 14fd102dd9a45ac6a7e986617ed27fcdcd6f860f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 8 Jul 2015 17:14:48 +0200
+Subject: [PATCH 169/191] jump-label: disable if stop_machine() is used
+
+Some architectures are using stop_machine() while switching the opcode which
+leads to latency spikes.
+The architectures which use stop_machine() atm:
+- ARM stop machine
+- s390 stop machine
+
+The architectures which use other sorcery:
+- MIPS
+- X86
+- powerpc
+- sparc
+- arm64
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bigeasy: only ARM for now]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index 5179eb3a35d5..fe49c7cb76bd 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -67,7 +67,7 @@ config ARM
+ select HARDIRQS_SW_RESEND
+ select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
+ select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
+ select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
+ select HAVE_ARCH_MMAP_RND_BITS if MMU
+--
+2.19.1
+
diff --git a/features/rt/kconfig-Disable-config-options-which-are-not-RT-comp.patch b/features/rt/kconfig-Disable-config-options-which-are-not-RT-comp.patch
new file mode 100644
index 00000000..3bb7f987
--- /dev/null
+++ b/features/rt/kconfig-Disable-config-options-which-are-not-RT-comp.patch
@@ -0,0 +1,29 @@
+From 6e80f68f3fae004cd7506a5584ed2e5216bacd62 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 24 Jul 2011 12:11:43 +0200
+Subject: [PATCH 093/191] kconfig: Disable config options which are not RT
+ compatible
+
+Disable config options which are known to have issues on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 24c045b24b95..ec06984ec4ac 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -387,7 +387,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
+
+ config TRANSPARENT_HUGEPAGE
+ bool "Transparent Hugepage Support"
+- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
++ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
+ select COMPACTION
+ select XARRAY_MULTI
+ help
+--
+2.19.1
+
diff --git a/features/rt/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch b/features/rt/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch
new file mode 100644
index 00000000..2cb22e3a
--- /dev/null
+++ b/features/rt/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch
@@ -0,0 +1,109 @@
+From 4dcdff69e55d504a126793e1126d5d475b8c40b0 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 18 Feb 2021 18:31:24 +0100
+Subject: [PATCH 034/191] kcov: Remove kcov include from sched.h and move it to
+ its users.
+
+The recent addition of in_serving_softirq() to kcov.h results in a
+compile failure on PREEMPT_RT because it requires
+task_struct::softirq_disable_cnt. This is not available if kcov.h is
+included from sched.h.
+
+There is no need to include kcov.h from sched.h. All users except the
+ones in net/ already include the kcov header file.
+
+Move the include of the kcov.h header from sched.h to its users.
+Additionally include sched.h from kcov.h to ensure that everything
+task_struct related is available.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Acked-by: Andrey Konovalov <andreyknvl@google.com>
+Link: https://lkml.kernel.org/r/20210218173124.iy5iyqv3a4oia4vv@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/usb/usbip/usbip_common.h | 1 +
+ include/linux/kcov.h | 1 +
+ include/linux/sched.h | 1 -
+ net/core/skbuff.c | 1 +
+ net/mac80211/iface.c | 1 +
+ net/mac80211/rx.c | 1 +
+ 6 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
+index d60ce17d3dd2..a7dd6c66aee5 100644
+--- a/drivers/usb/usbip/usbip_common.h
++++ b/drivers/usb/usbip/usbip_common.h
+@@ -18,6 +18,7 @@
+ #include <linux/usb.h>
+ #include <linux/wait.h>
+ #include <linux/sched/task.h>
++#include <linux/kcov.h>
+ #include <uapi/linux/usbip.h>
+
+ #undef pr_fmt
+diff --git a/include/linux/kcov.h b/include/linux/kcov.h
+index 4e3037dc1204..55dc338f6bcd 100644
+--- a/include/linux/kcov.h
++++ b/include/linux/kcov.h
+@@ -2,6 +2,7 @@
+ #ifndef _LINUX_KCOV_H
+ #define _LINUX_KCOV_H
+
++#include <linux/sched.h>
+ #include <uapi/linux/kcov.h>
+
+ struct task_struct;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index ef00bb22164c..cf245bc237e7 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -14,7 +14,6 @@
+ #include <linux/pid.h>
+ #include <linux/sem.h>
+ #include <linux/shm.h>
+-#include <linux/kcov.h>
+ #include <linux/mutex.h>
+ #include <linux/plist.h>
+ #include <linux/hrtimer.h>
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index c421c8f80925..4275b88726f4 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -60,6 +60,7 @@
+ #include <linux/prefetch.h>
+ #include <linux/if_vlan.h>
+ #include <linux/mpls.h>
++#include <linux/kcov.h>
+
+ #include <net/protocol.h>
+ #include <net/dst.h>
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index b80c9b016b2b..c127debdc12e 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -15,6 +15,7 @@
+ #include <linux/if_arp.h>
+ #include <linux/netdevice.h>
+ #include <linux/rtnetlink.h>
++#include <linux/kcov.h>
+ #include <net/mac80211.h>
+ #include <net/ieee80211_radiotap.h>
+ #include "ieee80211_i.h"
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index c1343c028b76..62047e93e217 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -17,6 +17,7 @@
+ #include <linux/etherdevice.h>
+ #include <linux/rcupdate.h>
+ #include <linux/export.h>
++#include <linux/kcov.h>
+ #include <linux/bitops.h>
+ #include <net/mac80211.h>
+ #include <net/ieee80211_radiotap.h>
+--
+2.19.1
+
diff --git a/features/rt/kernel-sched-add-put-get-_cpu_light.patch b/features/rt/kernel-sched-add-put-get-_cpu_light.patch
new file mode 100644
index 00000000..8cb9d552
--- /dev/null
+++ b/features/rt/kernel-sched-add-put-get-_cpu_light.patch
@@ -0,0 +1,27 @@
+From 7497b8dc12591dd237a63c9748b71666d48a2eb4 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Sat, 27 May 2017 19:02:06 +0200
+Subject: [PATCH 101/191] kernel/sched: add {put|get}_cpu_light()
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/smp.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/include/linux/smp.h b/include/linux/smp.h
+index 70c6f6284dcf..4c602ca3bc13 100644
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -238,6 +238,9 @@ static inline int get_boot_cpu_id(void)
+ #define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
+ #define put_cpu() preempt_enable()
+
++#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); })
++#define put_cpu_light() migrate_enable()
++
+ /*
+ * Callback to arch code if there's nosmp or maxcpus=0 on the
+ * boot command line:
+--
+2.19.1
+
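get_cpu_light()/put_cpu_light() give the caller a stable CPU number by
disabling only migration, so the section in between may take sleeping locks on
RT. Unlike get_cpu(), other tasks can still run on that CPU, so per-CPU data
still needs its own serialization. A sketch with made-up per-CPU variables;
each CPU's lock is assumed to be spin_lock_init()ed during boot:

  #include <linux/smp.h>
  #include <linux/percpu.h>
  #include <linux/spinlock.h>

  static DEFINE_PER_CPU(spinlock_t, demo_lock);
  static DEFINE_PER_CPU(unsigned long, demo_count);

  static void demo_count_event(void)
  {
          int cpu = get_cpu_light();      /* migrate_disable() + CPU id */

          /* 'cpu' stays valid even if spin_lock() sleeps on RT; the lock
           * is still needed because preemption is not disabled. */
          spin_lock(&per_cpu(demo_lock, cpu));
          per_cpu(demo_count, cpu)++;
          spin_unlock(&per_cpu(demo_lock, cpu));

          put_cpu_light();                /* migrate_enable() */
  }
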
diff --git a/features/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/features/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch
new file mode 100644
index 00000000..085edde5
--- /dev/null
+++ b/features/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch
@@ -0,0 +1,80 @@
+From cb9ddd5eec36831a80572aa3d33be62eff791e74 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 21 Nov 2016 19:31:08 +0100
+Subject: [PATCH 121/191] kernel/sched: move stack + kprobe clean up to
+ __put_task_struct()
+
+There is no need to free the stack before the task struct (except for reasons
+mentioned in commit 68f24b08ee89 ("sched/core: Free the stack early if
+CONFIG_THREAD_INFO_IN_TASK")). This also comes handy on -RT because we can't
+free memory in preempt disabled region.
+vfree_atomic() delays the memory cleanup to a worker. Since we move everything
+to the RCU callback, we can also free it immediately.
+
+Cc: stable-rt@vger.kernel.org #for kprobe_flush_task()
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/fork.c | 12 +++++++++++-
+ kernel/sched/core.c | 9 ---------
+ 2 files changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/fork.c b/kernel/fork.c
+index da1b307cbf73..5fdb0a1bbad8 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -42,6 +42,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/fs.h>
+ #include <linux/mm.h>
++#include <linux/kprobes.h>
+ #include <linux/vmacache.h>
+ #include <linux/nsproxy.h>
+ #include <linux/capability.h>
+@@ -288,7 +289,7 @@ static inline void free_thread_stack(struct task_struct *tsk)
+ return;
+ }
+
+- vfree_atomic(tsk->stack);
++ vfree(tsk->stack);
+ return;
+ }
+ #endif
+@@ -743,6 +744,15 @@ void __put_task_struct(struct task_struct *tsk)
+ WARN_ON(refcount_read(&tsk->usage));
+ WARN_ON(tsk == current);
+
++ /*
++ * Remove function-return probe instances associated with this
++ * task and put them back on the free list.
++ */
++ kprobe_flush_task(tsk);
++
++ /* Task is done with its stack. */
++ put_task_stack(tsk);
++
+ io_uring_free(tsk);
+ cgroup_free(tsk);
+ task_numa_free(tsk, true);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 9ec24e4188f4..2069022bdce5 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4282,15 +4282,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ if (prev->sched_class->task_dead)
+ prev->sched_class->task_dead(prev);
+
+- /*
+- * Remove function-return probe instances associated with this
+- * task and put them back on the free list.
+- */
+- kprobe_flush_task(prev);
+-
+- /* Task is done with its stack. */
+- put_task_stack(prev);
+-
+ put_task_struct_rcu_user(prev);
+ }
+
+--
+2.19.1
+
diff --git a/features/rt/kthread-Move-prio-affinite-change-into-the-newly-cre.patch b/features/rt/kthread-Move-prio-affinite-change-into-the-newly-cre.patch
new file mode 100644
index 00000000..ee7fef7e
--- /dev/null
+++ b/features/rt/kthread-Move-prio-affinite-change-into-the-newly-cre.patch
@@ -0,0 +1,85 @@
+From 278ed17d1f399724a529b1b7c3730b364f136e16 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 9 Nov 2020 21:30:41 +0100
+Subject: [PATCH 003/191] kthread: Move prio/affinite change into the newly
+ created thread
+
+With enabled threaded interrupts the nouveau driver reported the
+following:
+| Chain exists of:
+| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem
+|
+| Possible unsafe locking scenario:
+|
+| CPU0 CPU1
+| ---- ----
+| lock(&cpuset_rwsem);
+| lock(&device->mutex);
+| lock(&cpuset_rwsem);
+| lock(&mm->mmap_lock#2);
+
+The device->mutex is nvkm_device::mutex.
+
+Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing
+to do.
+Move the priority reset to the start of the newly created thread.
+
+Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()")
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de
+---
+ kernel/kthread.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index 1578973c5740..bb0602597ffd 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -243,6 +243,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
+
+ static int kthread(void *_create)
+ {
++ static const struct sched_param param = { .sched_priority = 0 };
+ /* Copy data: it's on kthread's stack */
+ struct kthread_create_info *create = _create;
+ int (*threadfn)(void *data) = create->threadfn;
+@@ -273,6 +274,13 @@ static int kthread(void *_create)
+ init_completion(&self->parked);
+ current->vfork_done = &self->exited;
+
++ /*
++ * The new thread inherited kthreadd's priority and CPU mask. Reset
++ * back to default in case they have been changed.
++ */
++ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
++ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
++
+ /* OK, tell user we're spawned, wait for stop or wakeup */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ create->result = current;
+@@ -370,7 +378,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+ }
+ task = create->result;
+ if (!IS_ERR(task)) {
+- static const struct sched_param param = { .sched_priority = 0 };
+ char name[TASK_COMM_LEN];
+
+ /*
+@@ -379,13 +386,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(task, name);
+- /*
+- * root may have changed our (kthreadd's) priority or CPU mask.
+- * The kernel thread should not inherit these properties.
+- */
+- sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
+- set_cpus_allowed_ptr(task,
+- housekeeping_cpumask(HK_FLAG_KTHREAD));
+ }
+ kfree(create);
+ return task;
+--
+2.19.1
+
diff --git a/features/rt/leds-trigger-disable-CPU-trigger-on-RT.patch b/features/rt/leds-trigger-disable-CPU-trigger-on-RT.patch
new file mode 100644
index 00000000..c0182d45
--- /dev/null
+++ b/features/rt/leds-trigger-disable-CPU-trigger-on-RT.patch
@@ -0,0 +1,39 @@
+From fc70c36c08df5b41b07e2c8aa2833f78c0b5b090 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 23 Jan 2014 14:45:59 +0100
+Subject: [PATCH 170/191] leds: trigger: disable CPU trigger on -RT
+
+as it triggers:
+|CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141
+|[<c0014aa4>] (unwind_backtrace+0x0/0xf8) from [<c0012788>] (show_stack+0x1c/0x20)
+|[<c0012788>] (show_stack+0x1c/0x20) from [<c043c8dc>] (dump_stack+0x20/0x2c)
+|[<c043c8dc>] (dump_stack+0x20/0x2c) from [<c004c5e8>] (__might_sleep+0x13c/0x170)
+|[<c004c5e8>] (__might_sleep+0x13c/0x170) from [<c043f270>] (__rt_spin_lock+0x28/0x38)
+|[<c043f270>] (__rt_spin_lock+0x28/0x38) from [<c043fa00>] (rt_read_lock+0x68/0x7c)
+|[<c043fa00>] (rt_read_lock+0x68/0x7c) from [<c036cf74>] (led_trigger_event+0x2c/0x5c)
+|[<c036cf74>] (led_trigger_event+0x2c/0x5c) from [<c036e0bc>] (ledtrig_cpu+0x54/0x5c)
+|[<c036e0bc>] (ledtrig_cpu+0x54/0x5c) from [<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c)
+|[<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c) from [<c00590b8>] (cpu_startup_entry+0xa8/0x234)
+|[<c00590b8>] (cpu_startup_entry+0xa8/0x234) from [<c043b2cc>] (rest_init+0xb8/0xe0)
+|[<c043b2cc>] (rest_init+0xb8/0xe0) from [<c061ebe0>] (start_kernel+0x2c4/0x380)
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/leds/trigger/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
+index b77a01bd27f4..aa74e2a05798 100644
+--- a/drivers/leds/trigger/Kconfig
++++ b/drivers/leds/trigger/Kconfig
+@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT
+
+ config LEDS_TRIGGER_CPU
+ bool "LED CPU Trigger"
++ depends on !PREEMPT_RT
+ help
+ This allows LEDs to be controlled by active CPUs. This shows
+ the active CPUs across an array of LEDs so you can see which
+--
+2.19.1
+
diff --git a/features/rt/lockdep-Make-it-RT-aware.patch b/features/rt/lockdep-Make-it-RT-aware.patch
new file mode 100644
index 00000000..7b9c216e
--- /dev/null
+++ b/features/rt/lockdep-Make-it-RT-aware.patch
@@ -0,0 +1,77 @@
+From 41fd5cd7a4baa5c51e83bd2637358791447960a7 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 18:51:23 +0200
+Subject: [PATCH 151/191] lockdep: Make it RT aware
+
+Teach lockdep that we don't really do softirqs on -RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/irqflags.h | 23 +++++++++++++++--------
+ kernel/locking/lockdep.c | 2 ++
+ 2 files changed, 17 insertions(+), 8 deletions(-)
+
+diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
+index 600c10da321a..4b140938b03e 100644
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -71,14 +71,6 @@ do { \
+ do { \
+ __this_cpu_dec(hardirq_context); \
+ } while (0)
+-# define lockdep_softirq_enter() \
+-do { \
+- current->softirq_context++; \
+-} while (0)
+-# define lockdep_softirq_exit() \
+-do { \
+- current->softirq_context--; \
+-} while (0)
+
+ # define lockdep_hrtimer_enter(__hrtimer) \
+ ({ \
+@@ -140,6 +132,21 @@ do { \
+ # define lockdep_irq_work_exit(__work) do { } while (0)
+ #endif
+
++#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
++# define lockdep_softirq_enter() \
++do { \
++ current->softirq_context++; \
++} while (0)
++# define lockdep_softirq_exit() \
++do { \
++ current->softirq_context--; \
++} while (0)
++
++#else
++# define lockdep_softirq_enter() do { } while (0)
++# define lockdep_softirq_exit() do { } while (0)
++#endif
++
+ #if defined(CONFIG_IRQSOFF_TRACER) || \
+ defined(CONFIG_PREEMPT_TRACER)
+ extern void stop_critical_timings(void);
+diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
+index ac177ade7d9b..82fd5cacc492 100644
+--- a/kernel/locking/lockdep.c
++++ b/kernel/locking/lockdep.c
+@@ -5360,6 +5360,7 @@ static noinstr void check_flags(unsigned long flags)
+ }
+ }
+
++#ifndef CONFIG_PREEMPT_RT
+ /*
+ * We dont accurately track softirq state in e.g.
+ * hardirq contexts (such as on 4KSTACKS), so only
+@@ -5374,6 +5375,7 @@ static noinstr void check_flags(unsigned long flags)
+ DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ }
+ }
++#endif
+
+ if (!debug_locks)
+ print_irqtrace_events(current);
+--
+2.19.1
+
diff --git a/features/rt/lockdep-Reduce-header-files-in-debug_locks.h.patch b/features/rt/lockdep-Reduce-header-files-in-debug_locks.h.patch
new file mode 100644
index 00000000..24d5c811
--- /dev/null
+++ b/features/rt/lockdep-Reduce-header-files-in-debug_locks.h.patch
@@ -0,0 +1,32 @@
+From b127ff5cf25163e47794c314b66398cef8fec54f Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 14 Aug 2020 16:55:25 +0200
+Subject: [PATCH 070/191] lockdep: Reduce header files in debug_locks.h
+
+The inclusion of printk.h leads to circular dependency if spinlock_t is
+based on rt_mutex.
+
+Include only atomic.h (xchg()) and cache.h (__read_mostly).
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/debug_locks.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
+index 2915f56ad421..5a9e3e3769ce 100644
+--- a/include/linux/debug_locks.h
++++ b/include/linux/debug_locks.h
+@@ -3,8 +3,7 @@
+ #define __LINUX_DEBUG_LOCKING_H
+
+ #include <linux/atomic.h>
+-#include <linux/bug.h>
+-#include <linux/printk.h>
++#include <linux/cache.h>
+
+ struct task_struct;
+
+--
+2.19.1
+
diff --git a/features/rt/lockdep-disable-self-test.patch b/features/rt/lockdep-disable-self-test.patch
new file mode 100644
index 00000000..fae3936a
--- /dev/null
+++ b/features/rt/lockdep-disable-self-test.patch
@@ -0,0 +1,34 @@
+From 87bd0ee2b2c1a55ad355c37f1143996b0c9f654d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 17 Oct 2017 16:36:18 +0200
+Subject: [PATCH 154/191] lockdep: disable self-test
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The self-test wasn't always 100% accurate for RT. We disabled a few
+tests which failed because they have different semantics on RT. Some
+still reported false positives. Now the selftest locks up the system
+during boot and it needs to be investigated…
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ lib/Kconfig.debug | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index 2779c29d9981..74a0a9e54fdb 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -1392,7 +1392,7 @@ config DEBUG_ATOMIC_SLEEP
+
+ config DEBUG_LOCKING_API_SELFTESTS
+ bool "Locking API boot-time self-tests"
+- depends on DEBUG_KERNEL
++ depends on DEBUG_KERNEL && !PREEMPT_RT
+ help
+ Say Y here if you want the kernel to run a short self-test during
+ bootup. The self-test checks whether common types of locking bugs
+--
+2.19.1
+
diff --git a/features/rt/lockdep-selftest-Only-do-hardirq-context-test-for-ra.patch b/features/rt/lockdep-selftest-Only-do-hardirq-context-test-for-ra.patch
new file mode 100644
index 00000000..decdc693
--- /dev/null
+++ b/features/rt/lockdep-selftest-Only-do-hardirq-context-test-for-ra.patch
@@ -0,0 +1,61 @@
+From 976fc02f2b164dce981f5e3c6a5aca9d9aba1a73 Mon Sep 17 00:00:00 2001
+From: Yong Zhang <yong.zhang@windriver.com>
+Date: Mon, 16 Apr 2012 15:01:56 +0800
+Subject: [PATCH 152/191] lockdep: selftest: Only do hardirq context test for
+ raw spinlock
+
+On -rt there is no softirq context any more and rwlocks are sleepable,
+so disable the softirq context tests and the rwlock+irq tests.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Yong Zhang <yong.zhang@windriver.com>
+Link: http://lkml.kernel.org/r/1334559716-18447-3-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/locking-selftest.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
+index 2d85abac1744..5ff07ae1cc67 100644
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -2841,6 +2841,7 @@ void locking_selftest(void)
+
+ printk(" --------------------------------------------------------------------------\n");
+
++#ifndef CONFIG_PREEMPT_RT
+ /*
+ * irq-context testcases:
+ */
+@@ -2855,6 +2856,28 @@ void locking_selftest(void)
+ DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2);
+ DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3);
+
++#else
++ /* On -rt, we only do hardirq context test for raw spinlock */
++ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
++ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
++
++ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
++ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
++
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
++
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
++#endif
+ ww_tests();
+
+ force_read_lock_recursive = 0;
+--
+2.19.1
+
diff --git a/features/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/features/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch
new file mode 100644
index 00000000..a0354b31
--- /dev/null
+++ b/features/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch
@@ -0,0 +1,148 @@
+From d161010c471cc0d0b18c8875379b75194cd124e8 Mon Sep 17 00:00:00 2001
+From: Josh Cartwright <josh.cartwright@ni.com>
+Date: Wed, 28 Jan 2015 13:08:45 -0600
+Subject: [PATCH 153/191] lockdep: selftest: fix warnings due to missing
+ PREEMPT_RT conditionals
+
+"lockdep: Selftest: Only do hardirq context test for raw spinlock"
+disabled the execution of certain tests with PREEMPT_RT, but did
+not prevent the tests from still being defined. This leads to warnings
+like:
+
+ ./linux/lib/locking-selftest.c:574:1: warning: 'irqsafe1_hard_rlock_12' defined but not used [-Wunused-function]
+ ./linux/lib/locking-selftest.c:574:1: warning: 'irqsafe1_hard_rlock_21' defined but not used [-Wunused-function]
+ ./linux/lib/locking-selftest.c:577:1: warning: 'irqsafe1_hard_wlock_12' defined but not used [-Wunused-function]
+ ./linux/lib/locking-selftest.c:577:1: warning: 'irqsafe1_hard_wlock_21' defined but not used [-Wunused-function]
+ ./linux/lib/locking-selftest.c:580:1: warning: 'irqsafe1_soft_spin_12' defined but not used [-Wunused-function]
+ ...
+
+Fixed by wrapping the test definitions in #ifndef CONFIG_PREEMPT_RT
+conditionals.
+
+Signed-off-by: Josh Cartwright <josh.cartwright@ni.com>
+Signed-off-by: Xander Huff <xander.huff@ni.com>
+Acked-by: Gratian Crisan <gratian.crisan@ni.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ lib/locking-selftest.c | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
+index 5ff07ae1cc67..3d2d99d8ed13 100644
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -794,6 +794,8 @@ GENERATE_TESTCASE(init_held_rtmutex);
+ #include "locking-selftest-spin-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
+
++#ifndef CONFIG_PREEMPT_RT
++
+ #include "locking-selftest-rlock-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
+
+@@ -809,9 +811,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+
++#endif
++
+ #undef E1
+ #undef E2
+
++#ifndef CONFIG_PREEMPT_RT
+ /*
+ * Enabling hardirqs with a softirq-safe lock held:
+ */
+@@ -844,6 +849,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+ #undef E1
+ #undef E2
+
++#endif
++
+ /*
+ * Enabling irqs with an irq-safe lock held:
+ */
+@@ -867,6 +874,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+ #include "locking-selftest-spin-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
+
++#ifndef CONFIG_PREEMPT_RT
++
+ #include "locking-selftest-rlock-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
+
+@@ -882,6 +891,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+
++#endif
++
+ #undef E1
+ #undef E2
+
+@@ -913,6 +924,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+ #include "locking-selftest-spin-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
+
++#ifndef CONFIG_PREEMPT_RT
++
+ #include "locking-selftest-rlock-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
+
+@@ -928,6 +941,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+
++#endif
++
+ #undef E1
+ #undef E2
+ #undef E3
+@@ -961,6 +976,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+ #include "locking-selftest-spin-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
+
++#ifndef CONFIG_PREEMPT_RT
++
+ #include "locking-selftest-rlock-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
+
+@@ -976,10 +993,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+
++#endif
++
+ #undef E1
+ #undef E2
+ #undef E3
+
++#ifndef CONFIG_PREEMPT_RT
++
+ /*
+ * read-lock / write-lock irq inversion.
+ *
+@@ -1169,6 +1190,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_R3W1)
+ #undef E1
+ #undef E2
+ #undef E3
++
++#endif
++
++#ifndef CONFIG_PREEMPT_RT
++
+ /*
+ * read-lock / write-lock recursion that is actually safe.
+ */
+@@ -1215,6 +1241,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
+ #undef E2
+ #undef E3
+
++#endif
++
+ /*
+ * read-lock / write-lock recursion that is unsafe.
+ */
+--
+2.19.1
+
diff --git a/features/rt/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch b/features/rt/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch
new file mode 100644
index 00000000..665a4bb1
--- /dev/null
+++ b/features/rt/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch
@@ -0,0 +1,125 @@
+From 634b7dbcbf43a51f9c149ff014b99cf95ec798d6 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 19 Nov 2019 09:25:04 +0100
+Subject: [PATCH 131/191] locking: Make spinlock_t and rwlock_t a RCU section
+ on RT
+
+On !RT a locked spinlock_t or rwlock_t disables preemption, which
+implies an RCU read section. There is code that relies on that behaviour.
+
+Add an explicit RCU read section on RT while a sleeping lock (a lock
+which would disable preemption on !RT) is acquired.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 6 ++++++
+ kernel/locking/rwlock-rt.c | 6 ++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 4cd9d6c4cd68..d4da971759fb 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1118,6 +1118,7 @@ void __lockfunc rt_spin_lock(spinlock_t *lock)
+ {
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ EXPORT_SYMBOL(rt_spin_lock);
+@@ -1132,6 +1133,7 @@ void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
+ {
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ EXPORT_SYMBOL(rt_spin_lock_nested);
+@@ -1141,6 +1143,7 @@ void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock,
+ {
+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ EXPORT_SYMBOL(rt_spin_lock_nest_lock);
+@@ -1151,6 +1154,7 @@ void __lockfunc rt_spin_unlock(spinlock_t *lock)
+ /* NOTE: we always pass in '1' for nested, for simplicity */
+ spin_release(&lock->dep_map, _RET_IP_);
+ migrate_enable();
++ rcu_read_unlock();
+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
+ }
+ EXPORT_SYMBOL(rt_spin_unlock);
+@@ -1180,6 +1184,7 @@ int __lockfunc rt_spin_trylock(spinlock_t *lock)
+ ret = __rt_mutex_trylock(&lock->lock);
+ if (ret) {
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+@@ -1194,6 +1199,7 @@ int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
+ ret = __rt_mutex_trylock(&lock->lock);
+ if (ret) {
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ rcu_read_lock();
+ migrate_disable();
+ } else {
+ local_bh_enable();
+diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
+index 948d10214ee2..cc8bc2ef4ba2 100644
+--- a/kernel/locking/rwlock-rt.c
++++ b/kernel/locking/rwlock-rt.c
+@@ -270,6 +270,7 @@ int __lockfunc rt_read_trylock(rwlock_t *rwlock)
+ ret = __read_rt_trylock(rwlock);
+ if (ret) {
+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+@@ -283,6 +284,7 @@ int __lockfunc rt_write_trylock(rwlock_t *rwlock)
+ ret = __write_rt_trylock(rwlock);
+ if (ret) {
+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ return ret;
+@@ -293,6 +295,7 @@ void __lockfunc rt_read_lock(rwlock_t *rwlock)
+ {
+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+ __read_rt_lock(rwlock);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ EXPORT_SYMBOL(rt_read_lock);
+@@ -301,6 +304,7 @@ void __lockfunc rt_write_lock(rwlock_t *rwlock)
+ {
+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ __write_rt_lock(rwlock);
++ rcu_read_lock();
+ migrate_disable();
+ }
+ EXPORT_SYMBOL(rt_write_lock);
+@@ -309,6 +313,7 @@ void __lockfunc rt_read_unlock(rwlock_t *rwlock)
+ {
+ rwlock_release(&rwlock->dep_map, _RET_IP_);
+ migrate_enable();
++ rcu_read_unlock();
+ __read_rt_unlock(rwlock);
+ }
+ EXPORT_SYMBOL(rt_read_unlock);
+@@ -317,6 +322,7 @@ void __lockfunc rt_write_unlock(rwlock_t *rwlock)
+ {
+ rwlock_release(&rwlock->dep_map, _RET_IP_);
+ migrate_enable();
++ rcu_read_unlock();
+ __write_rt_unlock(rwlock);
+ }
+ EXPORT_SYMBOL(rt_write_unlock);
+--
+2.19.1
+
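A minimal sketch of the kind of caller the patch above keeps working. The
types and names are hypothetical and the snippet is illustrative only (it
assumes <linux/spinlock.h>, <linux/rculist.h> and <linux/slab.h>); the point
is that the reader never calls rcu_read_lock() itself:

struct item {
	struct list_head node;
	unsigned long key;
	struct rcu_head rcu;
};

struct table {
	spinlock_t lock;		/* protects ->lookups, not the list */
	unsigned long lookups;
	struct list_head items;
};

/* Reader: relies on the locked section being an RCU read section.  On
 * !RT that comes from the held spinlock_t disabling preemption; with
 * the patch above it also holds on RT via the added rcu_read_lock().
 */
static bool table_contains(struct table *t, unsigned long key)
{
	struct item *it;
	bool found = false;

	spin_lock(&t->lock);
	t->lookups++;
	list_for_each_entry_rcu(it, &t->items, node,
				lockdep_is_held(&t->lock)) {
		if (it->key == key) {
			found = true;
			break;
		}
	}
	spin_unlock(&t->lock);
	return found;
}

/* Updater (caller holds its own update-side lock, not shown): the free
 * is deferred past the readers above because each of their locked
 * sections counts as an RCU read section.
 */
static void table_del(struct item *it)
{
	list_del_rcu(&it->node);
	kfree_rcu(it, rcu);
}
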
diff --git a/features/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch b/features/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
new file mode 100644
index 00000000..198af701
--- /dev/null
+++ b/features/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
@@ -0,0 +1,165 @@
+From 4c06cc850cb5404dedbdefec937342ce96422992 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 4 Aug 2017 17:40:42 +0200
+Subject: [PATCH 103/191] locking: don't check for __LINUX_SPINLOCK_TYPES_H on
+ -RT archs
+
+Upstream uses arch_spinlock_t within spinlock_t and requires that the
+spinlock_types.h header file be included first.
+On -RT we have the rt_mutex with its raw_lock wait_lock, which needs the
+architecture's spinlock_types.h header file for its definition. However,
+we need rt_mutex first because it is used to build the spinlock_t, so
+that check does not work for us.
+Therefore I am dropping that check.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/alpha/include/asm/spinlock_types.h | 4 ----
+ arch/arm/include/asm/spinlock_types.h | 4 ----
+ arch/arm64/include/asm/spinlock_types.h | 4 ----
+ arch/hexagon/include/asm/spinlock_types.h | 4 ----
+ arch/ia64/include/asm/spinlock_types.h | 4 ----
+ arch/powerpc/include/asm/spinlock_types.h | 4 ----
+ arch/s390/include/asm/spinlock_types.h | 4 ----
+ arch/sh/include/asm/spinlock_types.h | 4 ----
+ arch/xtensa/include/asm/spinlock_types.h | 4 ----
+ 9 files changed, 36 deletions(-)
+
+diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
+index 1d5716bc060b..6883bc952d22 100644
+--- a/arch/alpha/include/asm/spinlock_types.h
++++ b/arch/alpha/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef _ALPHA_SPINLOCK_TYPES_H
+ #define _ALPHA_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ typedef struct {
+ volatile unsigned int lock;
+ } arch_spinlock_t;
+diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
+index 5976958647fe..a37c0803954b 100644
+--- a/arch/arm/include/asm/spinlock_types.h
++++ b/arch/arm/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ #define TICKET_SHIFT 16
+
+ typedef struct {
+diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
+index 18782f0c4721..6672b05350b4 100644
+--- a/arch/arm64/include/asm/spinlock_types.h
++++ b/arch/arm64/include/asm/spinlock_types.h
+@@ -5,10 +5,6 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+
+-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+-# error "please don't include this file directly"
+-#endif
+-
+ #include <asm-generic/qspinlock_types.h>
+ #include <asm-generic/qrwlock_types.h>
+
+diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
+index 19d233497ba5..de72fb23016d 100644
+--- a/arch/hexagon/include/asm/spinlock_types.h
++++ b/arch/hexagon/include/asm/spinlock_types.h
+@@ -8,10 +8,6 @@
+ #ifndef _ASM_SPINLOCK_TYPES_H
+ #define _ASM_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ typedef struct {
+ volatile unsigned int lock;
+ } arch_spinlock_t;
+diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
+index 6e345fefcdca..681408d6816f 100644
+--- a/arch/ia64/include/asm/spinlock_types.h
++++ b/arch/ia64/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef _ASM_IA64_SPINLOCK_TYPES_H
+ #define _ASM_IA64_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ typedef struct {
+ volatile unsigned int lock;
+ } arch_spinlock_t;
+diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
+index c5d742f18021..cc6922a011ba 100644
+--- a/arch/powerpc/include/asm/spinlock_types.h
++++ b/arch/powerpc/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
+ #define _ASM_POWERPC_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ #ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+ #include <asm-generic/qspinlock_types.h>
+ #include <asm-generic/qrwlock_types.h>
+diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
+index cfed272e4fd5..8e28e8176ec8 100644
+--- a/arch/s390/include/asm/spinlock_types.h
++++ b/arch/s390/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ typedef struct {
+ int lock;
+ } __attribute__ ((aligned (4))) arch_spinlock_t;
+diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
+index e82369f286a2..22ca9a98bbb8 100644
+--- a/arch/sh/include/asm/spinlock_types.h
++++ b/arch/sh/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef __ASM_SH_SPINLOCK_TYPES_H
+ #define __ASM_SH_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
+-# error "please don't include this file directly"
+-#endif
+-
+ typedef struct {
+ volatile unsigned int lock;
+ } arch_spinlock_t;
+diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
+index 64c9389254f1..dc846323b1cd 100644
+--- a/arch/xtensa/include/asm/spinlock_types.h
++++ b/arch/xtensa/include/asm/spinlock_types.h
+@@ -2,10 +2,6 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+
+-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+-# error "please don't include this file directly"
+-#endif
+-
+ #include <asm-generic/qspinlock_types.h>
+ #include <asm-generic/qrwlock_types.h>
+
+--
+2.19.1
+
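To make the ordering problem above concrete, a simplified sketch of the RT
type layering (field lists abbreviated; the real definitions live in the RT
spinlock and rtmutex headers added elsewhere in this series):

/* Sketch only, not the literal headers. */
typedef struct raw_spinlock {
	arch_spinlock_t raw_lock;	/* from <asm/spinlock_types.h> */
	/* ... lockdep bits ... */
} raw_spinlock_t;

struct rt_mutex {
	raw_spinlock_t		wait_lock;
	/* ... waiter tree, owner ... */
};

typedef struct spinlock {
	struct rt_mutex		lock;	/* spinlock_t sleeps on RT */
	/* ... lockdep bits ... */
} spinlock_t;

/* The arch header is therefore needed before (and independent of)
 * linux/spinlock_types.h, which is exactly the include order the
 * removed "#error" check rejected.
 */
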
diff --git a/features/rt/locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch b/features/rt/locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch
new file mode 100644
index 00000000..2e0997d6
--- /dev/null
+++ b/features/rt/locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch
@@ -0,0 +1,36 @@
+From 50f8ea4c311b4bc5100595c2c43f02754afa2798 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 2 Dec 2015 11:34:07 +0100
+Subject: [PATCH 076/191] locking/rtmutex: Allow rt_mutex_trylock() on
+ PREEMPT_RT
+
+A non-PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq
+context.
+On PREEMPT_RT, softirqs are handled in thread context. This avoids the
+deadlock in the slow path, and PI boosting will be done on the correct
+thread.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 03ffb955b286..f0bc7fcae441 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1866,7 +1866,11 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+
+ int __sched __rt_mutex_trylock(struct rt_mutex *lock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ if (WARN_ON_ONCE(in_irq() || in_nmi()))
++#else
+ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
++#endif
+ return 0;
+
+ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+--
+2.19.1
+
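A hypothetical caller showing what the relaxed check permits on RT; the
driver structure and names are made up (assumes <linux/interrupt.h> and
<linux/spinlock.h>):

struct mydev {
	struct tasklet_struct tasklet;
	spinlock_t stats_lock;		/* sleeping lock on RT */
	unsigned long events;
};

/* On PREEMPT_RT this callback runs in a softirq thread, so the
 * spin_trylock() below ends up in rt_mutex_trylock() from (threaded)
 * softirq context.  That is legitimate on RT, which is why only hard
 * IRQ and NMI context remain in the WARN_ON_ONCE() condition.
 */
static void mydev_tasklet_fn(struct tasklet_struct *t)
{
	struct mydev *dev = from_tasklet(dev, t, tasklet);

	if (spin_trylock(&dev->stats_lock)) {
		dev->events++;
		spin_unlock(&dev->stats_lock);
	}
}
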
diff --git a/features/rt/locking-rtmutex-Avoid-include-hell.patch b/features/rt/locking-rtmutex-Avoid-include-hell.patch
new file mode 100644
index 00000000..57b13749
--- /dev/null
+++ b/features/rt/locking-rtmutex-Avoid-include-hell.patch
@@ -0,0 +1,29 @@
+From 1fec4e5db16df775436f8ababd922f46ee58e7e0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 20:06:39 +0200
+Subject: [PATCH 069/191] locking/rtmutex: Avoid include hell
+
+Include only the required raw types. This avoids pulling in the
+complete spinlock header which in turn requires rtmutex.h at some point.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rtmutex.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 243fabc2c85f..add1dab27df5 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -15,7 +15,7 @@
+
+ #include <linux/linkage.h>
+ #include <linux/rbtree.h>
+-#include <linux/spinlock_types.h>
++#include <linux/spinlock_types_raw.h>
+
+ extern int max_lock_depth; /* for sysctl */
+
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-Handle-the-various-new-futex-race-co.patch b/features/rt/locking-rtmutex-Handle-the-various-new-futex-race-co.patch
new file mode 100644
index 00000000..12a83eab
--- /dev/null
+++ b/features/rt/locking-rtmutex-Handle-the-various-new-futex-race-co.patch
@@ -0,0 +1,255 @@
+From d8b0c4e14162b18ee6a955f443a6a1b3fa8473e1 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 10 Jun 2011 11:04:15 +0200
+Subject: [PATCH 065/191] locking/rtmutex: Handle the various new futex race
+ conditions
+
+RT opens a few new interesting race conditions in the rtmutex/futex
+combo because the futex hash bucket lock is a 'sleeping' spinlock and
+therefore does not disable preemption.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 78 ++++++++++++++++++++++++++-------
+ kernel/locking/rtmutex.c | 36 ++++++++++++---
+ kernel/locking/rtmutex_common.h | 2 +
+ 3 files changed, 95 insertions(+), 21 deletions(-)
+
+diff --git a/kernel/futex.c b/kernel/futex.c
+index e68db7745039..0315333c0587 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2154,6 +2154,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+ */
+ requeue_pi_wake_futex(this, &key2, hb2);
+ continue;
++ } else if (ret == -EAGAIN) {
++ /*
++ * Waiter was woken by timeout or
++ * signal and has set pi_blocked_on to
++ * PI_WAKEUP_INPROGRESS before we
++ * tried to enqueue it on the rtmutex.
++ */
++ this->pi_state = NULL;
++ put_pi_state(pi_state);
++ continue;
+ } else if (ret) {
+ /*
+ * rt_mutex_start_proxy_lock() detected a
+@@ -3171,7 +3181,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ {
+ struct hrtimer_sleeper timeout, *to;
+ struct rt_mutex_waiter rt_waiter;
+- struct futex_hash_bucket *hb;
++ struct futex_hash_bucket *hb, *hb2;
+ union futex_key key2 = FUTEX_KEY_INIT;
+ struct futex_q q = futex_q_init;
+ int res, ret;
+@@ -3223,20 +3233,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
+ futex_wait_queue_me(hb, &q, to);
+
+- spin_lock(&hb->lock);
+- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+- spin_unlock(&hb->lock);
+- if (ret)
+- goto out;
++ /*
++ * On RT we must avoid races with requeue and trying to block
++ * on two mutexes (hb->lock and uaddr2's rtmutex) by
++ * serializing access to pi_blocked_on with pi_lock.
++ */
++ raw_spin_lock_irq(&current->pi_lock);
++ if (current->pi_blocked_on) {
++ /*
++ * We have been requeued or are in the process of
++ * being requeued.
++ */
++ raw_spin_unlock_irq(&current->pi_lock);
++ } else {
++ /*
++ * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
++ * prevents a concurrent requeue from moving us to the
++ * uaddr2 rtmutex. After that we can safely acquire
++ * (and possibly block on) hb->lock.
++ */
++ current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
++ raw_spin_unlock_irq(&current->pi_lock);
++
++ spin_lock(&hb->lock);
++
++ /*
++ * Clean up pi_blocked_on. We might leak it otherwise
++ * when we succeeded with the hb->lock in the fast
++ * path.
++ */
++ raw_spin_lock_irq(&current->pi_lock);
++ current->pi_blocked_on = NULL;
++ raw_spin_unlock_irq(&current->pi_lock);
++
++ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
++ spin_unlock(&hb->lock);
++ if (ret)
++ goto out;
++ }
+
+ /*
+- * In order for us to be here, we know our q.key == key2, and since
+- * we took the hb->lock above, we also know that futex_requeue() has
+- * completed and we no longer have to concern ourselves with a wakeup
+- * race with the atomic proxy lock acquisition by the requeue code. The
+- * futex_requeue dropped our key1 reference and incremented our key2
+- * reference count.
++ * In order to be here, we have either been requeued, are in
++ * the process of being requeued, or requeue successfully
++ * acquired uaddr2 on our behalf. If pi_blocked_on was
++ * non-null above, we may be racing with a requeue. Do not
++ * rely on q->lock_ptr to be hb2->lock until after blocking on
++ * hb->lock or hb2->lock. The futex_requeue dropped our key1
++ * reference and incremented our key2 reference count.
+ */
++ hb2 = hash_futex(&key2);
+
+ /* Check if the requeue code acquired the second futex for us. */
+ if (!q.rt_waiter) {
+@@ -3245,14 +3290,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ * did a lock-steal - fix up the PI-state in that case.
+ */
+ if (q.pi_state && (q.pi_state->owner != current)) {
+- spin_lock(q.lock_ptr);
++ spin_lock(&hb2->lock);
++ BUG_ON(&hb2->lock != q.lock_ptr);
+ ret = fixup_pi_state_owner(uaddr2, &q, current);
+ /*
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+ put_pi_state(q.pi_state);
+- spin_unlock(q.lock_ptr);
++ spin_unlock(&hb2->lock);
++
+ /*
+ * Adjust the return value. It's either -EFAULT or
+ * success (1) but the caller expects 0 for success.
+@@ -3271,7 +3318,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ pi_mutex = &q.pi_state->pi_mutex;
+ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
+
+- spin_lock(q.lock_ptr);
++ spin_lock(&hb2->lock);
++ BUG_ON(&hb2->lock != q.lock_ptr);
+ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
+ ret = 0;
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index c9a37b6d6ea0..7fff3b88b96b 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -136,6 +136,11 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ }
+
++static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
++{
++ return waiter && waiter != PI_WAKEUP_INPROGRESS;
++}
++
+ /*
+ * We can speed up the acquire/release, if there's no debugging state to be
+ * set up.
+@@ -360,7 +365,8 @@ int max_lock_depth = 1024;
+
+ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
+ {
+- return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
++ return rt_mutex_real_waiter(p->pi_blocked_on) ?
++ p->pi_blocked_on->lock : NULL;
+ }
+
+ /*
+@@ -496,7 +502,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * reached or the state of the chain has changed while we
+ * dropped the locks.
+ */
+- if (!waiter)
++ if (!rt_mutex_real_waiter(waiter))
+ goto out_unlock_pi;
+
+ /*
+@@ -929,6 +935,22 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ return -EDEADLK;
+
+ raw_spin_lock(&task->pi_lock);
++ /*
++ * In the case of futex requeue PI, this will be a proxy
++ * lock. The task will wake unaware that it is enqueueed on
++ * this lock. Avoid blocking on two locks and corrupting
++ * pi_blocked_on via the PI_WAKEUP_INPROGRESS
++ * flag. futex_wait_requeue_pi() sets this when it wakes up
++ * before requeue (due to a signal or timeout). Do not enqueue
++ * the task if PI_WAKEUP_INPROGRESS is set.
++ */
++ if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
++ raw_spin_unlock(&task->pi_lock);
++ return -EAGAIN;
++ }
++
++ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
++
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter->prio = task->prio;
+@@ -952,7 +974,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ rt_mutex_enqueue_pi(owner, waiter);
+
+ rt_mutex_adjust_prio(owner);
+- if (owner->pi_blocked_on)
++ if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ chain_walk = 1;
+ } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+ chain_walk = 1;
+@@ -1048,7 +1070,7 @@ static void remove_waiter(struct rt_mutex *lock,
+ {
+ bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+ struct task_struct *owner = rt_mutex_owner(lock);
+- struct rt_mutex *next_lock;
++ struct rt_mutex *next_lock = NULL;
+
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -1074,7 +1096,8 @@ static void remove_waiter(struct rt_mutex *lock,
+ rt_mutex_adjust_prio(owner);
+
+ /* Store the lock on which owner is blocked or NULL */
+- next_lock = task_blocked_on_lock(owner);
++ if (rt_mutex_real_waiter(owner->pi_blocked_on))
++ next_lock = task_blocked_on_lock(owner);
+
+ raw_spin_unlock(&owner->pi_lock);
+
+@@ -1110,7 +1133,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++ if (!rt_mutex_real_waiter(waiter) ||
++ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index b1455dc2366f..096b16cfb096 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -125,6 +125,8 @@ enum rtmutex_chainwalk {
+ /*
+ * PI-futex support (proxy locking functions, etc.):
+ */
++#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
++
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-Make-lock_killable-work.patch b/features/rt/locking-rtmutex-Make-lock_killable-work.patch
new file mode 100644
index 00000000..f68466c3
--- /dev/null
+++ b/features/rt/locking-rtmutex-Make-lock_killable-work.patch
@@ -0,0 +1,49 @@
+From fee27667d6f81e0cd5a7e9ca9fe1422c613775d0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 1 Apr 2017 12:50:59 +0200
+Subject: [PATCH 067/191] locking/rtmutex: Make lock_killable work
+
+Locking an rt mutex killable does not work because signal handling is
+restricted to TASK_INTERRUPTIBLE.
+
+Use signal_pending_state() unconditionally.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 4d2a57e8dcb9..b346dbc37d6d 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1179,18 +1179,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ if (try_to_take_rt_mutex(lock, current, waiter))
+ break;
+
+- /*
+- * TASK_INTERRUPTIBLE checks for signals and
+- * timeout. Ignored otherwise.
+- */
+- if (likely(state == TASK_INTERRUPTIBLE)) {
+- /* Signal pending? */
+- if (signal_pending(current))
+- ret = -EINTR;
+- if (timeout && !timeout->task)
+- ret = -ETIMEDOUT;
+- if (ret)
+- break;
++ if (timeout && !timeout->task) {
++ ret = -ETIMEDOUT;
++ break;
++ }
++ if (signal_pending_state(state, current)) {
++ ret = -EINTR;
++ break;
+ }
+
+ raw_spin_unlock_irq(&lock->wait_lock);
+--
+2.19.1
+
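A small usage sketch of what the fix enables; the helper and its struct are
hypothetical, and on RT the killable wait reaches this slow path through the
rtmutex-based mutex added later in the series:

struct cfg {
	struct mutex lock;
	u32 value;
};

/* mutex_lock_killable() sleeps in TASK_KILLABLE.  Previously only
 * TASK_INTERRUPTIBLE sleepers checked for signals in the slow path, so
 * a SIGKILL could not break the wait.  With signal_pending_state()
 * used unconditionally, a fatal signal now ends the wait with -EINTR.
 */
static int cfg_update(struct cfg *c, u32 val)
{
	if (mutex_lock_killable(&c->lock))
		return -EINTR;		/* killed while waiting */

	c->value = val;
	mutex_unlock(&c->lock);
	return 0;
}
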
diff --git a/features/rt/locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch b/features/rt/locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch
new file mode 100644
index 00000000..61fe6760
--- /dev/null
+++ b/features/rt/locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch
@@ -0,0 +1,59 @@
+From d3f167aba99034929675a15535bcbd7deccf97bc Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 29 Sep 2020 16:32:49 +0200
+Subject: [PATCH 063/191] locking/rtmutex: Move rt_mutex_init() outside of
+ CONFIG_DEBUG_RT_MUTEXES
+
+rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is
+enabled. The static initializer (DEFINE_RT_MUTEX) does not have such a
+restriction.
+
+Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES.
+Move the remaining functions in this CONFIG_DEBUG_RT_MUTEXES block to
+the upper block.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rtmutex.h | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 88a0ba806066..2dc10b582d4a 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -43,6 +43,7 @@ struct hrtimer_sleeper;
+ extern int rt_mutex_debug_check_no_locks_freed(const void *from,
+ unsigned long len);
+ extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task);
++ extern void rt_mutex_debug_task_free(struct task_struct *tsk);
+ #else
+ static inline int rt_mutex_debug_check_no_locks_freed(const void *from,
+ unsigned long len)
+@@ -50,22 +51,15 @@ struct hrtimer_sleeper;
+ return 0;
+ }
+ # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
++# define rt_mutex_debug_task_free(t) do { } while (0)
+ #endif
+
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+-
+-# define rt_mutex_init(mutex) \
++#define rt_mutex_init(mutex) \
+ do { \
+ static struct lock_class_key __key; \
+ __rt_mutex_init(mutex, __func__, &__key); \
+ } while (0)
+
+- extern void rt_mutex_debug_task_free(struct task_struct *tsk);
+-#else
+-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL)
+-# define rt_mutex_debug_task_free(t) do { } while (0)
+-#endif
+-
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+ , .dep_map = { .name = #mutexname }
+--
+2.19.1
+
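A brief illustration of the effect (hypothetical helper; assumes
<linux/rtmutex.h> and <linux/slab.h>): after the move, a dynamically
initialized rt_mutex gets its per-site lockdep class key on every
configuration, just like the static DEFINE_RT_MUTEX() initializer already
did:

static DEFINE_RT_MUTEX(static_lock);	/* static initializer */

static struct rt_mutex *make_lock(void)
{
	struct rt_mutex *m = kmalloc(sizeof(*m), GFP_KERNEL);

	if (m)
		rt_mutex_init(m);	/* sets up the per-site
					 * lock_class_key regardless of
					 * CONFIG_DEBUG_RT_MUTEXES */
	return m;
}
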
diff --git a/features/rt/locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/features/rt/locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
new file mode 100644
index 00000000..3d8c655e
--- /dev/null
+++ b/features/rt/locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
@@ -0,0 +1,144 @@
+From e5c1aab430c174bc70abbda6936ea83bd7eb4db6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 16:14:22 +0200
+Subject: [PATCH 072/191] locking/rtmutex: Provide rt_mutex_slowlock_locked()
+
+This is the inner part of rt_mutex_slowlock(), required for rwsem-rt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 67 +++++++++++++++++++--------------
+ kernel/locking/rtmutex_common.h | 7 ++++
+ 2 files changed, 45 insertions(+), 29 deletions(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index b346dbc37d6d..670c4a577322 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1216,35 +1216,16 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ }
+ }
+
+-/*
+- * Slow path lock function:
+- */
+-static int __sched
+-rt_mutex_slowlock(struct rt_mutex *lock, int state,
+- struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk)
++int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
++ struct hrtimer_sleeper *timeout,
++ enum rtmutex_chainwalk chwalk,
++ struct rt_mutex_waiter *waiter)
+ {
+- struct rt_mutex_waiter waiter;
+- unsigned long flags;
+- int ret = 0;
+-
+- rt_mutex_init_waiter(&waiter);
+-
+- /*
+- * Technically we could use raw_spin_[un]lock_irq() here, but this can
+- * be called in early boot if the cmpxchg() fast path is disabled
+- * (debug, no architecture support). In this case we will acquire the
+- * rtmutex with lock->wait_lock held. But we cannot unconditionally
+- * enable interrupts in that early boot case. So we need to use the
+- * irqsave/restore variants.
+- */
+- raw_spin_lock_irqsave(&lock->wait_lock, flags);
++ int ret;
+
+ /* Try to acquire the lock again: */
+- if (try_to_take_rt_mutex(lock, current, NULL)) {
+- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++ if (try_to_take_rt_mutex(lock, current, NULL))
+ return 0;
+- }
+
+ set_current_state(state);
+
+@@ -1252,16 +1233,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ if (unlikely(timeout))
+ hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+
+- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
++ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
+
+ if (likely(!ret))
+ /* sleep on the mutex */
+- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter);
+
+ if (unlikely(ret)) {
+ __set_current_state(TASK_RUNNING);
+- remove_waiter(lock, &waiter);
+- rt_mutex_handle_deadlock(ret, chwalk, &waiter);
++ remove_waiter(lock, waiter);
++ rt_mutex_handle_deadlock(ret, chwalk, waiter);
+ }
+
+ /*
+@@ -1269,6 +1250,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ * unconditionally. We might have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
++ return ret;
++}
++
++/*
++ * Slow path lock function:
++ */
++static int __sched
++rt_mutex_slowlock(struct rt_mutex *lock, int state,
++ struct hrtimer_sleeper *timeout,
++ enum rtmutex_chainwalk chwalk)
++{
++ struct rt_mutex_waiter waiter;
++ unsigned long flags;
++ int ret = 0;
++
++ rt_mutex_init_waiter(&waiter);
++
++ /*
++ * Technically we could use raw_spin_[un]lock_irq() here, but this can
++ * be called in early boot if the cmpxchg() fast path is disabled
++ * (debug, no architecture support). In this case we will acquire the
++ * rtmutex with lock->wait_lock held. But we cannot unconditionally
++ * enable interrupts in that early boot case. So we need to use the
++ * irqsave/restore variants.
++ */
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
++
++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter);
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index 37cd6b3bf6f4..b5a2affa59d5 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -15,6 +15,7 @@
+
+ #include <linux/rtmutex.h>
+ #include <linux/sched/wake_q.h>
++#include <linux/sched/debug.h>
+
+ /*
+ * This is the control structure for tasks blocked on a rt_mutex,
+@@ -153,6 +154,12 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+ extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
++/* RW semaphore special interface */
++
++int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
++ struct hrtimer_sleeper *timeout,
++ enum rtmutex_chainwalk chwalk,
++ struct rt_mutex_waiter *waiter);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
+--
+2.19.1
+
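A hedged sketch of the intended caller pattern: a lock built on top of
rtmutex (such as the RT rwsem added later in this series) can take wait_lock
itself, do its own checks, and then run only the inner slow path. The wrapper
below is illustrative, assumes it lives next to the rtmutex code in
kernel/locking/ (so the internal headers are available), and is not the
actual rwsem-rt code:

static int __sched my_rtlock_slowlock(struct rt_mutex *m)
{
	struct rt_mutex_waiter waiter;
	unsigned long flags;
	int ret;

	rt_mutex_init_waiter(&waiter);

	raw_spin_lock_irqsave(&m->wait_lock, flags);
	/* ... caller-specific state checks under wait_lock go here ... */
	ret = rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL,
				       RT_MUTEX_MIN_CHAINWALK, &waiter);
	raw_spin_unlock_irqrestore(&m->wait_lock, flags);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
}
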
diff --git a/features/rt/locking-rtmutex-Remove-cruft.patch b/features/rt/locking-rtmutex-Remove-cruft.patch
new file mode 100644
index 00000000..e2db5d4c
--- /dev/null
+++ b/features/rt/locking-rtmutex-Remove-cruft.patch
@@ -0,0 +1,98 @@
+From e859f18657493031c5e83651fea4298b1e5dd97b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 29 Sep 2020 15:21:17 +0200
+Subject: [PATCH 061/191] locking/rtmutex: Remove cruft
+
+Most of this has been around since the very beginning. I'm not sure if
+it was used while the rtmutex-deadlock-tester was around, but today it
+seems to only waste memory:
+- save_state: No users.
+- name: Assigned and printed if a deadlock was detected. I'm keeping it,
+  but want to point out that lockdep has the same information.
+- file + line: Printed if ::name was NULL. This is only used for
+  in-kernel locks, so ::name shouldn't be NULL and then ::file and
+  ::line aren't used.
+- magic: Assigned to NULL by rt_mutex_destroy().
+
+Remove members of rt_mutex which are not used.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rtmutex.h | 7 ++-----
+ kernel/locking/rtmutex-debug.c | 7 +------
+ kernel/locking/rtmutex.c | 3 ---
+ kernel/locking/rtmutex_common.h | 1 -
+ 4 files changed, 3 insertions(+), 15 deletions(-)
+
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 6fd615a0eea9..16f974a22f51 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -32,10 +32,7 @@ struct rt_mutex {
+ struct rb_root_cached waiters;
+ struct task_struct *owner;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+- int save_state;
+- const char *name, *file;
+- int line;
+- void *magic;
++ const char *name;
+ #endif
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+@@ -60,7 +57,7 @@ struct hrtimer_sleeper;
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+- , .name = #mutexname, .file = __FILE__, .line = __LINE__
++ , .name = #mutexname
+
+ # define rt_mutex_init(mutex) \
+ do { \
+diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
+index 36e69100e8e0..7e411b946d4c 100644
+--- a/kernel/locking/rtmutex-debug.c
++++ b/kernel/locking/rtmutex-debug.c
+@@ -42,12 +42,7 @@ static void printk_task(struct task_struct *p)
+
+ static void printk_lock(struct rt_mutex *lock, int print_owner)
+ {
+- if (lock->name)
+- printk(" [%p] {%s}\n",
+- lock, lock->name);
+- else
+- printk(" [%p] {%s:%d}\n",
+- lock, lock->file, lock->line);
++ printk(" [%p] {%s}\n", lock, lock->name);
+
+ if (print_owner && rt_mutex_owner(lock)) {
+ printk(".. ->owner: %p\n", lock->owner);
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 48fff6437901..170e160fc0b5 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1640,9 +1640,6 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+ void rt_mutex_destroy(struct rt_mutex *lock)
+ {
+ WARN_ON(rt_mutex_is_locked(lock));
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+- lock->magic = NULL;
+-#endif
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index ca6fb489007b..e6913103d7ff 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -30,7 +30,6 @@ struct rt_mutex_waiter {
+ struct task_struct *task;
+ struct rt_mutex *lock;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+- unsigned long ip;
+ struct pid *deadlock_task_pid;
+ struct rt_mutex *deadlock_lock;
+ #endif
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-Remove-output-from-deadlock-detector.patch b/features/rt/locking-rtmutex-Remove-output-from-deadlock-detector.patch
new file mode 100644
index 00000000..fbcd6d07
--- /dev/null
+++ b/features/rt/locking-rtmutex-Remove-output-from-deadlock-detector.patch
@@ -0,0 +1,311 @@
+From 485d93ce4bd0dfae3a0324cc8b0257ae3de02d01 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 29 Sep 2020 16:05:11 +0200
+Subject: [PATCH 062/191] locking/rtmutex: Remove output from deadlock
+ detector.
+
+In commit
+ f5694788ad8da ("rt_mutex: Add lockdep annotations")
+
+rtmutex gained lockdep annotations for rt_mutex_lock() and related
+functions.
+lockdep will see the locking order and may complain about a deadlock
+before rtmutex' own mechanism gets a chance to detect it.
+The rtmutex deadlock detector will only complain about locks taken with
+RT_MUTEX_MIN_CHAINWALK, and only while a waiter is pending. That means
+it works only for in-kernel locks because the futex interface always
+uses RT_MUTEX_FULL_CHAINWALK.
+The requirement for an active waiter limits the detector to actual
+deadlocks and makes it impossible to report potential deadlocks the way
+lockdep does.
+It looks like lockdep is better suited for reporting deadlocks.
+
+Remove rtmutex' debug print on deadlock detection.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rtmutex.h | 7 ---
+ kernel/locking/rtmutex-debug.c | 97 ---------------------------------
+ kernel/locking/rtmutex-debug.h | 11 ----
+ kernel/locking/rtmutex.c | 9 ---
+ kernel/locking/rtmutex.h | 7 ---
+ kernel/locking/rtmutex_common.h | 4 --
+ 6 files changed, 135 deletions(-)
+
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 16f974a22f51..88a0ba806066 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -31,9 +31,6 @@ struct rt_mutex {
+ raw_spinlock_t wait_lock;
+ struct rb_root_cached waiters;
+ struct task_struct *owner;
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+- const char *name;
+-#endif
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ #endif
+@@ -56,8 +53,6 @@ struct hrtimer_sleeper;
+ #endif
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+- , .name = #mutexname
+
+ # define rt_mutex_init(mutex) \
+ do { \
+@@ -67,7 +62,6 @@ do { \
+
+ extern void rt_mutex_debug_task_free(struct task_struct *tsk);
+ #else
+-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
+ # define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL)
+ # define rt_mutex_debug_task_free(t) do { } while (0)
+ #endif
+@@ -83,7 +77,6 @@ do { \
+ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
+ , .waiters = RB_ROOT_CACHED \
+ , .owner = NULL \
+- __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
+
+ #define DEFINE_RT_MUTEX(mutexname) \
+diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
+index 7e411b946d4c..fb150100335f 100644
+--- a/kernel/locking/rtmutex-debug.c
++++ b/kernel/locking/rtmutex-debug.c
+@@ -32,105 +32,12 @@
+
+ #include "rtmutex_common.h"
+
+-static void printk_task(struct task_struct *p)
+-{
+- if (p)
+- printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
+- else
+- printk("<none>");
+-}
+-
+-static void printk_lock(struct rt_mutex *lock, int print_owner)
+-{
+- printk(" [%p] {%s}\n", lock, lock->name);
+-
+- if (print_owner && rt_mutex_owner(lock)) {
+- printk(".. ->owner: %p\n", lock->owner);
+- printk(".. held by: ");
+- printk_task(rt_mutex_owner(lock));
+- printk("\n");
+- }
+-}
+-
+ void rt_mutex_debug_task_free(struct task_struct *task)
+ {
+ DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
+ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
+ }
+
+-/*
+- * We fill out the fields in the waiter to store the information about
+- * the deadlock. We print when we return. act_waiter can be NULL in
+- * case of a remove waiter operation.
+- */
+-void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
+- struct rt_mutex_waiter *act_waiter,
+- struct rt_mutex *lock)
+-{
+- struct task_struct *task;
+-
+- if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter)
+- return;
+-
+- task = rt_mutex_owner(act_waiter->lock);
+- if (task && task != current) {
+- act_waiter->deadlock_task_pid = get_pid(task_pid(task));
+- act_waiter->deadlock_lock = lock;
+- }
+-}
+-
+-void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
+-{
+- struct task_struct *task;
+-
+- if (!waiter->deadlock_lock || !debug_locks)
+- return;
+-
+- rcu_read_lock();
+- task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
+- if (!task) {
+- rcu_read_unlock();
+- return;
+- }
+-
+- if (!debug_locks_off()) {
+- rcu_read_unlock();
+- return;
+- }
+-
+- pr_warn("\n");
+- pr_warn("============================================\n");
+- pr_warn("WARNING: circular locking deadlock detected!\n");
+- pr_warn("%s\n", print_tainted());
+- pr_warn("--------------------------------------------\n");
+- printk("%s/%d is deadlocking current task %s/%d\n\n",
+- task->comm, task_pid_nr(task),
+- current->comm, task_pid_nr(current));
+-
+- printk("\n1) %s/%d is trying to acquire this lock:\n",
+- current->comm, task_pid_nr(current));
+- printk_lock(waiter->lock, 1);
+-
+- printk("\n2) %s/%d is blocked on this lock:\n",
+- task->comm, task_pid_nr(task));
+- printk_lock(waiter->deadlock_lock, 1);
+-
+- debug_show_held_locks(current);
+- debug_show_held_locks(task);
+-
+- printk("\n%s/%d's [blocked] stackdump:\n\n",
+- task->comm, task_pid_nr(task));
+- show_stack(task, NULL, KERN_DEFAULT);
+- printk("\n%s/%d's [current] stackdump:\n\n",
+- current->comm, task_pid_nr(current));
+- dump_stack();
+- debug_show_all_locks();
+- rcu_read_unlock();
+-
+- printk("[ turning off deadlock detection."
+- "Please report this trace. ]\n\n");
+-}
+-
+ void debug_rt_mutex_lock(struct rt_mutex *lock)
+ {
+ }
+@@ -153,12 +60,10 @@ void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
+ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+ {
+ memset(waiter, 0x11, sizeof(*waiter));
+- waiter->deadlock_task_pid = NULL;
+ }
+
+ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
+ {
+- put_pid(waiter->deadlock_task_pid);
+ memset(waiter, 0x22, sizeof(*waiter));
+ }
+
+@@ -168,10 +73,8 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_cl
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+- lock->name = name;
+
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+ #endif
+ }
+-
+diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
+index fc549713bba3..659e93e256c6 100644
+--- a/kernel/locking/rtmutex-debug.h
++++ b/kernel/locking/rtmutex-debug.h
+@@ -18,20 +18,9 @@ extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
+ extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
+ struct task_struct *powner);
+ extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
+-extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
+- struct rt_mutex_waiter *waiter,
+- struct rt_mutex *lock);
+-extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
+-# define debug_rt_mutex_reset_waiter(w) \
+- do { (w)->deadlock_lock = NULL; } while (0)
+
+ static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
+ enum rtmutex_chainwalk walk)
+ {
+ return (waiter != NULL);
+ }
+-
+-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
+-{
+- debug_rt_mutex_print_deadlock(w);
+-}
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 170e160fc0b5..5e04d7a247d9 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -579,7 +579,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * walk, we detected a deadlock.
+ */
+ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
+- debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
+ raw_spin_unlock(&lock->wait_lock);
+ ret = -EDEADLK;
+ goto out_unlock_pi;
+@@ -1171,8 +1170,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+- debug_rt_mutex_print_deadlock(waiter);
+-
+ schedule();
+
+ raw_spin_lock_irq(&lock->wait_lock);
+@@ -1193,10 +1190,6 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ if (res != -EDEADLOCK || detect_deadlock)
+ return;
+
+- /*
+- * Yell lowdly and stop the task right here.
+- */
+- rt_mutex_print_deadlock(w);
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+@@ -1750,8 +1743,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ ret = 0;
+ }
+
+- debug_rt_mutex_print_deadlock(waiter);
+-
+ return ret;
+ }
+
+diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
+index 732f96abf462..338ccd29119a 100644
+--- a/kernel/locking/rtmutex.h
++++ b/kernel/locking/rtmutex.h
+@@ -19,15 +19,8 @@
+ #define debug_rt_mutex_proxy_unlock(l) do { } while (0)
+ #define debug_rt_mutex_unlock(l) do { } while (0)
+ #define debug_rt_mutex_init(m, n, k) do { } while (0)
+-#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
+-#define debug_rt_mutex_print_deadlock(w) do { } while (0)
+ #define debug_rt_mutex_reset_waiter(w) do { } while (0)
+
+-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
+-{
+- WARN(1, "rtmutex deadlock detected\n");
+-}
+-
+ static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w,
+ enum rtmutex_chainwalk walk)
+ {
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index e6913103d7ff..b1455dc2366f 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -29,10 +29,6 @@ struct rt_mutex_waiter {
+ struct rb_node pi_tree_entry;
+ struct task_struct *task;
+ struct rt_mutex *lock;
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+- struct pid *deadlock_task_pid;
+- struct rt_mutex *deadlock_lock;
+-#endif
+ int prio;
+ u64 deadline;
+ };
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-Remove-rt_mutex_timed_lock.patch b/features/rt/locking-rtmutex-Remove-rt_mutex_timed_lock.patch
new file mode 100644
index 00000000..33f0f4da
--- /dev/null
+++ b/features/rt/locking-rtmutex-Remove-rt_mutex_timed_lock.patch
@@ -0,0 +1,97 @@
+From 0f2d138c83afbe1bb580d0d1ab2e50418bdb7e6e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 7 Oct 2020 12:11:33 +0200
+Subject: [PATCH 064/191] locking/rtmutex: Remove rt_mutex_timed_lock()
+
+rt_mutex_timed_lock() has no callers since commit
+ c051b21f71d1f ("rtmutex: Confine deadlock logic to futex")
+
+Remove rt_mutex_timed_lock().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rtmutex.h | 3 ---
+ kernel/locking/rtmutex.c | 46 ----------------------------------------
+ 2 files changed, 49 deletions(-)
+
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 2dc10b582d4a..243fabc2c85f 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -99,9 +99,6 @@ extern void rt_mutex_lock(struct rt_mutex *lock);
+ #endif
+
+ extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+-extern int rt_mutex_timed_lock(struct rt_mutex *lock,
+- struct hrtimer_sleeper *timeout);
+-
+ extern int rt_mutex_trylock(struct rt_mutex *lock);
+
+ extern void rt_mutex_unlock(struct rt_mutex *lock);
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 5e04d7a247d9..c9a37b6d6ea0 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1387,21 +1387,6 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
+ }
+
+-static inline int
+-rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
+- struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk,
+- int (*slowfn)(struct rt_mutex *lock, int state,
+- struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk))
+-{
+- if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
+- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+- return 0;
+-
+- return slowfn(lock, state, timeout, chwalk);
+-}
+-
+ static inline int
+ rt_mutex_fasttrylock(struct rt_mutex *lock,
+ int (*slowfn)(struct rt_mutex *lock))
+@@ -1509,37 +1494,6 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+ return __rt_mutex_slowtrylock(lock);
+ }
+
+-/**
+- * rt_mutex_timed_lock - lock a rt_mutex interruptible
+- * the timeout structure is provided
+- * by the caller
+- *
+- * @lock: the rt_mutex to be locked
+- * @timeout: timeout structure or NULL (no timeout)
+- *
+- * Returns:
+- * 0 on success
+- * -EINTR when interrupted by a signal
+- * -ETIMEDOUT when the timeout expired
+- */
+-int
+-rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
+-{
+- int ret;
+-
+- might_sleep();
+-
+- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+- ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
+- RT_MUTEX_MIN_CHAINWALK,
+- rt_mutex_slowlock);
+- if (ret)
+- mutex_release(&lock->dep_map, _RET_IP_);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
+-
+ /**
+ * rt_mutex_trylock - try to lock a rt_mutex
+ *
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/features/rt/locking-rtmutex-Use-custom-scheduling-function-for-s.patch
new file mode 100644
index 00000000..3ba23b2f
--- /dev/null
+++ b/features/rt/locking-rtmutex-Use-custom-scheduling-function-for-s.patch
@@ -0,0 +1,242 @@
+From 7e9de9093f6ab9c69a00c038c12e21054214f301 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 6 Oct 2020 13:07:17 +0200
+Subject: [PATCH 082/191] locking/rtmutex: Use custom scheduling function for
+ spin-schedule()
+
+PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on
+top of an rtmutex lock. While blocked, task->pi_blocked_on is set
+(tsk_is_pi_blocked()) and the task needs to schedule away while waiting.
+
+The schedule process must distinguish between blocking on a regular
+sleeping lock (rwsem and mutex) and an RT-only sleeping lock (spinlock
+and rwlock):
+- rwsem and mutex must flush block requests (blk_schedule_flush_plug())
+ even if blocked on a lock. This cannot deadlock because this also
+ happens for non-RT.
+ There should be a warning if the scheduling point is within an RCU read
+ section.
+
+- spinlock and rwlock must not flush block requests. This will deadlock
+ if the callback attempts to acquire a lock which is already acquired.
+ Similarly to being preempted, there should be no warning if the
+ scheduling point is within an RCU read section.
+
+Add preempt_schedule_lock(), which is invoked if scheduling is required
+while blocking on a PREEMPT_RT-only sleeping lock.
+Remove tsk_is_pi_blocked() from the scheduler path, as it is no longer
+needed with the additional scheduler entry point.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm64/include/asm/preempt.h | 3 +++
+ arch/x86/include/asm/preempt.h | 3 +++
+ include/asm-generic/preempt.h | 3 +++
+ include/linux/sched/rt.h | 8 --------
+ kernel/locking/rtmutex.c | 2 +-
+ kernel/locking/rwlock-rt.c | 2 +-
+ kernel/sched/core.c | 32 +++++++++++++++++++++-----------
+ 7 files changed, 32 insertions(+), 21 deletions(-)
+
+diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
+index 80e946b2abee..f06a23898540 100644
+--- a/arch/arm64/include/asm/preempt.h
++++ b/arch/arm64/include/asm/preempt.h
+@@ -81,6 +81,9 @@ static inline bool should_resched(int preempt_offset)
+
+ #ifdef CONFIG_PREEMPTION
+ void preempt_schedule(void);
++#ifdef CONFIG_PREEMPT_RT
++void preempt_schedule_lock(void);
++#endif
+ #define __preempt_schedule() preempt_schedule()
+ void preempt_schedule_notrace(void);
+ #define __preempt_schedule_notrace() preempt_schedule_notrace()
+diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
+index f8cb8af4de5c..7e0358f99d22 100644
+--- a/arch/x86/include/asm/preempt.h
++++ b/arch/x86/include/asm/preempt.h
+@@ -104,6 +104,9 @@ static __always_inline bool should_resched(int preempt_offset)
+ }
+
+ #ifdef CONFIG_PREEMPTION
++#ifdef CONFIG_PREEMPT_RT
++ extern void preempt_schedule_lock(void);
++#endif
+
+ extern asmlinkage void preempt_schedule(void);
+ extern asmlinkage void preempt_schedule_thunk(void);
+diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
+index d683f5e6d791..71c1535db56a 100644
+--- a/include/asm-generic/preempt.h
++++ b/include/asm-generic/preempt.h
+@@ -79,6 +79,9 @@ static __always_inline bool should_resched(int preempt_offset)
+ }
+
+ #ifdef CONFIG_PREEMPTION
++#ifdef CONFIG_PREEMPT_RT
++extern void preempt_schedule_lock(void);
++#endif
+ extern asmlinkage void preempt_schedule(void);
+ #define __preempt_schedule() preempt_schedule()
+ extern asmlinkage void preempt_schedule_notrace(void);
+diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
+index e5af028c08b4..994c25640e15 100644
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
+ }
+ extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+-{
+- return tsk->pi_blocked_on != NULL;
+-}
+ #else
+ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+ return NULL;
+ }
+ # define rt_mutex_adjust_pi(p) do { } while (0)
+-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+-{
+- return false;
+-}
+ #endif
+
+ extern void normalize_rt_tasks(void);
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 3fc947dd2e60..4cd9d6c4cd68 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1049,7 +1049,7 @@ void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
+- schedule();
++ preempt_schedule_lock();
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
+index 64c4f5728b11..948d10214ee2 100644
+--- a/kernel/locking/rwlock-rt.c
++++ b/kernel/locking/rwlock-rt.c
+@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw_lock *lock)
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+
+ if (atomic_read(&lock->readers) != 0)
+- schedule();
++ preempt_schedule_lock();
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 46e7db92b343..59fb8b18799f 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5004,7 +5004,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ *
+ * WARNING: must be called with preemption disabled!
+ */
+-static void __sched notrace __schedule(bool preempt)
++static void __sched notrace __schedule(bool preempt, bool spinning_lock)
+ {
+ struct task_struct *prev, *next;
+ unsigned long *switch_count;
+@@ -5057,7 +5057,7 @@ static void __sched notrace __schedule(bool preempt)
+ * - ptrace_{,un}freeze_traced() can change ->state underneath us.
+ */
+ prev_state = prev->state;
+- if (!preempt && prev_state) {
++ if ((!preempt || spinning_lock) && prev_state) {
+ if (signal_pending_state(prev_state, prev)) {
+ prev->state = TASK_RUNNING;
+ } else {
+@@ -5141,7 +5141,7 @@ void __noreturn do_task_dead(void)
+ /* Tell freezer to ignore us: */
+ current->flags |= PF_NOFREEZE;
+
+- __schedule(false);
++ __schedule(false, false);
+ BUG();
+
+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
+@@ -5174,9 +5174,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
+ preempt_enable_no_resched();
+ }
+
+- if (tsk_is_pi_blocked(tsk))
+- return;
+-
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+@@ -5202,7 +5199,7 @@ asmlinkage __visible void __sched schedule(void)
+ sched_submit_work(tsk);
+ do {
+ preempt_disable();
+- __schedule(false);
++ __schedule(false, false);
+ sched_preempt_enable_no_resched();
+ } while (need_resched());
+ sched_update_worker(tsk);
+@@ -5230,7 +5227,7 @@ void __sched schedule_idle(void)
+ */
+ WARN_ON_ONCE(current->state);
+ do {
+- __schedule(false);
++ __schedule(false, false);
+ } while (need_resched());
+ }
+
+@@ -5283,7 +5280,7 @@ static void __sched notrace preempt_schedule_common(void)
+ */
+ preempt_disable_notrace();
+ preempt_latency_start(1);
+- __schedule(true);
++ __schedule(true, false);
+ preempt_latency_stop(1);
+ preempt_enable_no_resched_notrace();
+
+@@ -5313,6 +5310,19 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
+ NOKPROBE_SYMBOL(preempt_schedule);
+ EXPORT_SYMBOL(preempt_schedule);
+
++#ifdef CONFIG_PREEMPT_RT
++void __sched notrace preempt_schedule_lock(void)
++{
++ do {
++ preempt_disable();
++ __schedule(true, true);
++ sched_preempt_enable_no_resched();
++ } while (need_resched());
++}
++NOKPROBE_SYMBOL(preempt_schedule_lock);
++EXPORT_SYMBOL(preempt_schedule_lock);
++#endif
++
+ #ifdef CONFIG_PREEMPT_DYNAMIC
+ DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
+ EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
+@@ -5362,7 +5372,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
+ * an infinite recursion.
+ */
+ prev_ctx = exception_enter();
+- __schedule(true);
++ __schedule(true, false);
+ exception_exit(prev_ctx);
+
+ preempt_latency_stop(1);
+@@ -5580,7 +5590,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
+ do {
+ preempt_disable();
+ local_irq_enable();
+- __schedule(true);
++ __schedule(true, false);
+ local_irq_disable();
+ sched_preempt_enable_no_resched();
+ } while (need_resched());
+--
+2.19.1
+
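A tiny illustration of the second bullet in the patch description above
(hypothetical types; assumes <linux/rculist.h> and <linux/spinlock.h>): on RT
a task may block, and therefore schedule, on a contended spinlock_t while
inside an RCU read section, and the new preempt_schedule_lock() path has to
treat that like an involuntary preemption rather than a voluntary sleep:

struct entry {
	struct list_head node;
	spinlock_t lock;		/* sleeping lock on RT */
	unsigned long refs;
};

static void bump_all(struct list_head *head)
{
	struct entry *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, head, node) {
		spin_lock(&e->lock);	/* may schedule on RT via
					 * preempt_schedule_lock() */
		e->refs++;
		spin_unlock(&e->lock);
	}
	rcu_read_unlock();
}
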
diff --git a/features/rt/locking-rtmutex-add-mutex-implementation-based-on-rt.patch b/features/rt/locking-rtmutex-add-mutex-implementation-based-on-rt.patch
new file mode 100644
index 00000000..287478c2
--- /dev/null
+++ b/features/rt/locking-rtmutex-add-mutex-implementation-based-on-rt.patch
@@ -0,0 +1,384 @@
+From d1a9c2854e127015b149b26f5d1bf143c7f2c8ce Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 17:17:03 +0200
+Subject: [PATCH 077/191] locking/rtmutex: add mutex implementation based on
+ rtmutex
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/mutex_rt.h | 130 ++++++++++++++++++++++
+ kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 354 insertions(+)
+ create mode 100644 include/linux/mutex_rt.h
+ create mode 100644 kernel/locking/mutex-rt.c
+
+diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
+new file mode 100644
+index 000000000000..f0b2e07cd5c5
+--- /dev/null
++++ b/include/linux/mutex_rt.h
+@@ -0,0 +1,130 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef __LINUX_MUTEX_RT_H
++#define __LINUX_MUTEX_RT_H
++
++#ifndef __LINUX_MUTEX_H
++#error "Please include mutex.h"
++#endif
++
++#include <linux/rtmutex.h>
++
++/* FIXME: Just for __lockfunc */
++#include <linux/spinlock.h>
++
++struct mutex {
++ struct rt_mutex lock;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++};
++
++#define __MUTEX_INITIALIZER(mutexname) \
++ { \
++ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
++ }
++
++#define DEFINE_MUTEX(mutexname) \
++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
++
++extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
++extern void __lockfunc _mutex_lock(struct mutex *lock);
++extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass);
++extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
++extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
++extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
++extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
++extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
++extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
++extern int __lockfunc _mutex_trylock(struct mutex *lock);
++extern void __lockfunc _mutex_unlock(struct mutex *lock);
++
++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
++#define mutex_lock(l) _mutex_lock(l)
++#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
++#define mutex_lock_killable(l) _mutex_lock_killable(l)
++#define mutex_trylock(l) _mutex_trylock(l)
++#define mutex_unlock(l) _mutex_unlock(l)
++#define mutex_lock_io(l)			_mutex_lock_io_nested(l, 0)
++
++#define __mutex_owner(l) ((l)->lock.owner)
++
++#ifdef CONFIG_DEBUG_MUTEXES
++#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
++#else
++static inline void mutex_destroy(struct mutex *lock) {}
++#endif
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
++# define mutex_lock_interruptible_nested(l, s) \
++ _mutex_lock_interruptible_nested(l, s)
++# define mutex_lock_killable_nested(l, s) \
++ _mutex_lock_killable_nested(l, s)
++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s)
++
++# define mutex_lock_nest_lock(lock, nest_lock) \
++do { \
++ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
++ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
++} while (0)
++
++#else
++# define mutex_lock_nested(l, s) _mutex_lock(l)
++# define mutex_lock_interruptible_nested(l, s) \
++ _mutex_lock_interruptible(l)
++# define mutex_lock_killable_nested(l, s) \
++ _mutex_lock_killable(l)
++# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s)
++#endif
++
++# define mutex_init(mutex) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(mutex)->lock); \
++ __mutex_do_init((mutex), #mutex, &__key); \
++} while (0)
++
++# define __mutex_init(mutex, name, key) \
++do { \
++ rt_mutex_init(&(mutex)->lock); \
++ __mutex_do_init((mutex), name, key); \
++} while (0)
++
++/**
++ * These values are chosen such that FAIL and SUCCESS match the
++ * values of the regular mutex_trylock().
++ */
++enum mutex_trylock_recursive_enum {
++ MUTEX_TRYLOCK_FAILED = 0,
++ MUTEX_TRYLOCK_SUCCESS = 1,
++ MUTEX_TRYLOCK_RECURSIVE,
++};
++/**
++ * mutex_trylock_recursive - trylock variant that allows recursive locking
++ * @lock: mutex to be locked
++ *
++ * This function should not be used, _ever_. It is purely for hysterical GEM
++ * raisins, and once those are gone this will be removed.
++ *
++ * Returns:
++ * MUTEX_TRYLOCK_FAILED - trylock failed,
++ * MUTEX_TRYLOCK_SUCCESS - lock acquired,
++ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
++ */
++int __rt_mutex_owner_current(struct rt_mutex *lock);
++
++static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
++mutex_trylock_recursive(struct mutex *lock)
++{
++ if (unlikely(__rt_mutex_owner_current(&lock->lock)))
++ return MUTEX_TRYLOCK_RECURSIVE;
++
++ return mutex_trylock(lock);
++}
++
++extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
++
++#endif
+diff --git a/kernel/locking/mutex-rt.c b/kernel/locking/mutex-rt.c
+new file mode 100644
+index 000000000000..2b849e6b9b4a
+--- /dev/null
++++ b/kernel/locking/mutex-rt.c
+@@ -0,0 +1,224 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Real-Time Preemption Support
++ *
++ * started by Ingo Molnar:
++ *
++ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
++ *
++ * historic credit for proving that Linux spinlocks can be implemented via
++ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
++ * and others) who prototyped it on 2.4 and did lots of comparative
++ * research and analysis; TimeSys, for proving that you can implement a
++ * fully preemptible kernel via the use of IRQ threading and mutexes;
++ * Bill Huey for persuasively arguing on lkml that the mutex model is the
++ * right one; and to MontaVista, who ported pmutexes to 2.6.
++ *
++ * This code is a from-scratch implementation and is not based on pmutexes,
++ * but the idea of converting spinlocks to mutexes is used here too.
++ *
++ * lock debugging, locking tree, deadlock detection:
++ *
++ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
++ * Released under the General Public License (GPL).
++ *
++ * Includes portions of the generic R/W semaphore implementation from:
++ *
++ * Copyright (c) 2001 David Howells (dhowells@redhat.com).
++ * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
++ * - Derived also from comments by Linus
++ *
++ * Pending ownership of locks and ownership stealing:
++ *
++ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
++ *
++ * (also by Steven Rostedt)
++ * - Converted single pi_lock to individual task locks.
++ *
++ * By Esben Nielsen:
++ * Doing priority inheritance with help of the scheduler.
++ *
++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
++ *  - major rework based on Esben Nielsen's initial patch
++ * - replaced thread_info references by task_struct refs
++ * - removed task->pending_owner dependency
++ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
++ * in the scheduler return path as discussed with Steven Rostedt
++ *
++ * Copyright (C) 2006, Kihon Technologies Inc.
++ * Steven Rostedt <rostedt@goodmis.org>
++ * - debugged and patched Thomas Gleixner's rework.
++ * - added back the cmpxchg to the rework.
++ * - turned atomic require back on for SMP.
++ */
++
++#include <linux/spinlock.h>
++#include <linux/rtmutex.h>
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/module.h>
++#include <linux/kallsyms.h>
++#include <linux/syscalls.h>
++#include <linux/interrupt.h>
++#include <linux/plist.h>
++#include <linux/fs.h>
++#include <linux/futex.h>
++#include <linux/hrtimer.h>
++#include <linux/blkdev.h>
++
++#include "rtmutex_common.h"
++
++/*
++ * struct mutex functions
++ */
++void __mutex_do_init(struct mutex *mutex, const char *name,
++ struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
++ lockdep_init_map(&mutex->dep_map, name, key, 0);
++#endif
++ mutex->lock.save_state = 0;
++}
++EXPORT_SYMBOL(__mutex_do_init);
++
++static int _mutex_lock_blk_flush(struct mutex *lock, int state)
++{
++ /*
++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
++ * late if one of the callbacks needs to acquire a sleeping lock.
++ */
++ if (blk_needs_flush_plug(current))
++ blk_schedule_flush_plug(current);
++ return __rt_mutex_lock_state(&lock->lock, state);
++}
++
++void __lockfunc _mutex_lock(struct mutex *lock)
++{
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
++}
++EXPORT_SYMBOL(_mutex_lock);
++
++void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass)
++{
++ int token;
++
++ token = io_schedule_prepare();
++
++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
++
++ io_schedule_finish(token);
++}
++EXPORT_SYMBOL_GPL(_mutex_lock_io_nested);
++
++int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
++ if (ret)
++ mutex_release(&lock->dep_map, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_interruptible);
++
++int __lockfunc _mutex_lock_killable(struct mutex *lock)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
++ if (ret)
++ mutex_release(&lock->dep_map, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_killable);
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
++{
++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
++}
++EXPORT_SYMBOL(_mutex_lock_nested);
++
++void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
++{
++ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
++}
++EXPORT_SYMBOL(_mutex_lock_nest_lock);
++
++int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
++{
++ int ret;
++
++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
++ if (ret)
++ mutex_release(&lock->dep_map, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
++
++int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
++ if (ret)
++ mutex_release(&lock->dep_map, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_killable_nested);
++#endif
++
++int __lockfunc _mutex_trylock(struct mutex *lock)
++{
++ int ret = __rt_mutex_trylock(&lock->lock);
++
++ if (ret)
++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_trylock);
++
++void __lockfunc _mutex_unlock(struct mutex *lock)
++{
++ mutex_release(&lock->dep_map, _RET_IP_);
++ __rt_mutex_unlock(&lock->lock);
++}
++EXPORT_SYMBOL(_mutex_unlock);
++
++/**
++ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
++ * @cnt: the atomic which we are to dec
++ * @lock: the mutex to return holding if we dec to 0
++ *
++ * return true and hold lock if we dec to 0, return false otherwise
++ */
++int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
++{
++ /* dec if we can't possibly hit 0 */
++ if (atomic_add_unless(cnt, -1, 1))
++ return 0;
++ /* we might hit 0, so take the lock */
++ mutex_lock(lock);
++ if (!atomic_dec_and_test(cnt)) {
++ /* when we actually did the dec, we didn't hit 0 */
++ mutex_unlock(lock);
++ return 0;
++ }
++ /* we hit 0, and we hold the lock */
++ return 1;
++}
++EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
+--
+2.19.1
+
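The atomic_dec_and_mutex_lock() helper added above implements the classic "decrement, and only take the lock when the count might reach zero" pattern. The following is a rough user-space sketch of the same idea, not part of the patch series: struct obj and refcount_put_and_lock() are invented names, and C11 atomics plus a pthread mutex stand in for the kernel's atomic_t and RT mutex.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	atomic_int refs;		/* reference count */
	pthread_mutex_t lock;		/* serializes teardown */
};

/*
 * Drop one reference. Returns true with o->lock held when the count
 * reached zero, mirroring atomic_dec_and_mutex_lock() above.
 */
static bool refcount_put_and_lock(struct obj *o)
{
	int old = atomic_load(&o->refs);

	/* Fast path: decrement as long as we cannot possibly hit zero. */
	while (old > 1) {
		if (atomic_compare_exchange_weak(&o->refs, &old, old - 1))
			return false;
	}

	/* We might hit zero, so serialize against other droppers. */
	pthread_mutex_lock(&o->lock);
	if (atomic_fetch_sub(&o->refs, 1) != 1) {
		/* Someone else still holds a reference. */
		pthread_mutex_unlock(&o->lock);
		return false;
	}
	/* The count hit zero and we hold the lock: safe to tear down. */
	return true;
}

int main(void)
{
	struct obj o = { .refs = 1, .lock = PTHREAD_MUTEX_INITIALIZER };

	if (refcount_put_and_lock(&o)) {
		printf("last reference dropped, lock held\n");
		pthread_mutex_unlock(&o.lock);
	}
	return 0;
}

The point of the pattern is that the mutex is only touched for the final decrement; all earlier puts stay on the lock-free fast path, which is what keeps the helper cheap on top of the RT mutex as well.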
diff --git a/features/rt/locking-rtmutex-add-rwlock-implementation-based-on-r.patch b/features/rt/locking-rtmutex-add-rwlock-implementation-based-on-r.patch
new file mode 100644
index 00000000..c45a4242
--- /dev/null
+++ b/features/rt/locking-rtmutex-add-rwlock-implementation-based-on-r.patch
@@ -0,0 +1,557 @@
+From 157ad15dc4fba83f4959571c08ebc2654dbdacd6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 17:18:06 +0200
+Subject: [PATCH 079/191] locking/rtmutex: add rwlock implementation based on
+ rtmutex
+
+The implementation is bias-based, similar to the rwsem implementation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rwlock_rt.h | 113 +++++++++++
+ include/linux/rwlock_types_rt.h | 56 ++++++
+ kernel/Kconfig.locks | 2 +-
+ kernel/locking/rwlock-rt.c | 334 ++++++++++++++++++++++++++++++++
+ 4 files changed, 504 insertions(+), 1 deletion(-)
+ create mode 100644 include/linux/rwlock_rt.h
+ create mode 100644 include/linux/rwlock_types_rt.h
+ create mode 100644 kernel/locking/rwlock-rt.c
+
+diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
+new file mode 100644
+index 000000000000..552b8d69cdb3
+--- /dev/null
++++ b/include/linux/rwlock_rt.h
+@@ -0,0 +1,113 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef __LINUX_RWLOCK_RT_H
++#define __LINUX_RWLOCK_RT_H
++
++#ifndef __LINUX_SPINLOCK_H
++#error Do not include directly. Use spinlock.h
++#endif
++
++extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
++extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
++extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
++extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
++extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
++extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
++extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock);
++extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock);
++extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
++extern int __lockfunc rt_rwlock_is_contended(rwlock_t *rwlock);
++
++#define read_can_lock(rwlock) rt_read_can_lock(rwlock)
++#define write_can_lock(rwlock) rt_write_can_lock(rwlock)
++
++#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
++#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
++
++static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags)
++{
++ *flags = 0;
++ return rt_write_trylock(lock);
++}
++
++#define write_trylock_irqsave(lock, flags) \
++ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags)))
++
++#define read_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ rt_read_lock(lock); \
++ flags = 0; \
++ } while (0)
++
++#define write_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ rt_write_lock(lock); \
++ flags = 0; \
++ } while (0)
++
++#define read_lock(lock) rt_read_lock(lock)
++
++#define read_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ rt_read_lock(lock); \
++ } while (0)
++
++#define read_lock_irq(lock) read_lock(lock)
++
++#define write_lock(lock) rt_write_lock(lock)
++
++#define write_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ rt_write_lock(lock); \
++ } while (0)
++
++#define write_lock_irq(lock) write_lock(lock)
++
++#define read_unlock(lock) rt_read_unlock(lock)
++
++#define read_unlock_bh(lock) \
++ do { \
++ rt_read_unlock(lock); \
++ local_bh_enable(); \
++ } while (0)
++
++#define read_unlock_irq(lock) read_unlock(lock)
++
++#define write_unlock(lock) rt_write_unlock(lock)
++
++#define write_unlock_bh(lock) \
++ do { \
++ rt_write_unlock(lock); \
++ local_bh_enable(); \
++ } while (0)
++
++#define write_unlock_irq(lock) write_unlock(lock)
++
++#define read_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ rt_read_unlock(lock); \
++ } while (0)
++
++#define write_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ rt_write_unlock(lock); \
++ } while (0)
++
++#define rwlock_init(rwl) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ __rt_rwlock_init(rwl, #rwl, &__key); \
++} while (0)
++
++#define rwlock_is_contended(lock) \
++ rt_rwlock_is_contended(lock)
++
++#endif
+diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h
+new file mode 100644
+index 000000000000..4762391d659b
+--- /dev/null
++++ b/include/linux/rwlock_types_rt.h
+@@ -0,0 +1,56 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef __LINUX_RWLOCK_TYPES_RT_H
++#define __LINUX_RWLOCK_TYPES_RT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
++#else
++# define RW_DEP_MAP_INIT(lockname)
++#endif
++
++typedef struct rt_rw_lock rwlock_t;
++
++#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name)
++
++#define DEFINE_RWLOCK(name) \
++ rwlock_t name = __RW_LOCK_UNLOCKED(name)
++
++/*
++ * A reader biased implementation primarily for CPU pinning.
++ *
++ * Can be selected as a general replacement for the single reader RT rwlock
++ * variant.
++ */
++struct rt_rw_lock {
++ struct rt_mutex rtmutex;
++ atomic_t readers;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++};
++
++#define READER_BIAS (1U << 31)
++#define WRITER_BIAS (1U << 30)
++
++#define __RWLOCK_RT_INITIALIZER(name) \
++{ \
++ .readers = ATOMIC_INIT(READER_BIAS), \
++ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \
++ RW_DEP_MAP_INIT(name) \
++}
++
++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
++ struct lock_class_key *key);
++
++#define rwlock_biased_rt_init(rwlock) \
++ do { \
++ static struct lock_class_key __key; \
++ \
++ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \
++ } while (0)
++
++#endif
+diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
+index 3de8fd11873b..4198f0273ecd 100644
+--- a/kernel/Kconfig.locks
++++ b/kernel/Kconfig.locks
+@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS
+
+ config QUEUED_RWLOCKS
+ def_bool y if ARCH_USE_QUEUED_RWLOCKS
+- depends on SMP
++ depends on SMP && !PREEMPT_RT
+
+ config ARCH_HAS_MMIOWB
+ bool
+diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
+new file mode 100644
+index 000000000000..64c4f5728b11
+--- /dev/null
++++ b/kernel/locking/rwlock-rt.c
+@@ -0,0 +1,334 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include <linux/sched/debug.h>
++#include <linux/export.h>
++
++#include "rtmutex_common.h"
++#include <linux/rwlock_types_rt.h>
++
++/*
++ * RT-specific reader/writer locks
++ *
++ * write_lock()
++ * 1) Lock lock->rtmutex
++ * 2) Remove the reader BIAS to force readers into the slow path
++ * 3) Wait until all readers have left the critical region
++ * 4) Mark it write locked
++ *
++ * write_unlock()
++ * 1) Remove the write locked marker
++ * 2) Set the reader BIAS so readers can use the fast path again
++ * 3) Unlock lock->rtmutex to release blocked readers
++ *
++ * read_lock()
++ * 1) Try fast path acquisition (reader BIAS is set)
++ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag
++ * 3) If !writelocked, acquire it for read
++ * 4) If writelocked, block on lock->rtmutex
++ * 5) unlock lock->rtmutex, goto 1)
++ *
++ * read_unlock()
++ * 1) Try fast path release (reader count != 1)
++ * 2) Wake the writer waiting in write_lock()#3
++ *
++ * read_lock()#3 has the consequence that rw locks on RT are not writer
++ * fair, but writers, which should be avoided in RT tasks (think tasklist
++ * lock), are subject to the rtmutex priority/DL inheritance mechanism.
++ *
++ * It's possible to make the rw locks writer fair by keeping a list of
++ * active readers. A blocked writer would force all newly incoming readers
++ * to block on the rtmutex, but the rtmutex would have to be proxy locked
++ * for one reader after the other. We can't use multi-reader inheritance
++ * because there is no way to support that with
++ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover
++ * mechanism is a major surgery for a very dubious value.
++ *
++ * The risk of writer starvation is there, but the pathological use cases
++ * which trigger it are not necessarily the typical RT workloads.
++ */
++
++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
++ struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held semaphore:
++ */
++ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
++ lockdep_init_map(&lock->dep_map, name, key, 0);
++#endif
++ atomic_set(&lock->readers, READER_BIAS);
++ rt_mutex_init(&lock->rtmutex);
++ lock->rtmutex.save_state = 1;
++}
++
++static int __read_rt_trylock(struct rt_rw_lock *lock)
++{
++ int r, old;
++
++ /*
++ * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is
++ * set.
++ */
++ for (r = atomic_read(&lock->readers); r < 0;) {
++ old = atomic_cmpxchg(&lock->readers, r, r + 1);
++ if (likely(old == r))
++ return 1;
++ r = old;
++ }
++ return 0;
++}
++
++static void __read_rt_lock(struct rt_rw_lock *lock)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++ struct rt_mutex_waiter waiter;
++ unsigned long flags;
++
++ if (__read_rt_trylock(lock))
++ return;
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ /*
++ * Allow readers as long as the writer has not completely
++ * acquired the semaphore for write.
++ */
++ if (atomic_read(&lock->readers) != WRITER_BIAS) {
++ atomic_inc(&lock->readers);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ return;
++ }
++
++ /*
++ * Call into the slow lock path with the rtmutex->wait_lock
++ * held, so this can't result in the following race:
++ *
++ * Reader1 Reader2 Writer
++ * read_lock()
++ * write_lock()
++ * rtmutex_lock(m)
++ * swait()
++ * read_lock()
++ * unlock(m->wait_lock)
++ * read_unlock()
++ * swake()
++ * lock(m->wait_lock)
++ * lock->writelocked=true
++ * unlock(m->wait_lock)
++ *
++ * write_unlock()
++ * lock->writelocked=false
++ * rtmutex_unlock(m)
++ * read_lock()
++ * write_lock()
++ * rtmutex_lock(m)
++ * swait()
++ * rtmutex_lock(m)
++ *
++ * That would put Reader1 behind the writer waiting on
++ * Reader2 to call read_unlock() which might be unbound.
++ */
++ rt_mutex_init_waiter(&waiter, true);
++ rt_spin_lock_slowlock_locked(m, &waiter, flags);
++ /*
++	 * The slowlock() above is guaranteed to return with the rtmutex now
++	 * held, so there can't be a writer active. Increment the reader
++ * count and immediately drop the rtmutex again.
++ */
++ atomic_inc(&lock->readers);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ rt_spin_lock_slowunlock(m);
++
++ debug_rt_mutex_free_waiter(&waiter);
++}
++
++static void __read_rt_unlock(struct rt_rw_lock *lock)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++ struct task_struct *tsk;
++
++ /*
++	 * lock->readers can only hit 0 when a writer is waiting for the
++ * active readers to leave the critical region.
++ */
++ if (!atomic_dec_and_test(&lock->readers))
++ return;
++
++ raw_spin_lock_irq(&m->wait_lock);
++ /*
++ * Wake the writer, i.e. the rtmutex owner. It might release the
++ * rtmutex concurrently in the fast path, but to clean up the rw
++ * lock it needs to acquire m->wait_lock. The worst case which can
++ * happen is a spurious wakeup.
++ */
++ tsk = rt_mutex_owner(m);
++ if (tsk)
++ wake_up_process(tsk);
++
++ raw_spin_unlock_irq(&m->wait_lock);
++}
++
++static void __write_unlock_common(struct rt_rw_lock *lock, int bias,
++ unsigned long flags)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++
++ atomic_add(READER_BIAS - bias, &lock->readers);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ rt_spin_lock_slowunlock(m);
++}
++
++static void __write_rt_lock(struct rt_rw_lock *lock)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++ struct task_struct *self = current;
++ unsigned long flags;
++
++ /* Take the rtmutex as a first step */
++ __rt_spin_lock(m);
++
++ /* Force readers into slow path */
++ atomic_sub(READER_BIAS, &lock->readers);
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++
++ raw_spin_lock(&self->pi_lock);
++ self->saved_state = self->state;
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
++
++ for (;;) {
++ /* Have all readers left the critical region? */
++ if (!atomic_read(&lock->readers)) {
++ atomic_set(&lock->readers, WRITER_BIAS);
++ raw_spin_lock(&self->pi_lock);
++ __set_current_state_no_track(self->saved_state);
++ self->saved_state = TASK_RUNNING;
++ raw_spin_unlock(&self->pi_lock);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ return;
++ }
++
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++
++ if (atomic_read(&lock->readers) != 0)
++ schedule();
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++
++ raw_spin_lock(&self->pi_lock);
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
++ }
++}
++
++static int __write_rt_trylock(struct rt_rw_lock *lock)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++ unsigned long flags;
++
++ if (!__rt_mutex_trylock(m))
++ return 0;
++
++ atomic_sub(READER_BIAS, &lock->readers);
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ if (!atomic_read(&lock->readers)) {
++ atomic_set(&lock->readers, WRITER_BIAS);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ return 1;
++ }
++ __write_unlock_common(lock, 0, flags);
++ return 0;
++}
++
++static void __write_rt_unlock(struct rt_rw_lock *lock)
++{
++ struct rt_mutex *m = &lock->rtmutex;
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ __write_unlock_common(lock, WRITER_BIAS, flags);
++}
++
++int __lockfunc rt_read_can_lock(rwlock_t *rwlock)
++{
++ return atomic_read(&rwlock->readers) < 0;
++}
++
++int __lockfunc rt_write_can_lock(rwlock_t *rwlock)
++{
++ return atomic_read(&rwlock->readers) == READER_BIAS;
++}
++
++/*
++ * The common functions which get wrapped into the rwlock API.
++ */
++int __lockfunc rt_read_trylock(rwlock_t *rwlock)
++{
++ int ret;
++
++ ret = __read_rt_trylock(rwlock);
++ if (ret) {
++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
++ migrate_disable();
++ }
++ return ret;
++}
++EXPORT_SYMBOL(rt_read_trylock);
++
++int __lockfunc rt_write_trylock(rwlock_t *rwlock)
++{
++ int ret;
++
++ ret = __write_rt_trylock(rwlock);
++ if (ret) {
++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
++ migrate_disable();
++ }
++ return ret;
++}
++EXPORT_SYMBOL(rt_write_trylock);
++
++void __lockfunc rt_read_lock(rwlock_t *rwlock)
++{
++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
++ __read_rt_lock(rwlock);
++ migrate_disable();
++}
++EXPORT_SYMBOL(rt_read_lock);
++
++void __lockfunc rt_write_lock(rwlock_t *rwlock)
++{
++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
++ __write_rt_lock(rwlock);
++ migrate_disable();
++}
++EXPORT_SYMBOL(rt_write_lock);
++
++void __lockfunc rt_read_unlock(rwlock_t *rwlock)
++{
++ rwlock_release(&rwlock->dep_map, _RET_IP_);
++ migrate_enable();
++ __read_rt_unlock(rwlock);
++}
++EXPORT_SYMBOL(rt_read_unlock);
++
++void __lockfunc rt_write_unlock(rwlock_t *rwlock)
++{
++ rwlock_release(&rwlock->dep_map, _RET_IP_);
++ migrate_enable();
++ __write_rt_unlock(rwlock);
++}
++EXPORT_SYMBOL(rt_write_unlock);
++
++void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
++{
++ __rwlock_biased_rt_init(rwlock, name, key);
++}
++EXPORT_SYMBOL(__rt_rwlock_init);
++
++int __lockfunc rt_rwlock_is_contended(rwlock_t *rwlock)
++{
++ return rt_mutex_has_waiters(&rwlock->rtmutex);
++}
++EXPORT_SYMBOL(rt_rwlock_is_contended);
+--
+2.19.1
+
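The reader fast path of the rwlock above depends on the readers word starting out at READER_BIAS, so it reads as a negative value until a writer removes the bias. Below is a minimal user-space sketch of that fast path, not taken from the patch series: C11 atomics stand in for the kernel's atomic_t, the cmpxchg loop mirrors __read_rt_trylock(), and two's complement conversion of READER_BIAS to int is assumed, as in the kernel.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define READER_BIAS	(1U << 31)	/* same bias value as in the patch */

/* Stand-in for the readers word of struct rt_rw_lock. */
static atomic_int readers = (int)READER_BIAS;

/*
 * Reader fast path, mirroring __read_rt_trylock(): increment the count
 * only while it is negative, i.e. while READER_BIAS is still present
 * and no writer has removed it.
 */
static bool read_trylock_fastpath(void)
{
	int r = atomic_load(&readers);

	while (r < 0) {
		/* On failure the current value is reloaded into r. */
		if (atomic_compare_exchange_weak(&readers, &r, r + 1))
			return true;
	}
	return false;	/* a writer removed the bias: take the slow path */
}

int main(void)
{
	printf("reader fast path: %s\n",
	       read_trylock_fastpath() ? "acquired" : "slow path");

	/* A writer forces readers into the slow path by removing the bias. */
	atomic_fetch_sub(&readers, (int)READER_BIAS);

	printf("after bias removal: %s\n",
	       read_trylock_fastpath() ? "acquired" : "slow path");
	return 0;
}

Once the bias is gone the word turns non-negative, the cmpxchg loop gives up and readers fall back to the rtmutex-based slow path shown in __read_rt_lock().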
diff --git a/features/rt/locking-rtmutex-add-rwsem-implementation-based-on-rt.patch b/features/rt/locking-rtmutex-add-rwsem-implementation-based-on-rt.patch
new file mode 100644
index 00000000..335f9ac9
--- /dev/null
+++ b/features/rt/locking-rtmutex-add-rwsem-implementation-based-on-rt.patch
@@ -0,0 +1,454 @@
+From 94f118173e2b3760d995876193660096e0284c87 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 17:28:34 +0200
+Subject: [PATCH 078/191] locking/rtmutex: add rwsem implementation based on
+ rtmutex
+
+The RT specific R/W semaphore implementation restricts the number of readers
+to one because a writer cannot block on multiple readers and inherit its
+priority or budget.
+
+The single reader restriction is painful in various ways:
+
+ - Performance bottleneck for multi-threaded applications in the page fault
+ path (mmap sem)
+
+ - Progress blocker for drivers which are carefully crafted to avoid the
+ potential reader/writer deadlock in mainline.
+
+The analysis of the writer code paths shows that properly written RT tasks
+should not take them. Syscalls like mmap() and file accesses which take the
+mmap sem write locked have unbounded latencies which are completely unrelated
+to the mmap sem. Other R/W sem users like graphics drivers are not suitable
+for RT tasks either.
+
+So there is little risk to hurt RT tasks when the RT rwsem implementation is
+changed in the following way:
+
+ - Allow concurrent readers
+
+ - Make writers block until the last reader left the critical section. This
+ blocking is not subject to priority/budget inheritance.
+
+ - Readers blocked on a writer inherit their priority/budget in the normal
+ way.
+
+There is a drawback with this scheme. R/W semaphores become writer unfair
+though the applications which have triggered writer starvation (mostly on
+mmap_sem) in the past are not really the typical workloads running on an RT
+system. So while it's unlikely to hit writer starvation, it's possible. If
+there are unexpected workloads on RT systems triggering it, we need to rethink
+the approach.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rwsem-rt.h | 70 +++++++++
+ kernel/locking/rwsem-rt.c | 318 ++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 388 insertions(+)
+ create mode 100644 include/linux/rwsem-rt.h
+ create mode 100644 kernel/locking/rwsem-rt.c
+
+diff --git a/include/linux/rwsem-rt.h b/include/linux/rwsem-rt.h
+new file mode 100644
+index 000000000000..0ba8aae9a198
+--- /dev/null
++++ b/include/linux/rwsem-rt.h
+@@ -0,0 +1,70 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef _LINUX_RWSEM_RT_H
++#define _LINUX_RWSEM_RT_H
++
++#ifndef _LINUX_RWSEM_H
++#error "Include rwsem.h"
++#endif
++
++#include <linux/rtmutex.h>
++#include <linux/swait.h>
++
++#define READER_BIAS (1U << 31)
++#define WRITER_BIAS (1U << 30)
++
++struct rw_semaphore {
++ atomic_t readers;
++ struct rt_mutex rtmutex;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++};
++
++#define __RWSEM_INITIALIZER(name) \
++{ \
++ .readers = ATOMIC_INIT(READER_BIAS), \
++ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \
++ RW_DEP_MAP_INIT(name) \
++}
++
++#define DECLARE_RWSEM(lockname) \
++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
++
++extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
++ struct lock_class_key *key);
++
++#define __init_rwsem(sem, name, key) \
++do { \
++ rt_mutex_init(&(sem)->rtmutex); \
++ __rwsem_init((sem), (name), (key)); \
++} while (0)
++
++#define init_rwsem(sem) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ __init_rwsem((sem), #sem, &__key); \
++} while (0)
++
++static inline int rwsem_is_locked(struct rw_semaphore *sem)
++{
++ return atomic_read(&sem->readers) != READER_BIAS;
++}
++
++static inline int rwsem_is_contended(struct rw_semaphore *sem)
++{
++ return atomic_read(&sem->readers) > 0;
++}
++
++extern void __down_read(struct rw_semaphore *sem);
++extern int __down_read_interruptible(struct rw_semaphore *sem);
++extern int __down_read_killable(struct rw_semaphore *sem);
++extern int __down_read_trylock(struct rw_semaphore *sem);
++extern void __down_write(struct rw_semaphore *sem);
++extern int __must_check __down_write_killable(struct rw_semaphore *sem);
++extern int __down_write_trylock(struct rw_semaphore *sem);
++extern void __up_read(struct rw_semaphore *sem);
++extern void __up_write(struct rw_semaphore *sem);
++extern void __downgrade_write(struct rw_semaphore *sem);
++
++#endif
+diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c
+new file mode 100644
+index 000000000000..a0771c150041
+--- /dev/null
++++ b/kernel/locking/rwsem-rt.c
+@@ -0,0 +1,318 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include <linux/rwsem.h>
++#include <linux/sched/debug.h>
++#include <linux/sched/signal.h>
++#include <linux/export.h>
++#include <linux/blkdev.h>
++
++#include "rtmutex_common.h"
++
++/*
++ * RT-specific reader/writer semaphores
++ *
++ * down_write()
++ * 1) Lock sem->rtmutex
++ * 2) Remove the reader BIAS to force readers into the slow path
++ * 3) Wait until all readers have left the critical region
++ * 4) Mark it write locked
++ *
++ * up_write()
++ * 1) Remove the write locked marker
++ * 2) Set the reader BIAS so readers can use the fast path again
++ * 3) Unlock sem->rtmutex to release blocked readers
++ *
++ * down_read()
++ * 1) Try fast path acquisition (reader BIAS is set)
++ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag
++ * 3) If !writelocked, acquire it for read
++ * 4) If writelocked, block on sem->rtmutex
++ * 5) unlock sem->rtmutex, goto 1)
++ *
++ * up_read()
++ * 1) Try fast path release (reader count != 1)
++ * 2) Wake the writer waiting in down_write()#3
++ *
++ * down_read()#3 has the consequence that rw semaphores on RT are not writer
++ * fair, but writers, which should be avoided in RT tasks (think mmap_sem),
++ * are subject to the rtmutex priority/DL inheritance mechanism.
++ *
++ * It's possible to make the rw semaphores writer fair by keeping a list of
++ * active readers. A blocked writer would force all newly incoming readers to
++ * block on the rtmutex, but the rtmutex would have to be proxy locked for one
++ * reader after the other. We can't use multi-reader inheritance because there
++ * is no way to support that with SCHED_DEADLINE. Implementing the one by one
++ * reader boosting/handover mechanism is a major surgery for a very dubious
++ * value.
++ *
++ * The risk of writer starvation is there, but the pathological use cases
++ * which trigger it are not necessarily the typical RT workloads.
++ */
++
++void __rwsem_init(struct rw_semaphore *sem, const char *name,
++ struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held semaphore:
++ */
++ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
++ lockdep_init_map(&sem->dep_map, name, key, 0);
++#endif
++ atomic_set(&sem->readers, READER_BIAS);
++}
++EXPORT_SYMBOL(__rwsem_init);
++
++int __down_read_trylock(struct rw_semaphore *sem)
++{
++ int r, old;
++
++ /*
++ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is
++ * set.
++ */
++ for (r = atomic_read(&sem->readers); r < 0;) {
++ old = atomic_cmpxchg(&sem->readers, r, r + 1);
++ if (likely(old == r))
++ return 1;
++ r = old;
++ }
++ return 0;
++}
++
++static int __sched __down_read_common(struct rw_semaphore *sem, int state)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ struct rt_mutex_waiter waiter;
++ int ret;
++
++ if (__down_read_trylock(sem))
++ return 0;
++
++ /*
++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
++ * late if one of the callbacks needs to acquire a sleeping lock.
++ */
++ if (blk_needs_flush_plug(current))
++ blk_schedule_flush_plug(current);
++
++ might_sleep();
++ raw_spin_lock_irq(&m->wait_lock);
++ /*
++ * Allow readers as long as the writer has not completely
++ * acquired the semaphore for write.
++ */
++ if (atomic_read(&sem->readers) != WRITER_BIAS) {
++ atomic_inc(&sem->readers);
++ raw_spin_unlock_irq(&m->wait_lock);
++ return 0;
++ }
++
++ /*
++ * Call into the slow lock path with the rtmutex->wait_lock
++ * held, so this can't result in the following race:
++ *
++ * Reader1 Reader2 Writer
++ * down_read()
++ * down_write()
++ * rtmutex_lock(m)
++ * swait()
++ * down_read()
++ * unlock(m->wait_lock)
++ * up_read()
++ * swake()
++ * lock(m->wait_lock)
++ * sem->writelocked=true
++ * unlock(m->wait_lock)
++ *
++ * up_write()
++ * sem->writelocked=false
++ * rtmutex_unlock(m)
++ * down_read()
++ * down_write()
++ * rtmutex_lock(m)
++ * swait()
++ * rtmutex_lock(m)
++ *
++ * That would put Reader1 behind the writer waiting on
++ * Reader2 to call up_read() which might be unbound.
++ */
++ rt_mutex_init_waiter(&waiter, false);
++ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK,
++ &waiter);
++ /*
++	 * The slowlock() above is guaranteed to return with the rtmutex (for
++	 * ret = 0) now held, so there can't be a writer active. Increment the
++	 * reader count and immediately drop the rtmutex again.
++	 * For ret != 0 we don't hold the rtmutex and need to unlock the
++	 * wait_lock; we don't own the lock then.
++ */
++ if (!ret)
++ atomic_inc(&sem->readers);
++ raw_spin_unlock_irq(&m->wait_lock);
++ if (!ret)
++ __rt_mutex_unlock(m);
++
++ debug_rt_mutex_free_waiter(&waiter);
++ return ret;
++}
++
++void __down_read(struct rw_semaphore *sem)
++{
++ int ret;
++
++ ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE);
++ WARN_ON_ONCE(ret);
++}
++
++int __down_read_interruptible(struct rw_semaphore *sem)
++{
++ int ret;
++
++ ret = __down_read_common(sem, TASK_INTERRUPTIBLE);
++ if (likely(!ret))
++ return ret;
++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret);
++ return -EINTR;
++}
++
++int __down_read_killable(struct rw_semaphore *sem)
++{
++ int ret;
++
++ ret = __down_read_common(sem, TASK_KILLABLE);
++ if (likely(!ret))
++ return ret;
++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret);
++ return -EINTR;
++}
++
++void __up_read(struct rw_semaphore *sem)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ struct task_struct *tsk;
++
++ /*
++ * sem->readers can only hit 0 when a writer is waiting for the
++ * active readers to leave the critical region.
++ */
++ if (!atomic_dec_and_test(&sem->readers))
++ return;
++
++ might_sleep();
++ raw_spin_lock_irq(&m->wait_lock);
++ /*
++ * Wake the writer, i.e. the rtmutex owner. It might release the
++ * rtmutex concurrently in the fast path (due to a signal), but to
++ * clean up the rwsem it needs to acquire m->wait_lock. The worst
++ * case which can happen is a spurious wakeup.
++ */
++ tsk = rt_mutex_owner(m);
++ if (tsk)
++ wake_up_process(tsk);
++
++ raw_spin_unlock_irq(&m->wait_lock);
++}
++
++static void __up_write_unlock(struct rw_semaphore *sem, int bias,
++ unsigned long flags)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++
++ atomic_add(READER_BIAS - bias, &sem->readers);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ __rt_mutex_unlock(m);
++}
++
++static int __sched __down_write_common(struct rw_semaphore *sem, int state)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ unsigned long flags;
++
++ /*
++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
++ * late if one of the callbacks needs to acquire a sleeping lock.
++ */
++ if (blk_needs_flush_plug(current))
++ blk_schedule_flush_plug(current);
++
++ /* Take the rtmutex as a first step */
++ if (__rt_mutex_lock_state(m, state))
++ return -EINTR;
++
++ /* Force readers into slow path */
++ atomic_sub(READER_BIAS, &sem->readers);
++ might_sleep();
++
++ set_current_state(state);
++ for (;;) {
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ /* Have all readers left the critical region? */
++ if (!atomic_read(&sem->readers)) {
++ atomic_set(&sem->readers, WRITER_BIAS);
++ __set_current_state(TASK_RUNNING);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ return 0;
++ }
++
++ if (signal_pending_state(state, current)) {
++ __set_current_state(TASK_RUNNING);
++ __up_write_unlock(sem, 0, flags);
++ return -EINTR;
++ }
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++
++ if (atomic_read(&sem->readers) != 0) {
++ schedule();
++ set_current_state(state);
++ }
++ }
++}
++
++void __sched __down_write(struct rw_semaphore *sem)
++{
++ __down_write_common(sem, TASK_UNINTERRUPTIBLE);
++}
++
++int __sched __down_write_killable(struct rw_semaphore *sem)
++{
++ return __down_write_common(sem, TASK_KILLABLE);
++}
++
++int __down_write_trylock(struct rw_semaphore *sem)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ unsigned long flags;
++
++ if (!__rt_mutex_trylock(m))
++ return 0;
++
++ atomic_sub(READER_BIAS, &sem->readers);
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ if (!atomic_read(&sem->readers)) {
++ atomic_set(&sem->readers, WRITER_BIAS);
++ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
++ return 1;
++ }
++ __up_write_unlock(sem, 0, flags);
++ return 0;
++}
++
++void __up_write(struct rw_semaphore *sem)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ __up_write_unlock(sem, WRITER_BIAS, flags);
++}
++
++void __downgrade_write(struct rw_semaphore *sem)
++{
++ struct rt_mutex *m = &sem->rtmutex;
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++ /* Release it and account current as reader */
++ __up_write_unlock(sem, WRITER_BIAS - 1, flags);
++}
+--
+2.19.1
+
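The writer side of the RT rwsem above follows the four steps listed in the file comment: lock the rtmutex, remove READER_BIAS, wait until the remaining readers have drained, then mark the semaphore write locked. Here is a rough user-space sketch of that sequence, not part of the patch series: struct rt_rwsem_sketch, down_write_sketch() and up_write_sketch() are invented names, a pthread mutex stands in for the rtmutex and sched_yield() for schedule().

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define READER_BIAS	(1U << 31)
#define WRITER_BIAS	(1U << 30)

/* Invented stand-in for struct rw_semaphore from the patch above. */
struct rt_rwsem_sketch {
	atomic_int readers;		/* READER_BIAS plus active readers */
	pthread_mutex_t rtmutex;	/* stands in for the rtmutex */
};

/*
 * Writer path, mirroring __down_write_common(): take the mutex, remove
 * the reader bias so new readers block on the mutex, then wait for the
 * readers already inside the critical section to leave.
 */
static void down_write_sketch(struct rt_rwsem_sketch *sem)
{
	pthread_mutex_lock(&sem->rtmutex);			/* step 1 */
	atomic_fetch_sub(&sem->readers, (int)READER_BIAS);	/* step 2 */

	while (atomic_load(&sem->readers) != 0)			/* step 3 */
		sched_yield();		/* schedule() stand-in */

	atomic_store(&sem->readers, (int)WRITER_BIAS);		/* step 4 */
}

static void up_write_sketch(struct rt_rwsem_sketch *sem)
{
	/*
	 * Adding READER_BIAS - WRITER_BIAS wraps the word back to
	 * READER_BIAS, re-enabling the reader fast path; unlocking the
	 * mutex then releases blocked readers (cf. __up_write_unlock()).
	 */
	atomic_fetch_add(&sem->readers, (int)(READER_BIAS - WRITER_BIAS));
	pthread_mutex_unlock(&sem->rtmutex);
}

int main(void)
{
	struct rt_rwsem_sketch sem = {
		.readers = (int)READER_BIAS,
		.rtmutex = PTHREAD_MUTEX_INITIALIZER,
	};

	down_write_sketch(&sem);
	printf("write locked, readers word = %d\n", atomic_load(&sem.readers));
	up_write_sketch(&sem);
	printf("write unlocked, readers word = %d\n", atomic_load(&sem.readers));
	return 0;
}

Because the writer holds the rtmutex the whole time, no new reader can slip in through the slow path while it waits, so the sketch only has to poll the reader count; the real patch performs this wait under m->wait_lock with the task state saved.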
diff --git a/features/rt/locking-rtmutex-add-sleeping-lock-implementation.patch b/features/rt/locking-rtmutex-add-sleeping-lock-implementation.patch
new file mode 100644
index 00000000..91588d81
--- /dev/null
+++ b/features/rt/locking-rtmutex-add-sleeping-lock-implementation.patch
@@ -0,0 +1,1232 @@
+From 479dce6155bca792fbc3d2907d5d576e0d38c554 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 17:11:19 +0200
+Subject: [PATCH 075/191] locking/rtmutex: add sleeping lock implementation
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/kernel.h | 5 +
+ include/linux/preempt.h | 4 +
+ include/linux/rtmutex.h | 19 +-
+ include/linux/sched.h | 7 +
+ include/linux/sched/wake_q.h | 13 +-
+ include/linux/spinlock_rt.h | 155 ++++++++++
+ include/linux/spinlock_types_rt.h | 38 +++
+ kernel/fork.c | 1 +
+ kernel/futex.c | 10 +-
+ kernel/locking/rtmutex.c | 451 +++++++++++++++++++++++++++---
+ kernel/locking/rtmutex_common.h | 14 +-
+ kernel/sched/core.c | 39 ++-
+ 12 files changed, 698 insertions(+), 58 deletions(-)
+ create mode 100644 include/linux/spinlock_rt.h
+ create mode 100644 include/linux/spinlock_types_rt.h
+
+diff --git a/include/linux/kernel.h b/include/linux/kernel.h
+index 5b7ed6dc99ac..0f9315891c17 100644
+--- a/include/linux/kernel.h
++++ b/include/linux/kernel.h
+@@ -122,6 +122,10 @@ extern void __cant_migrate(const char *file, int line);
+ */
+ # define might_sleep() \
+ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
++
++# define might_sleep_no_state_check() \
++ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
++
+ /**
+ * cant_sleep - annotation for functions that cannot sleep
+ *
+@@ -165,6 +169,7 @@ extern void __cant_migrate(const char *file, int line);
+ static inline void __might_sleep(const char *file, int line,
+ int preempt_offset) { }
+ # define might_sleep() do { might_resched(); } while (0)
++# define might_sleep_no_state_check() do { might_resched(); } while (0)
+ # define cant_sleep() do { } while (0)
+ # define cant_migrate() do { } while (0)
+ # define sched_annotate_sleep() do { } while (0)
+diff --git a/include/linux/preempt.h b/include/linux/preempt.h
+index 9881eac0698f..4d244e295e85 100644
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -121,7 +121,11 @@
+ /*
+ * The preempt_count offset after spin_lock()
+ */
++#if !defined(CONFIG_PREEMPT_RT)
+ #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
++#else
++#define PREEMPT_LOCK_OFFSET 0
++#endif
+
+ /*
+ * The preempt_count offset needed for things like:
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index b828b938c876..b02009f53026 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -19,6 +19,10 @@
+
+ extern int max_lock_depth; /* for sysctl */
+
++#ifdef CONFIG_DEBUG_MUTEXES
++#include <linux/debug_locks.h>
++#endif
++
+ /**
+ * The rt_mutex structure
+ *
+@@ -31,6 +35,7 @@ struct rt_mutex {
+ raw_spinlock_t wait_lock;
+ struct rb_root_cached waiters;
+ struct task_struct *owner;
++ int save_state;
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ #endif
+@@ -67,11 +72,19 @@ do { \
+ #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
+ #endif
+
+-#define __RT_MUTEX_INITIALIZER(mutexname) \
+- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
++#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
+ , .waiters = RB_ROOT_CACHED \
+ , .owner = NULL \
+- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
++ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
++
++#define __RT_MUTEX_INITIALIZER(mutexname) \
++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
++ , .save_state = 0 }
++
++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
++ , .save_state = 1 }
+
+ #define DEFINE_RT_MUTEX(mutexname) \
+ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 3f05b8c29132..fb5350358bc8 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -141,6 +141,9 @@ struct task_group;
+ smp_store_mb(current->state, (state_value)); \
+ } while (0)
+
++#define __set_current_state_no_track(state_value) \
++ current->state = (state_value);
++
+ #define set_special_state(state_value) \
+ do { \
+ unsigned long flags; /* may shadow */ \
+@@ -194,6 +197,9 @@ struct task_group;
+ #define set_current_state(state_value) \
+ smp_store_mb(current->state, (state_value))
+
++#define __set_current_state_no_track(state_value) \
++ __set_current_state(state_value)
++
+ /*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+@@ -1018,6 +1024,7 @@ struct task_struct {
+ raw_spinlock_t pi_lock;
+
+ struct wake_q_node wake_q;
++ struct wake_q_node wake_q_sleeper;
+
+ #ifdef CONFIG_RT_MUTEXES
+ /* PI waiters blocked on a rt_mutex held by this task: */
+diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
+index 26a2013ac39c..6e2dff721547 100644
+--- a/include/linux/sched/wake_q.h
++++ b/include/linux/sched/wake_q.h
+@@ -58,6 +58,17 @@ static inline bool wake_q_empty(struct wake_q_head *head)
+
+ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
+ extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
+-extern void wake_up_q(struct wake_q_head *head);
++extern void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task);
++extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
++
++static inline void wake_up_q(struct wake_q_head *head)
++{
++ __wake_up_q(head, false);
++}
++
++static inline void wake_up_q_sleeper(struct wake_q_head *head)
++{
++ __wake_up_q(head, true);
++}
+
+ #endif /* _LINUX_SCHED_WAKE_Q_H */
+diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
+new file mode 100644
+index 000000000000..3085132eae38
+--- /dev/null
++++ b/include/linux/spinlock_rt.h
+@@ -0,0 +1,155 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef __LINUX_SPINLOCK_RT_H
++#define __LINUX_SPINLOCK_RT_H
++
++#ifndef __LINUX_SPINLOCK_H
++#error Do not include directly. Use spinlock.h
++#endif
++
++#include <linux/bug.h>
++
++extern void
++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key);
++
++#define spin_lock_init(slock) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(slock)->lock); \
++ __rt_spin_lock_init(slock, #slock, &__key); \
++} while (0)
++
++extern void __lockfunc rt_spin_lock(spinlock_t *lock);
++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
++extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
++extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
++extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock);
++extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
++extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
++extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
++extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
++
++/*
++ * lockdep-less calls, for derived types like rwlock:
++ * (for trylock they can use rt_mutex_trylock() directly.)
++ * Migrate disable handling must be done at the call site.
++ */
++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
++extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
++
++#define spin_lock(lock) rt_spin_lock(lock)
++
++#define spin_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ rt_spin_lock(lock); \
++ } while (0)
++
++#define spin_lock_irq(lock) spin_lock(lock)
++
++#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
++
++#define spin_trylock(lock) \
++({ \
++ int __locked; \
++ __locked = spin_do_trylock(lock); \
++ __locked; \
++})
++
++#ifdef CONFIG_LOCKDEP
++# define spin_lock_nested(lock, subclass) \
++ do { \
++ rt_spin_lock_nested(lock, subclass); \
++ } while (0)
++
++#define spin_lock_bh_nested(lock, subclass) \
++ do { \
++ local_bh_disable(); \
++ rt_spin_lock_nested(lock, subclass); \
++ } while (0)
++
++# define spin_lock_nest_lock(lock, subclass) \
++ do { \
++ typecheck(struct lockdep_map *, &(subclass)->dep_map); \
++ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \
++ } while (0)
++
++# define spin_lock_irqsave_nested(lock, flags, subclass) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ rt_spin_lock_nested(lock, subclass); \
++ } while (0)
++#else
++# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock)))
++# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock)))
++# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock)))
++
++# define spin_lock_irqsave_nested(lock, flags, subclass) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ spin_lock(((void)(subclass), (lock))); \
++ } while (0)
++#endif
++
++#define spin_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ spin_lock(lock); \
++ } while (0)
++
++#define spin_unlock(lock) rt_spin_unlock(lock)
++
++#define spin_unlock_bh(lock) \
++ do { \
++ rt_spin_unlock(lock); \
++ local_bh_enable(); \
++ } while (0)
++
++#define spin_unlock_irq(lock) spin_unlock(lock)
++
++#define spin_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ spin_unlock(lock); \
++ } while (0)
++
++#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
++#define spin_trylock_irq(lock) spin_trylock(lock)
++
++#define spin_trylock_irqsave(lock, flags) \
++({ \
++ int __locked; \
++ \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ __locked = spin_trylock(lock); \
++ __locked; \
++})
++
++#ifdef CONFIG_GENERIC_LOCKBREAK
++# define spin_is_contended(lock) ((lock)->break_lock)
++#else
++# define spin_is_contended(lock) (((void)(lock), 0))
++#endif
++
++static inline int spin_can_lock(spinlock_t *lock)
++{
++ return !rt_mutex_is_locked(&lock->lock);
++}
++
++static inline int spin_is_locked(spinlock_t *lock)
++{
++ return rt_mutex_is_locked(&lock->lock);
++}
++
++static inline void assert_spin_locked(spinlock_t *lock)
++{
++ BUG_ON(!spin_is_locked(lock));
++}
++
++#endif
+diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h
+new file mode 100644
+index 000000000000..446da786e5d5
+--- /dev/null
++++ b/include/linux/spinlock_types_rt.h
+@@ -0,0 +1,38 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#ifndef __LINUX_SPINLOCK_TYPES_RT_H
++#define __LINUX_SPINLOCK_TYPES_RT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++#include <linux/cache.h>
++
++/*
++ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
++ */
++typedef struct spinlock {
++ struct rt_mutex lock;
++ unsigned int break_lock;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++} spinlock_t;
++
++#define __RT_SPIN_INITIALIZER(name) \
++ { \
++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
++ .save_state = 1, \
++ }
++/*
++.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
++*/
++
++#define __SPIN_LOCK_UNLOCKED(name) \
++ { .lock = __RT_SPIN_INITIALIZER(name.lock), \
++ SPIN_DEP_MAP_INIT(name) }
++
++#define DEFINE_SPINLOCK(name) \
++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
++
++#endif
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 882c91a52aa5..c0cfae6e545c 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -927,6 +927,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ tsk->splice_pipe = NULL;
+ tsk->task_frag.page = NULL;
+ tsk->wake_q.next = NULL;
++ tsk->wake_q_sleeper.next = NULL;
+
+ account_kernel_stack(tsk, 1);
+
+diff --git a/kernel/futex.c b/kernel/futex.c
+index 0315333c0587..93961549dc4c 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1497,6 +1497,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
+ struct task_struct *new_owner;
+ bool postunlock = false;
+ DEFINE_WAKE_Q(wake_q);
++ DEFINE_WAKE_Q(wake_sleeper_q);
+ int ret = 0;
+
+ new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
+@@ -1546,14 +1547,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
+ * not fail.
+ */
+ pi_state_update_owner(pi_state, new_owner);
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
++ &wake_sleeper_q);
+ }
+
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+
+ return ret;
+ }
+@@ -2857,7 +2859,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+ goto no_block;
+ }
+
+- rt_mutex_init_waiter(&rt_waiter);
++ rt_mutex_init_waiter(&rt_waiter, false);
+
+ /*
+ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+@@ -3202,7 +3204,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ * The waiter is allocated on our stack, manipulated by the requeue
+ * code while we sleep on uaddr.
+ */
+- rt_mutex_init_waiter(&rt_waiter);
++ rt_mutex_init_waiter(&rt_waiter, false);
+
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
+ if (unlikely(ret != 0))
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 86012b16f0c2..03ffb955b286 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -8,6 +8,11 @@
+ * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
+ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
+ * Copyright (C) 2006 Esben Nielsen
++ * Adaptive Spinlocks:
++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
++ * and Peter Morreale,
++ * Adaptive Spinlocks simplification:
++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
+ *
+ * See Documentation/locking/rt-mutex-design.rst for details.
+ */
+@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
+ #define task_to_waiter(p) \
+- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }
+
+ static inline int
+ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ return 1;
+ }
+
++#define STEAL_NORMAL 0
++#define STEAL_LATERAL 1
++
++static inline int
++rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
++{
++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
++
++ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
++ return 1;
++
++ /*
++ * Note that RT tasks are excluded from lateral-steals
++ * to prevent the introduction of an unbounded latency.
++ */
++ if (mode == STEAL_NORMAL || rt_task(waiter->task))
++ return 0;
++
++ return rt_mutex_waiter_equal(waiter, top_waiter);
++}
++
+ #define __node_2_waiter(node) \
+ rb_entry((node), struct rt_mutex_waiter, tree_entry)
+
+@@ -359,6 +385,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
+ return debug_rt_mutex_detect_deadlock(waiter, chwalk);
+ }
+
++static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
++{
++ if (waiter->savestate)
++ wake_up_lock_sleeper(waiter->task);
++ else
++ wake_up_process(waiter->task);
++}
++
+ /*
+ * Max number of times we'll walk the boosting chain:
+ */
+@@ -682,13 +716,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * follow here. This is the end of the chain we are walking.
+ */
+ if (!rt_mutex_owner(lock)) {
++ struct rt_mutex_waiter *lock_top_waiter;
++
+ /*
+ * If the requeue [7] above changed the top waiter,
+ * then we need to wake the new top waiter up to try
+ * to get the lock.
+ */
+- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
+- wake_up_process(rt_mutex_top_waiter(lock)->task);
++ lock_top_waiter = rt_mutex_top_waiter(lock);
++ if (prerequeue_top_waiter != lock_top_waiter)
++ rt_mutex_wake_waiter(lock_top_waiter);
+ raw_spin_unlock_irq(&lock->wait_lock);
+ return 0;
+ }
+@@ -789,9 +826,11 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * @task: The task which wants to acquire the lock
+ * @waiter: The waiter that is queued to the lock's wait tree if the
+ * callsite called task_blocked_on_lock(), otherwise NULL
++ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
+ */
+-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+- struct rt_mutex_waiter *waiter)
++static int __try_to_take_rt_mutex(struct rt_mutex *lock,
++ struct task_struct *task,
++ struct rt_mutex_waiter *waiter, int mode)
+ {
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -827,12 +866,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ */
+ if (waiter) {
+ /*
+- * If waiter is not the highest priority waiter of
+- * @lock, give up.
++ * If waiter is not the highest priority waiter of @lock,
++ * or its peer when lateral steal is allowed, give up.
+ */
+- if (waiter != rt_mutex_top_waiter(lock))
++ if (!rt_mutex_steal(lock, waiter, mode))
+ return 0;
+-
+ /*
+ * We can acquire the lock. Remove the waiter from the
+ * lock waiters tree.
+@@ -850,14 +888,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ /*
+- * If @task->prio is greater than or equal to
+- * the top waiter priority (kernel view),
+- * @task lost.
++ * If @task->prio is greater than the top waiter
++ * priority (kernel view), or equal to it when a
++ * lateral steal is forbidden, @task lost.
+ */
+- if (!rt_mutex_waiter_less(task_to_waiter(task),
+- rt_mutex_top_waiter(lock)))
++ if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
+ return 0;
+-
+ /*
+ * The current top waiter stays enqueued. We
+ * don't have to change anything in the lock
+@@ -904,6 +940,289 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ return 1;
+ }
+
++#ifdef CONFIG_PREEMPT_RT
++/*
++ * preemptible spin_lock functions:
++ */
++static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
++ void (*slowfn)(struct rt_mutex *lock))
++{
++ might_sleep_no_state_check();
++
++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
++ return;
++ else
++ slowfn(lock);
++}
++
++static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
++ void (*slowfn)(struct rt_mutex *lock))
++{
++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
++ return;
++ else
++ slowfn(lock);
++}
++#ifdef CONFIG_SMP
++/*
++ * Note that owner is a speculative pointer and dereferencing relies
++ * on rcu_read_lock() and the check against the lock owner.
++ */
++static int adaptive_wait(struct rt_mutex *lock,
++ struct task_struct *owner)
++{
++ int res = 0;
++
++ rcu_read_lock();
++ for (;;) {
++ if (owner != rt_mutex_owner(lock))
++ break;
++ /*
++ * Ensure that owner->on_cpu is dereferenced _after_
++ * checking the above to be valid.
++ */
++ barrier();
++ if (!owner->on_cpu) {
++ res = 1;
++ break;
++ }
++ cpu_relax();
++ }
++ rcu_read_unlock();
++ return res;
++}
++#else
++static int adaptive_wait(struct rt_mutex *lock,
++ struct task_struct *orig_owner)
++{
++ return 1;
++}
++#endif
++
++static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
++ struct rt_mutex_waiter *waiter,
++ struct task_struct *task,
++ enum rtmutex_chainwalk chwalk);
++/*
++ * Slow path lock function spin_lock style: this variant is very
++ * careful not to miss any non-lock wakeups.
++ *
++ * We store the current state under p->pi_lock in p->saved_state and
++ * the try_to_wake_up() code handles this accordingly.
++ */
++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
++ struct rt_mutex_waiter *waiter,
++ unsigned long flags)
++{
++ struct task_struct *lock_owner, *self = current;
++ struct rt_mutex_waiter *top_waiter;
++ int ret;
++
++ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL))
++ return;
++
++ BUG_ON(rt_mutex_owner(lock) == self);
++
++ /*
++ * We save whatever state the task is in and we'll restore it
++ * after acquiring the lock taking real wakeups into account
++ * as well. We are serialized via pi_lock against wakeups. See
++ * try_to_wake_up().
++ */
++ raw_spin_lock(&self->pi_lock);
++ self->saved_state = self->state;
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
++
++ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK);
++ BUG_ON(ret);
++
++ for (;;) {
++ /* Try to acquire the lock again. */
++ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL))
++ break;
++
++ top_waiter = rt_mutex_top_waiter(lock);
++ lock_owner = rt_mutex_owner(lock);
++
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++
++ if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
++ schedule();
++
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
++
++ raw_spin_lock(&self->pi_lock);
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
++ }
++
++ /*
++ * Restore the task state to current->saved_state. We set it
++ * to the original state above and the try_to_wake_up() code
++ * has possibly updated it when a real (non-rtmutex) wakeup
++ * happened while we were blocked. Clear saved_state so
++ * try_to_wake_up() does not get confused.
++ */
++ raw_spin_lock(&self->pi_lock);
++ __set_current_state_no_track(self->saved_state);
++ self->saved_state = TASK_RUNNING;
++ raw_spin_unlock(&self->pi_lock);
++
++ /*
++ * try_to_take_rt_mutex() sets the waiter bit
++ * unconditionally. We might have to fix that up:
++ */
++ fixup_rt_mutex_waiters(lock);
++
++ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock));
++ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry));
++}
++
++static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
++{
++ struct rt_mutex_waiter waiter;
++ unsigned long flags;
++
++ rt_mutex_init_waiter(&waiter, true);
++
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
++ rt_spin_lock_slowlock_locked(lock, &waiter, flags);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++ debug_rt_mutex_free_waiter(&waiter);
++}
++
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper);
++/*
++ * Slow path to release a rt_mutex spin_lock style
++ */
++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
++{
++ unsigned long flags;
++ DEFINE_WAKE_Q(wake_q);
++ DEFINE_WAKE_Q(wake_sleeper_q);
++ bool postunlock;
++
++ raw_spin_lock_irqsave(&lock->wait_lock, flags);
++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
++
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
++}
++
++void __lockfunc rt_spin_lock(spinlock_t *lock)
++{
++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ migrate_disable();
++}
++EXPORT_SYMBOL(rt_spin_lock);
++
++void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
++{
++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
++}
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
++{
++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ migrate_disable();
++}
++EXPORT_SYMBOL(rt_spin_lock_nested);
++
++void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock,
++ struct lockdep_map *nest_lock)
++{
++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ migrate_disable();
++}
++EXPORT_SYMBOL(rt_spin_lock_nest_lock);
++#endif
++
++void __lockfunc rt_spin_unlock(spinlock_t *lock)
++{
++ /* NOTE: we always pass in '1' for nested, for simplicity */
++ spin_release(&lock->dep_map, _RET_IP_);
++ migrate_enable();
++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
++}
++EXPORT_SYMBOL(rt_spin_unlock);
++
++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
++{
++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
++}
++EXPORT_SYMBOL(__rt_spin_unlock);
++
++/*
++ * Wait for the lock to get unlocked: instead of polling for an unlock
++ * (like raw spinlocks do), we lock and unlock, to force the kernel to
++ * schedule if there's contention:
++ */
++void __lockfunc rt_spin_lock_unlock(spinlock_t *lock)
++{
++ spin_lock(lock);
++ spin_unlock(lock);
++}
++EXPORT_SYMBOL(rt_spin_lock_unlock);
++
++int __lockfunc rt_spin_trylock(spinlock_t *lock)
++{
++ int ret;
++
++ ret = __rt_mutex_trylock(&lock->lock);
++ if (ret) {
++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ migrate_disable();
++ }
++ return ret;
++}
++EXPORT_SYMBOL(rt_spin_trylock);
++
++int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
++{
++ int ret;
++
++ local_bh_disable();
++ ret = __rt_mutex_trylock(&lock->lock);
++ if (ret) {
++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ migrate_disable();
++ } else {
++ local_bh_enable();
++ }
++ return ret;
++}
++EXPORT_SYMBOL(rt_spin_trylock_bh);
++
++void
++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
++ lockdep_init_map(&lock->dep_map, name, key, 0);
++#endif
++}
++EXPORT_SYMBOL(__rt_spin_lock_init);
++
++#endif /* PREEMPT_RT */
++
++static inline int
++try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
++ struct rt_mutex_waiter *waiter)
++{
++ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
++}
++
+ /*
+ * Task blocks on lock.
+ *
+@@ -1017,6 +1336,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ * Called with lock->wait_lock held and interrupts disabled.
+ */
+ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
++ struct wake_q_head *wake_sleeper_q,
+ struct rt_mutex *lock)
+ {
+ struct rt_mutex_waiter *waiter;
+@@ -1056,7 +1376,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+ * Pairs with preempt_enable() in rt_mutex_postunlock();
+ */
+ preempt_disable();
+- wake_q_add(wake_q, waiter->task);
++ if (waiter->savestate)
++ wake_q_add_sleeper(wake_sleeper_q, waiter->task);
++ else
++ wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock(&current->pi_lock);
+ }
+
+@@ -1140,21 +1463,22 @@ void rt_mutex_adjust_pi(struct task_struct *task)
+ return;
+ }
+ next_lock = waiter->lock;
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+ /* gets dropped in rt_mutex_adjust_prio_chain()! */
+ get_task_struct(task);
+
++ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
+ next_lock, NULL, task);
+ }
+
+-void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
+ {
+ debug_rt_mutex_init_waiter(waiter);
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ RB_CLEAR_NODE(&waiter->tree_entry);
+ waiter->task = NULL;
++ waiter->savestate = savestate;
+ }
+
+ /**
+@@ -1265,7 +1589,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ unsigned long flags;
+ int ret = 0;
+
+- rt_mutex_init_waiter(&waiter);
++ rt_mutex_init_waiter(&waiter, false);
+
+ /*
+ * Technically we could use raw_spin_[un]lock_irq() here, but this can
+@@ -1338,7 +1662,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
+ * Return whether the current task needs to call rt_mutex_postunlock().
+ */
+ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+- struct wake_q_head *wake_q)
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wake_sleeper_q)
+ {
+ unsigned long flags;
+
+@@ -1392,7 +1717,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+ *
+ * Queue the next waiter for wakeup once we release the wait_lock.
+ */
+- mark_wakeup_next_waiter(wake_q, lock);
++ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ return true; /* call rt_mutex_postunlock() */
+@@ -1429,9 +1754,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
+ /*
+ * Performs the wakeup of the top-waiter and re-enables preemption.
+ */
+-void rt_mutex_postunlock(struct wake_q_head *wake_q)
++void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wake_sleeper_q)
+ {
+ wake_up_q(wake_q);
++ wake_up_q_sleeper(wake_sleeper_q);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+ preempt_enable();
+@@ -1440,15 +1767,17 @@ void rt_mutex_postunlock(struct wake_q_head *wake_q)
+ static inline void
+ rt_mutex_fastunlock(struct rt_mutex *lock,
+ bool (*slowfn)(struct rt_mutex *lock,
+- struct wake_q_head *wqh))
++ struct wake_q_head *wqh,
++ struct wake_q_head *wq_sleeper))
+ {
+ DEFINE_WAKE_Q(wake_q);
++ DEFINE_WAKE_Q(wake_sleeper_q);
+
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ return;
+
+- if (slowfn(lock, &wake_q))
+- rt_mutex_postunlock(&wake_q);
++ if (slowfn(lock, &wake_q, &wake_sleeper_q))
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+ }
+
+ int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
+@@ -1579,19 +1908,13 @@ void __sched __rt_mutex_unlock(struct rt_mutex *lock)
+ void __sched rt_mutex_unlock(struct rt_mutex *lock)
+ {
+ mutex_release(&lock->dep_map, _RET_IP_);
+- rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
++ __rt_mutex_unlock(lock);
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
+
+-/**
+- * __rt_mutex_futex_unlock - Futex variant, that since futex variants
+- * do not use the fast-path, can be simple and will not need to retry.
+- *
+- * @lock: The rt_mutex to be unlocked
+- * @wake_q: The wake queue head from which to get the next lock waiter
+- */
+-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+- struct wake_q_head *wake_q)
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
+ {
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -1608,23 +1931,39 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
+- mark_wakeup_next_waiter(wake_q, lock);
++ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
+
+ return true; /* call postunlock() */
+ }
+
++/**
++ * __rt_mutex_futex_unlock - Futex variant, that since futex variants
++ * do not use the fast-path, can be simple and will not need to retry.
++ *
++ * @lock: The rt_mutex to be unlocked
++ * @wake_q: The wake queue head from which to get the next lock waiter
++ * @wq_sleeper: The sleeper wake queue head from which to get the next lock waiter
++ */
++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
++{
++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
++}
++
+ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+ {
+ DEFINE_WAKE_Q(wake_q);
++ DEFINE_WAKE_Q(wake_sleeper_q);
+ unsigned long flags;
+ bool postunlock;
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+- postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+ }
+
+ /**
+@@ -1662,7 +2001,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name,
+ if (name && key)
+ debug_rt_mutex_init(lock, name, key);
+ }
+-EXPORT_SYMBOL_GPL(__rt_mutex_init);
++EXPORT_SYMBOL(__rt_mutex_init);
+
+ /**
+ * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
+@@ -1682,6 +2021,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner)
+ {
+ __rt_mutex_init(lock, NULL, NULL);
++#ifdef CONFIG_DEBUG_SPINLOCK
++ /*
++ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI are
++ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping
++ * lock.
++ */
++ raw_spin_lock_init(&lock->wait_lock);
++#endif
+ debug_rt_mutex_proxy_lock(lock, proxy_owner);
+ rt_mutex_set_owner(lock, proxy_owner);
+ }
+@@ -1704,6 +2051,26 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock)
+ rt_mutex_set_owner(lock, NULL);
+ }
+
++static void fixup_rt_mutex_blocked(struct rt_mutex *lock)
++{
++ struct task_struct *tsk = current;
++ /*
++ * RT has a problem here when the wait got interrupted by a timeout
++ * or a signal. task->pi_blocked_on is still set. The task must
++ * acquire the hash bucket lock when returning from this function.
++ *
++ * If the hash bucket lock is contended then the
++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by
++ * clearing task->pi_blocked_on which removes the task from the
++ * boosting chain of the rtmutex. That's correct because the task
++ * is no longer blocked on it.
++ */
++ raw_spin_lock(&tsk->pi_lock);
++ tsk->pi_blocked_on = NULL;
++ raw_spin_unlock(&tsk->pi_lock);
++}
++
+ /**
+ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock: the rt_mutex to take
+@@ -1776,6 +2143,9 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ ret = 0;
+ }
+
++ if (ret)
++ fixup_rt_mutex_blocked(lock);
++
+ return ret;
+ }
+
+@@ -1865,6 +2235,9 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+ * have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
++ if (ret)
++ fixup_rt_mutex_blocked(lock);
++
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index 9d1e974ca9c3..c1a280167e3c 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -31,6 +31,7 @@ struct rt_mutex_waiter {
+ struct task_struct *task;
+ struct rt_mutex *lock;
+ int prio;
++ bool savestate;
+ u64 deadline;
+ };
+
+@@ -133,7 +134,7 @@ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
+ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
+-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate);
+ extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
+@@ -151,9 +152,12 @@ extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
+
+ extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
+ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+- struct wake_q_head *wqh);
++ struct wake_q_head *wqh,
++ struct wake_q_head *wq_sleeper);
++
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wake_sleeper_q);
+
+-extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
+ /* RW semaphore special interface */
+
+ extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
+@@ -163,6 +167,10 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+ enum rtmutex_chainwalk chwalk,
+ struct rt_mutex_waiter *waiter);
++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
++ struct rt_mutex_waiter *waiter,
++ unsigned long flags);
++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 4efe6fd72719..46e7db92b343 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -502,9 +502,15 @@ static bool set_nr_if_polling(struct task_struct *p)
+ #endif
+ #endif
+
+-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
++static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
++ bool sleeper)
+ {
+- struct wake_q_node *node = &task->wake_q;
++ struct wake_q_node *node;
++
++ if (sleeper)
++ node = &task->wake_q_sleeper;
++ else
++ node = &task->wake_q;
+
+ /*
+ * Atomically grab the task, if ->wake_q is !nil already it means
+@@ -540,7 +546,13 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
+ */
+ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+ {
+- if (__wake_q_add(head, task))
++ if (__wake_q_add(head, task, false))
++ get_task_struct(task);
++}
++
++void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
++{
++ if (__wake_q_add(head, task, true))
+ get_task_struct(task);
+ }
+
+@@ -563,28 +575,39 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+ */
+ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
+ {
+- if (!__wake_q_add(head, task))
++ if (!__wake_q_add(head, task, false))
+ put_task_struct(task);
+ }
+
+-void wake_up_q(struct wake_q_head *head)
++void __wake_up_q(struct wake_q_head *head, bool sleeper)
+ {
+ struct wake_q_node *node = head->first;
+
+ while (node != WAKE_Q_TAIL) {
+ struct task_struct *task;
+
+- task = container_of(node, struct task_struct, wake_q);
++ if (sleeper)
++ task = container_of(node, struct task_struct, wake_q_sleeper);
++ else
++ task = container_of(node, struct task_struct, wake_q);
++
+ BUG_ON(!task);
+ /* Task can safely be re-inserted now: */
+ node = node->next;
+- task->wake_q.next = NULL;
+
++ if (sleeper)
++ task->wake_q_sleeper.next = NULL;
++ else
++ task->wake_q.next = NULL;
+ /*
+ * wake_up_process() executes a full barrier, which pairs with
+ * the queueing in wake_q_add() so as not to miss wakeups.
+ */
+- wake_up_process(task);
++ if (sleeper)
++ wake_up_lock_sleeper(task);
++ else
++ wake_up_process(task);
++
+ put_task_struct(task);
+ }
+ }
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/features/rt/locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch
new file mode 100644
index 00000000..4453ee0a
--- /dev/null
+++ b/features/rt/locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch
@@ -0,0 +1,455 @@
+From 301251ee29aa4e2fc60f83f4826f4c1d15ad6bdc Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 12 Oct 2017 17:34:38 +0200
+Subject: [PATCH 081/191] locking/rtmutex: add ww_mutex addon for mutex-rt
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/mutex.h | 8 -
+ include/linux/ww_mutex.h | 8 +
+ kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++--
+ kernel/locking/rtmutex_common.h | 2 +
+ kernel/locking/rwsem-rt.c | 2 +-
+ 5 files changed, 262 insertions(+), 20 deletions(-)
+
+diff --git a/include/linux/mutex.h b/include/linux/mutex.h
+index 50c873744ab0..d7cfc5dc817b 100644
+--- a/include/linux/mutex.h
++++ b/include/linux/mutex.h
+@@ -82,14 +82,6 @@ struct mutex {
+ struct ww_class;
+ struct ww_acquire_ctx;
+
+-struct ww_mutex {
+- struct mutex base;
+- struct ww_acquire_ctx *ctx;
+-#ifdef CONFIG_DEBUG_MUTEXES
+- struct ww_class *ww_class;
+-#endif
+-};
+-
+ /*
+ * This is the control structure for tasks blocked on mutex,
+ * which resides on the blocked task's kernel stack:
+diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
+index 850424e5d030..8ef2feb0d8fe 100644
+--- a/include/linux/ww_mutex.h
++++ b/include/linux/ww_mutex.h
+@@ -28,6 +28,14 @@ struct ww_class {
+ unsigned int is_wait_die;
+ };
+
++struct ww_mutex {
++ struct mutex base;
++ struct ww_acquire_ctx *ctx;
++#ifdef CONFIG_DEBUG_MUTEXES
++ struct ww_class *ww_class;
++#endif
++};
++
+ struct ww_acquire_ctx {
+ struct task_struct *task;
+ unsigned long stamp;
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index f0bc7fcae441..3fc947dd2e60 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -24,6 +24,7 @@
+ #include <linux/sched/wake_q.h>
+ #include <linux/sched/debug.h>
+ #include <linux/timer.h>
++#include <linux/ww_mutex.h>
+
+ #include "rtmutex_common.h"
+
+@@ -1216,6 +1217,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
+
+ #endif /* PREEMPT_RT */
+
++#ifdef CONFIG_PREEMPT_RT
++static inline int __sched
++__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
++{
++ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
++ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
++
++ if (!hold_ctx)
++ return 0;
++
++ if (unlikely(ctx == hold_ctx))
++ return -EALREADY;
++
++ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
++ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
++#ifdef CONFIG_DEBUG_MUTEXES
++ DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
++ ctx->contending_lock = ww;
++#endif
++ return -EDEADLK;
++ }
++
++ return 0;
++}
++#else
++static inline int __sched
++__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
++{
++ BUG();
++ return 0;
++}
++
++#endif
++
+ static inline int
+ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
+@@ -1494,7 +1529,8 @@ void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
+ static int __sched
+ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+- struct rt_mutex_waiter *waiter)
++ struct rt_mutex_waiter *waiter,
++ struct ww_acquire_ctx *ww_ctx)
+ {
+ int ret = 0;
+
+@@ -1512,6 +1548,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ break;
+ }
+
++ if (ww_ctx && ww_ctx->acquired > 0) {
++ ret = __mutex_lock_check_stamp(lock, ww_ctx);
++ if (ret)
++ break;
++ }
++
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ schedule();
+@@ -1540,16 +1582,106 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ }
+ }
+
++static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
++ struct ww_acquire_ctx *ww_ctx)
++{
++#ifdef CONFIG_DEBUG_MUTEXES
++ /*
++ * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
++ * but released with a normal mutex_unlock in this call.
++ *
++ * This should never happen, always use ww_mutex_unlock.
++ */
++ DEBUG_LOCKS_WARN_ON(ww->ctx);
++
++ /*
++ * Not quite done after calling ww_acquire_done() ?
++ */
++ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
++
++ if (ww_ctx->contending_lock) {
++ /*
++ * After -EDEADLK you tried to
++ * acquire a different ww_mutex? Bad!
++ */
++ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
++
++ /*
++ * You called ww_mutex_lock after receiving -EDEADLK,
++ * but 'forgot' to unlock everything else first?
++ */
++ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
++ ww_ctx->contending_lock = NULL;
++ }
++
++ /*
++ * Naughty, using a different class will lead to undefined behavior!
++ */
++ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
++#endif
++ ww_ctx->acquired++;
++}
++
++#ifdef CONFIG_PREEMPT_RT
++static void ww_mutex_account_lock(struct rt_mutex *lock,
++ struct ww_acquire_ctx *ww_ctx)
++{
++ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
++ struct rt_mutex_waiter *waiter, *n;
++
++ /*
++ * This branch gets optimized out for the common case,
++ * and is only important for ww_mutex_lock.
++ */
++ ww_mutex_lock_acquired(ww, ww_ctx);
++ ww->ctx = ww_ctx;
++
++ /*
++ * Give any possible sleeping processes the chance to wake up,
++ * so they can recheck if they have to back off.
++ */
++ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root,
++ tree_entry) {
++ /* XXX debug rt mutex waiter wakeup */
++
++ BUG_ON(waiter->lock != lock);
++ rt_mutex_wake_waiter(waiter);
++ }
++}
++
++#else
++
++static void ww_mutex_account_lock(struct rt_mutex *lock,
++ struct ww_acquire_ctx *ww_ctx)
++{
++ BUG();
++}
++#endif
++
+ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+ enum rtmutex_chainwalk chwalk,
++ struct ww_acquire_ctx *ww_ctx,
+ struct rt_mutex_waiter *waiter)
+ {
+ int ret;
+
++#ifdef CONFIG_PREEMPT_RT
++ if (ww_ctx) {
++ struct ww_mutex *ww;
++
++ ww = container_of(lock, struct ww_mutex, base.lock);
++ if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
++ return -EALREADY;
++ }
++#endif
++
+ /* Try to acquire the lock again: */
+- if (try_to_take_rt_mutex(lock, current, NULL))
++ if (try_to_take_rt_mutex(lock, current, NULL)) {
++ if (ww_ctx)
++ ww_mutex_account_lock(lock, ww_ctx);
+ return 0;
++ }
+
+ set_current_state(state);
+
+@@ -1559,14 +1691,24 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+
+ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
+
+- if (likely(!ret))
++ if (likely(!ret)) {
+ /* sleep on the mutex */
+- ret = __rt_mutex_slowlock(lock, state, timeout, waiter);
++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter,
++ ww_ctx);
++ } else if (ww_ctx) {
++ /* ww_mutex received EDEADLK, let it become EALREADY */
++ ret = __mutex_lock_check_stamp(lock, ww_ctx);
++ BUG_ON(!ret);
++ }
+
+ if (unlikely(ret)) {
+ __set_current_state(TASK_RUNNING);
+ remove_waiter(lock, waiter);
+- rt_mutex_handle_deadlock(ret, chwalk, waiter);
++ /* ww_mutex wants to report EDEADLK/EALREADY, let it */
++ if (!ww_ctx)
++ rt_mutex_handle_deadlock(ret, chwalk, waiter);
++ } else if (ww_ctx) {
++ ww_mutex_account_lock(lock, ww_ctx);
+ }
+
+ /*
+@@ -1583,7 +1725,8 @@ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+ static int __sched
+ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk)
++ enum rtmutex_chainwalk chwalk,
++ struct ww_acquire_ctx *ww_ctx)
+ {
+ struct rt_mutex_waiter waiter;
+ unsigned long flags;
+@@ -1601,7 +1744,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter);
++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx,
++ &waiter);
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+@@ -1731,14 +1875,16 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+ */
+ static inline int
+ rt_mutex_fastlock(struct rt_mutex *lock, int state,
++ struct ww_acquire_ctx *ww_ctx,
+ int (*slowfn)(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk))
++ enum rtmutex_chainwalk chwalk,
++ struct ww_acquire_ctx *ww_ctx))
+ {
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+ return 0;
+
+- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
++ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx);
+ }
+
+ static inline int
+@@ -1783,7 +1929,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
+ int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
+ {
+ might_sleep();
+- return rt_mutex_fastlock(lock, state, rt_mutex_slowlock);
++ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock);
+ }
+
+ /**
+@@ -2233,7 +2379,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+ raw_spin_lock_irq(&lock->wait_lock);
+ /* sleep on the mutex */
+ set_current_state(TASK_INTERRUPTIBLE);
+- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
++ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+@@ -2303,3 +2449,97 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+
+ return cleanup;
+ }
++
++static inline int
++ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
++{
++#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
++ unsigned int tmp;
++
++ if (ctx->deadlock_inject_countdown-- == 0) {
++ tmp = ctx->deadlock_inject_interval;
++ if (tmp > UINT_MAX/4)
++ tmp = UINT_MAX;
++ else
++ tmp = tmp*2 + tmp + tmp/2;
++
++ ctx->deadlock_inject_interval = tmp;
++ ctx->deadlock_inject_countdown = tmp;
++ ctx->contending_lock = lock;
++
++ ww_mutex_unlock(lock);
++
++ return -EDEADLK;
++ }
++#endif
++
++ return 0;
++}
++
++#ifdef CONFIG_PREEMPT_RT
++int __sched
++ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
++{
++ int ret;
++
++ might_sleep();
++
++ mutex_acquire_nest(&lock->base.dep_map, 0, 0,
++ ctx ? &ctx->dep_map : NULL, _RET_IP_);
++ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0,
++ ctx);
++ if (ret)
++ mutex_release(&lock->base.dep_map, _RET_IP_);
++ else if (!ret && ctx && ctx->acquired > 1)
++ return ww_mutex_deadlock_injection(lock, ctx);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
++
++int __sched
++ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
++{
++ int ret;
++
++ might_sleep();
++
++ mutex_acquire_nest(&lock->base.dep_map, 0, 0,
++ ctx ? &ctx->dep_map : NULL, _RET_IP_);
++ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0,
++ ctx);
++ if (ret)
++ mutex_release(&lock->base.dep_map, _RET_IP_);
++ else if (!ret && ctx && ctx->acquired > 1)
++ return ww_mutex_deadlock_injection(lock, ctx);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(ww_mutex_lock);
++
++void __sched ww_mutex_unlock(struct ww_mutex *lock)
++{
++ /*
++ * The unlocking fastpath is the 0->1 transition from 'locked'
++ * into 'unlocked' state:
++ */
++ if (lock->ctx) {
++#ifdef CONFIG_DEBUG_MUTEXES
++ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
++#endif
++ if (lock->ctx->acquired > 0)
++ lock->ctx->acquired--;
++ lock->ctx = NULL;
++ }
++
++ mutex_release(&lock->base.dep_map, _RET_IP_);
++ __rt_mutex_unlock(&lock->base.lock);
++}
++EXPORT_SYMBOL(ww_mutex_unlock);
++
++int __rt_mutex_owner_current(struct rt_mutex *lock)
++{
++ return rt_mutex_owner(lock) == current;
++}
++EXPORT_SYMBOL(__rt_mutex_owner_current);
++#endif
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index c1a280167e3c..248a7d91583b 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
+ struct wake_q_head *wake_sleeper_q);
+
+ /* RW semaphore special interface */
++struct ww_acquire_ctx;
+
+ extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
+ extern int __rt_mutex_trylock(struct rt_mutex *lock);
+@@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_mutex *lock);
+ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+ enum rtmutex_chainwalk chwalk,
++ struct ww_acquire_ctx *ww_ctx,
+ struct rt_mutex_waiter *waiter);
+ void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c
+index a0771c150041..274172d5bb3a 100644
+--- a/kernel/locking/rwsem-rt.c
++++ b/kernel/locking/rwsem-rt.c
+@@ -138,7 +138,7 @@ static int __sched __down_read_common(struct rw_semaphore *sem, int state)
+ */
+ rt_mutex_init_waiter(&waiter, false);
+ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK,
+- &waiter);
++ NULL, &waiter);
+ /*
+ * The slowlock() above is guaranteed to return with the rtmutex (for
+ * ret = 0) is now held, so there can't be a writer active. Increment
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch b/features/rt/locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
new file mode 100644
index 00000000..af6400f3
--- /dev/null
+++ b/features/rt/locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
@@ -0,0 +1,129 @@
+From 9d29157866e3702ace5facd72443ded3418db861 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 16:36:39 +0200
+Subject: [PATCH 073/191] locking/rtmutex: export lockdep-less version of
+ rt_mutex's lock, trylock and unlock
+
+Required for lock implementation on top of rtmutex.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 54 +++++++++++++++++++++++----------
+ kernel/locking/rtmutex_common.h | 3 ++
+ 2 files changed, 41 insertions(+), 16 deletions(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 670c4a577322..86012b16f0c2 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1451,12 +1451,33 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
+ rt_mutex_postunlock(&wake_q);
+ }
+
+-static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
++int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
+ {
+ might_sleep();
++ return rt_mutex_fastlock(lock, state, rt_mutex_slowlock);
++}
++
++/**
++ * rt_mutex_lock_state - lock a rt_mutex with a given state
++ *
++ * @lock: The rt_mutex to be locked
++ * @state: The state to set when blocking on the rt_mutex
++ */
++static inline int __sched rt_mutex_lock_state(struct rt_mutex *lock,
++ unsigned int subclass, int state)
++{
++ int ret;
+
+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
++ ret = __rt_mutex_lock_state(lock, state);
++ if (ret)
++ mutex_release(&lock->dep_map, _RET_IP_);
++ return ret;
++}
++
++static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
++{
++ rt_mutex_lock_state(lock, subclass, TASK_UNINTERRUPTIBLE);
+ }
+
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+@@ -1497,16 +1518,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+ */
+ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
+ {
+- int ret;
+-
+- might_sleep();
+-
+- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+- if (ret)
+- mutex_release(&lock->dep_map, _RET_IP_);
+-
+- return ret;
++ return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE);
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+
+@@ -1523,6 +1535,14 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+ return __rt_mutex_slowtrylock(lock);
+ }
+
++int __sched __rt_mutex_trylock(struct rt_mutex *lock)
++{
++ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
++ return 0;
++
++ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
++}
++
+ /**
+ * rt_mutex_trylock - try to lock a rt_mutex
+ *
+@@ -1538,10 +1558,7 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock)
+ {
+ int ret;
+
+- if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
+- return 0;
+-
+- ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
++ ret = __rt_mutex_trylock(lock);
+ if (ret)
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+@@ -1549,6 +1566,11 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock)
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
+
++void __sched __rt_mutex_unlock(struct rt_mutex *lock)
++{
++ rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
++}
++
+ /**
+ * rt_mutex_unlock - unlock a rt_mutex
+ *
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index b5a2affa59d5..9d1e974ca9c3 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -156,6 +156,9 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
+ /* RW semaphore special interface */
+
++extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
++extern int __rt_mutex_trylock(struct rt_mutex *lock);
++extern void __rt_mutex_unlock(struct rt_mutex *lock);
+ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+ struct hrtimer_sleeper *timeout,
+ enum rtmutex_chainwalk chwalk,
+--
+2.19.1
+
diff --git a/features/rt/locking-rtmutex-wire-up-RT-s-locking.patch b/features/rt/locking-rtmutex-wire-up-RT-s-locking.patch
new file mode 100644
index 00000000..c6b001b5
--- /dev/null
+++ b/features/rt/locking-rtmutex-wire-up-RT-s-locking.patch
@@ -0,0 +1,346 @@
+From cd8326d4ea23e407e1168cc2c6f1005d5c0ec743 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 12 Oct 2017 17:31:14 +0200
+Subject: [PATCH 080/191] locking/rtmutex: wire up RT's locking
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/mutex.h | 25 +++++++++++++++----------
+ include/linux/rwsem.h | 12 ++++++++++++
+ include/linux/spinlock.h | 12 +++++++++++-
+ include/linux/spinlock_api_smp.h | 4 +++-
+ include/linux/spinlock_types.h | 11 ++++++++---
+ include/linux/spinlock_types_up.h | 2 +-
+ kernel/Kconfig.preempt | 1 +
+ kernel/locking/Makefile | 10 +++++++---
+ kernel/locking/rwsem.c | 6 ++++++
+ kernel/locking/spinlock.c | 7 +++++++
+ kernel/locking/spinlock_debug.c | 5 +++++
+ 11 files changed, 76 insertions(+), 19 deletions(-)
+
+diff --git a/include/linux/mutex.h b/include/linux/mutex.h
+index 0cd631a19727..50c873744ab0 100644
+--- a/include/linux/mutex.h
++++ b/include/linux/mutex.h
+@@ -22,6 +22,20 @@
+
+ struct ww_acquire_ctx;
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
++ , .dep_map = { \
++ .name = #lockname, \
++ .wait_type_inner = LD_WAIT_SLEEP, \
++ }
++#else
++# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
++#endif
++
++#ifdef CONFIG_PREEMPT_RT
++# include <linux/mutex_rt.h>
++#else
++
+ /*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+@@ -119,16 +133,6 @@ do { \
+ __mutex_init((mutex), #mutex, &__key); \
+ } while (0)
+
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
+- , .dep_map = { \
+- .name = #lockname, \
+- .wait_type_inner = LD_WAIT_SLEEP, \
+- }
+-#else
+-# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
+-#endif
+-
+ #define __MUTEX_INITIALIZER(lockname) \
+ { .owner = ATOMIC_LONG_INIT(0) \
+ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
+@@ -199,4 +203,5 @@ extern void mutex_unlock(struct mutex *lock);
+
+ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
++#endif /* !PREEMPT_RT */
+ #endif /* __LINUX_MUTEX_H */
+diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
+index 4c715be48717..9323af8a9244 100644
+--- a/include/linux/rwsem.h
++++ b/include/linux/rwsem.h
+@@ -16,6 +16,11 @@
+ #include <linux/spinlock.h>
+ #include <linux/atomic.h>
+ #include <linux/err.h>
++
++#ifdef CONFIG_PREEMPT_RT
++#include <linux/rwsem-rt.h>
++#else /* PREEMPT_RT */
++
+ #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ #include <linux/osq_lock.h>
+ #endif
+@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
+ return !list_empty(&sem->wait_list);
+ }
+
++#endif /* !PREEMPT_RT */
++
++/*
++ * The functions below are the same for all rwsem implementations including
++ * the RT specific variant.
++ */
++
+ /*
+ * lock for reading
+ */
+diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
+index 79897841a2cc..c3c70291b46c 100644
+--- a/include/linux/spinlock.h
++++ b/include/linux/spinlock.h
+@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
+ })
+
+ /* Include rwlock functions */
+-#include <linux/rwlock.h>
++#ifdef CONFIG_PREEMPT_RT
++# include <linux/rwlock_rt.h>
++#else
++# include <linux/rwlock.h>
++#endif
+
+ /*
+ * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
+@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
+ # include <linux/spinlock_api_up.h>
+ #endif
+
++#ifdef CONFIG_PREEMPT_RT
++# include <linux/spinlock_rt.h>
++#else /* PREEMPT_RT */
++
+ /*
+ * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
+ */
+@@ -454,6 +462,8 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
+
+ #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock)
+
++#endif /* !PREEMPT_RT */
++
+ /*
+ * Pull the atomic_t declaration:
+ * (asm-mips/atomic.h needs above definitions)
+diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
+index 19a9be9d97ee..da38149f2843 100644
+--- a/include/linux/spinlock_api_smp.h
++++ b/include/linux/spinlock_api_smp.h
+@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
+ return 0;
+ }
+
+-#include <linux/rwlock_api_smp.h>
++#ifndef CONFIG_PREEMPT_RT
++# include <linux/rwlock_api_smp.h>
++#endif
+
+ #endif /* __LINUX_SPINLOCK_API_SMP_H */
+diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
+index 5c8664d57fb8..8d896d3e1a01 100644
+--- a/include/linux/spinlock_types.h
++++ b/include/linux/spinlock_types.h
+@@ -11,8 +11,13 @@
+
+ #include <linux/spinlock_types_raw.h>
+
+-#include <linux/spinlock_types_nort.h>
+-
+-#include <linux/rwlock_types.h>
++#ifndef CONFIG_PREEMPT_RT
++# include <linux/spinlock_types_nort.h>
++# include <linux/rwlock_types.h>
++#else
++# include <linux/rtmutex.h>
++# include <linux/spinlock_types_rt.h>
++# include <linux/rwlock_types_rt.h>
++#endif
+
+ #endif /* __LINUX_SPINLOCK_TYPES_H */
+diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
+index c09b6407ae1b..d9b371fa13e0 100644
+--- a/include/linux/spinlock_types_up.h
++++ b/include/linux/spinlock_types_up.h
+@@ -1,7 +1,7 @@
+ #ifndef __LINUX_SPINLOCK_TYPES_UP_H
+ #define __LINUX_SPINLOCK_TYPES_UP_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H)
+ # error "please don't include this file directly"
+ #endif
+
+diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
+index 416017301660..c8e516f6db20 100644
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -60,6 +60,7 @@ config PREEMPT_RT
+ bool "Fully Preemptible Kernel (Real-Time)"
+ depends on EXPERT && ARCH_SUPPORTS_RT
+ select PREEMPTION
++ select RT_MUTEXES
+ help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
+index 8838f1d7c4a2..b4bdab93e2fc 100644
+--- a/kernel/locking/Makefile
++++ b/kernel/locking/Makefile
+@@ -3,7 +3,7 @@
+ # and is generally not a function of system call inputs.
+ KCOV_INSTRUMENT := n
+
+-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
++obj-y += semaphore.o rwsem.o percpu-rwsem.o
+
+ # Avoid recursion lockdep -> KCSAN -> ... -> lockdep.
+ KCSAN_SANITIZE_lockdep.o := n
+@@ -16,19 +16,23 @@ CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
+ endif
+
+ obj-$(CONFIG_DEBUG_IRQFLAGS) += irqflag-debug.o
+-obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
+ obj-$(CONFIG_LOCKDEP) += lockdep.o
+ ifeq ($(CONFIG_PROC_FS),y)
+ obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
+ endif
+ obj-$(CONFIG_SMP) += spinlock.o
+-obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
+ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+ obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
+ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
+ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
+ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
++ifneq ($(CONFIG_PREEMPT_RT),y)
++obj-y += mutex.o
++obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
++obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
++endif
++obj-$(CONFIG_PREEMPT_RT) += mutex-rt.o rwsem-rt.o rwlock-rt.o
+ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
+ obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
+ obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index abba5df50006..bed9a5e32a16 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -28,6 +28,7 @@
+ #include <linux/rwsem.h>
+ #include <linux/atomic.h>
+
++#ifndef CONFIG_PREEMPT_RT
+ #include "lock_events.h"
+
+ /*
+@@ -1343,6 +1344,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
+ if (tmp & RWSEM_FLAG_WAITERS)
+ rwsem_downgrade_wake(sem);
+ }
++#endif
+
+ /*
+ * lock for reading
+@@ -1506,7 +1508,9 @@ void down_read_non_owner(struct rw_semaphore *sem)
+ {
+ might_sleep();
+ __down_read(sem);
++#ifndef CONFIG_PREEMPT_RT
+ __rwsem_set_reader_owned(sem, NULL);
++#endif
+ }
+ EXPORT_SYMBOL(down_read_non_owner);
+
+@@ -1535,7 +1539,9 @@ EXPORT_SYMBOL(down_write_killable_nested);
+
+ void up_read_non_owner(struct rw_semaphore *sem)
+ {
++#ifndef CONFIG_PREEMPT_RT
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
++#endif
+ __up_read(sem);
+ }
+ EXPORT_SYMBOL(up_read_non_owner);
+diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
+index 0ff08380f531..45445a2f1799 100644
+--- a/kernel/locking/spinlock.c
++++ b/kernel/locking/spinlock.c
+@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
+ * __[spin|read|write]_lock_bh()
+ */
+ BUILD_LOCK_OPS(spin, raw_spinlock);
++
++#ifndef CONFIG_PREEMPT_RT
+ BUILD_LOCK_OPS(read, rwlock);
+ BUILD_LOCK_OPS(write, rwlock);
++#endif
+
+ #endif
+
+@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
+ EXPORT_SYMBOL(_raw_spin_unlock_bh);
+ #endif
+
++#ifndef CONFIG_PREEMPT_RT
++
+ #ifndef CONFIG_INLINE_READ_TRYLOCK
+ int __lockfunc _raw_read_trylock(rwlock_t *lock)
+ {
+@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
+ EXPORT_SYMBOL(_raw_write_unlock_bh);
+ #endif
+
++#endif /* !PREEMPT_RT */
++
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
+diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
+index b9d93087ee66..72e306e0e8a3 100644
+--- a/kernel/locking/spinlock_debug.c
++++ b/kernel/locking/spinlock_debug.c
+@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
+
+ EXPORT_SYMBOL(__raw_spin_lock_init);
+
++#ifndef CONFIG_PREEMPT_RT
+ void __rwlock_init(rwlock_t *lock, const char *name,
+ struct lock_class_key *key)
+ {
+@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
+ }
+
+ EXPORT_SYMBOL(__rwlock_init);
++#endif
+
+ static void spin_dump(raw_spinlock_t *lock, const char *msg)
+ {
+@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
+ arch_spin_unlock(&lock->raw_lock);
+ }
+
++#ifndef CONFIG_PREEMPT_RT
+ static void rwlock_bug(rwlock_t *lock, const char *msg)
+ {
+ if (!debug_locks_off())
+@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock)
+ debug_write_unlock(lock);
+ arch_write_unlock(&lock->raw_lock);
+ }
++
++#endif
+--
+2.19.1
+
diff --git a/features/rt/locking-spinlock-Split-the-lock-types-header.patch b/features/rt/locking-spinlock-Split-the-lock-types-header.patch
new file mode 100644
index 00000000..6dccd49d
--- /dev/null
+++ b/features/rt/locking-spinlock-Split-the-lock-types-header.patch
@@ -0,0 +1,252 @@
+From e51cfeec84cee412a53528dc1069ace2ab24ae44 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 19:34:01 +0200
+Subject: [PATCH 068/191] locking/spinlock: Split the lock types header
+
+Split raw_spinlock into its own file and the remaining spinlock_t into
+its own non-RT header. The non-RT header will be replaced later by sleeping
+spinlocks.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rwlock_types.h | 4 ++
+ include/linux/spinlock_types.h | 87 +----------------------------
+ include/linux/spinlock_types_nort.h | 39 +++++++++++++
+ include/linux/spinlock_types_raw.h | 65 +++++++++++++++++++++
+ 4 files changed, 110 insertions(+), 85 deletions(-)
+ create mode 100644 include/linux/spinlock_types_nort.h
+ create mode 100644 include/linux/spinlock_types_raw.h
+
+diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
+index 3bd03e18061c..0ad226b5d8fd 100644
+--- a/include/linux/rwlock_types.h
++++ b/include/linux/rwlock_types.h
+@@ -1,6 +1,10 @@
+ #ifndef __LINUX_RWLOCK_TYPES_H
+ #define __LINUX_RWLOCK_TYPES_H
+
++#if !defined(__LINUX_SPINLOCK_TYPES_H)
++# error "Do not include directly, include spinlock_types.h"
++#endif
++
+ /*
+ * include/linux/rwlock_types.h - generic rwlock type definitions
+ * and initializers
+diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
+index b981caafe8bf..5c8664d57fb8 100644
+--- a/include/linux/spinlock_types.h
++++ b/include/linux/spinlock_types.h
+@@ -9,92 +9,9 @@
+ * Released under the General Public License (GPL).
+ */
+
+-#if defined(CONFIG_SMP)
+-# include <asm/spinlock_types.h>
+-#else
+-# include <linux/spinlock_types_up.h>
+-#endif
++#include <linux/spinlock_types_raw.h>
+
+-#include <linux/lockdep_types.h>
+-
+-typedef struct raw_spinlock {
+- arch_spinlock_t raw_lock;
+-#ifdef CONFIG_DEBUG_SPINLOCK
+- unsigned int magic, owner_cpu;
+- void *owner;
+-#endif
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+- struct lockdep_map dep_map;
+-#endif
+-} raw_spinlock_t;
+-
+-#define SPINLOCK_MAGIC 0xdead4ead
+-
+-#define SPINLOCK_OWNER_INIT ((void *)-1L)
+-
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define RAW_SPIN_DEP_MAP_INIT(lockname) \
+- .dep_map = { \
+- .name = #lockname, \
+- .wait_type_inner = LD_WAIT_SPIN, \
+- }
+-# define SPIN_DEP_MAP_INIT(lockname) \
+- .dep_map = { \
+- .name = #lockname, \
+- .wait_type_inner = LD_WAIT_CONFIG, \
+- }
+-#else
+-# define RAW_SPIN_DEP_MAP_INIT(lockname)
+-# define SPIN_DEP_MAP_INIT(lockname)
+-#endif
+-
+-#ifdef CONFIG_DEBUG_SPINLOCK
+-# define SPIN_DEBUG_INIT(lockname) \
+- .magic = SPINLOCK_MAGIC, \
+- .owner_cpu = -1, \
+- .owner = SPINLOCK_OWNER_INIT,
+-#else
+-# define SPIN_DEBUG_INIT(lockname)
+-#endif
+-
+-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
+- { \
+- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+- SPIN_DEBUG_INIT(lockname) \
+- RAW_SPIN_DEP_MAP_INIT(lockname) }
+-
+-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
+- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
+-
+-#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
+-
+-typedef struct spinlock {
+- union {
+- struct raw_spinlock rlock;
+-
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
+- struct {
+- u8 __padding[LOCK_PADSIZE];
+- struct lockdep_map dep_map;
+- };
+-#endif
+- };
+-} spinlock_t;
+-
+-#define ___SPIN_LOCK_INITIALIZER(lockname) \
+- { \
+- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+- SPIN_DEBUG_INIT(lockname) \
+- SPIN_DEP_MAP_INIT(lockname) }
+-
+-#define __SPIN_LOCK_INITIALIZER(lockname) \
+- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } }
+-
+-#define __SPIN_LOCK_UNLOCKED(lockname) \
+- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname)
+-
+-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
++#include <linux/spinlock_types_nort.h>
+
+ #include <linux/rwlock_types.h>
+
+diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h
+new file mode 100644
+index 000000000000..e4549f0dd197
+--- /dev/null
++++ b/include/linux/spinlock_types_nort.h
+@@ -0,0 +1,39 @@
++#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
++#define __LINUX_SPINLOCK_TYPES_NORT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++/*
++ * The non RT version maps spinlocks to raw_spinlocks
++ */
++typedef struct spinlock {
++ union {
++ struct raw_spinlock rlock;
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
++ struct {
++ u8 __padding[LOCK_PADSIZE];
++ struct lockdep_map dep_map;
++ };
++#endif
++ };
++} spinlock_t;
++
++#define ___SPIN_LOCK_INITIALIZER(lockname) \
++{ \
++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
++ SPIN_DEBUG_INIT(lockname) \
++ SPIN_DEP_MAP_INIT(lockname) }
++
++#define __SPIN_LOCK_INITIALIZER(lockname) \
++ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } }
++
++#define __SPIN_LOCK_UNLOCKED(lockname) \
++ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname)
++
++#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
++
++#endif
+diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
+new file mode 100644
+index 000000000000..1d4a180e983d
+--- /dev/null
++++ b/include/linux/spinlock_types_raw.h
+@@ -0,0 +1,65 @@
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
++#define __LINUX_SPINLOCK_TYPES_RAW_H
++
++#include <linux/types.h>
++
++#if defined(CONFIG_SMP)
++# include <asm/spinlock_types.h>
++#else
++# include <linux/spinlock_types_up.h>
++#endif
++
++#include <linux/lockdep_types.h>
++
++typedef struct raw_spinlock {
++ arch_spinlock_t raw_lock;
++#ifdef CONFIG_DEBUG_SPINLOCK
++ unsigned int magic, owner_cpu;
++ void *owner;
++#endif
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++} raw_spinlock_t;
++
++#define SPINLOCK_MAGIC 0xdead4ead
++
++#define SPINLOCK_OWNER_INIT ((void *)-1L)
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define RAW_SPIN_DEP_MAP_INIT(lockname) \
++ .dep_map = { \
++ .name = #lockname, \
++ .wait_type_inner = LD_WAIT_SPIN, \
++ }
++# define SPIN_DEP_MAP_INIT(lockname) \
++ .dep_map = { \
++ .name = #lockname, \
++ .wait_type_inner = LD_WAIT_CONFIG, \
++ }
++#else
++# define RAW_SPIN_DEP_MAP_INIT(lockname)
++# define SPIN_DEP_MAP_INIT(lockname)
++#endif
++
++#ifdef CONFIG_DEBUG_SPINLOCK
++# define SPIN_DEBUG_INIT(lockname) \
++ .magic = SPINLOCK_MAGIC, \
++ .owner_cpu = -1, \
++ .owner = SPINLOCK_OWNER_INIT,
++#else
++# define SPIN_DEBUG_INIT(lockname)
++#endif
++
++#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
++{ \
++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
++ SPIN_DEBUG_INIT(lockname) \
++ RAW_SPIN_DEP_MAP_INIT(lockname) }
++
++#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
++ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
++
++#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
++
++#endif
+--
+2.19.1
+
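The headers split out above only move type definitions; the practical difference between the two flavours shows up at the usage sites. A minimal sketch of how the two lock types are declared and taken on a PREEMPT_RT kernel (illustrative only; the lock and function names are made up):

#include <linux/spinlock.h>

/* A raw spinlock always spins, even on PREEMPT_RT, so its critical
 * section must stay short and must never sleep. */
static DEFINE_RAW_SPINLOCK(example_raw_lock);

/* A spinlock_t is substituted with a sleeping lock on PREEMPT_RT,
 * which is why its dep_map above uses LD_WAIT_CONFIG instead of
 * LD_WAIT_SPIN. */
static DEFINE_SPINLOCK(example_lock);

static void example_usage(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_raw_lock, flags);
	/* truly atomic section: no sleeping locks, no allocations */
	raw_spin_unlock_irqrestore(&example_raw_lock, flags);

	spin_lock(&example_lock);
	/* may be preempted on PREEMPT_RT; other spinlock_t locks may nest */
	spin_unlock(&example_lock);
}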
diff --git a/features/rt/locking-split-out-the-rbtree-definition.patch b/features/rt/locking-split-out-the-rbtree-definition.patch
new file mode 100644
index 00000000..8af42b5d
--- /dev/null
+++ b/features/rt/locking-split-out-the-rbtree-definition.patch
@@ -0,0 +1,119 @@
+From e13218659647185acc44046be640f023fd392f7b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 14 Aug 2020 17:08:41 +0200
+Subject: [PATCH 071/191] locking: split out the rbtree definition
+
+rtmutex.h needs the definition for rb_root_cached. By including kernel.h
+we will get to spinlock.h which requires rtmutex.h again.
+
+Split out the required struct definition and move it into its own header
+file which can be included by rtmutex.h
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/rbtree.h | 27 +--------------------------
+ include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++
+ include/linux/rtmutex.h | 2 +-
+ 3 files changed, 33 insertions(+), 27 deletions(-)
+ create mode 100644 include/linux/rbtree_type.h
+
+diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
+index d31ecaf4fdd3..e711efc2e009 100644
+--- a/include/linux/rbtree.h
++++ b/include/linux/rbtree.h
+@@ -19,19 +19,9 @@
+
+ #include <linux/kernel.h>
+ #include <linux/stddef.h>
++#include <linux/rbtree_type.h>
+ #include <linux/rcupdate.h>
+
+-struct rb_node {
+- unsigned long __rb_parent_color;
+- struct rb_node *rb_right;
+- struct rb_node *rb_left;
+-} __attribute__((aligned(sizeof(long))));
+- /* The alignment might seem pointless, but allegedly CRIS needs it */
+-
+-struct rb_root {
+- struct rb_node *rb_node;
+-};
+-
+ #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+ #define RB_ROOT (struct rb_root) { NULL, }
+@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
+ typeof(*pos), field); 1; }); \
+ pos = n)
+
+-/*
+- * Leftmost-cached rbtrees.
+- *
+- * We do not cache the rightmost node based on footprint
+- * size vs number of potential users that could benefit
+- * from O(1) rb_last(). Just not worth it, users that want
+- * this feature can always implement the logic explicitly.
+- * Furthermore, users that want to cache both pointers may
+- * find it a bit asymmetric, but that's ok.
+- */
+-struct rb_root_cached {
+- struct rb_root rb_root;
+- struct rb_node *rb_leftmost;
+-};
+-
+ #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+
+ /* Same as rb_first(), but O(1) */
+diff --git a/include/linux/rbtree_type.h b/include/linux/rbtree_type.h
+new file mode 100644
+index 000000000000..77a89dd2c7c6
+--- /dev/null
++++ b/include/linux/rbtree_type.h
+@@ -0,0 +1,31 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++#ifndef _LINUX_RBTREE_TYPE_H
++#define _LINUX_RBTREE_TYPE_H
++
++struct rb_node {
++ unsigned long __rb_parent_color;
++ struct rb_node *rb_right;
++ struct rb_node *rb_left;
++} __attribute__((aligned(sizeof(long))));
++/* The alignment might seem pointless, but allegedly CRIS needs it */
++
++struct rb_root {
++ struct rb_node *rb_node;
++};
++
++/*
++ * Leftmost-cached rbtrees.
++ *
++ * We do not cache the rightmost node based on footprint
++ * size vs number of potential users that could benefit
++ * from O(1) rb_last(). Just not worth it, users that want
++ * this feature can always implement the logic explicitly.
++ * Furthermore, users that want to cache both pointers may
++ * find it a bit asymmetric, but that's ok.
++ */
++struct rb_root_cached {
++ struct rb_root rb_root;
++ struct rb_node *rb_leftmost;
++};
++
++#endif
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index add1dab27df5..b828b938c876 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -14,7 +14,7 @@
+ #define __LINUX_RT_MUTEX_H
+
+ #include <linux/linkage.h>
+-#include <linux/rbtree.h>
++#include <linux/rbtree_type.h>
+ #include <linux/spinlock_types_raw.h>
+
+ extern int max_lock_depth; /* for sysctl */
+--
+2.19.1
+
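The new rbtree_type.h carries only the structure definitions; callers still include rbtree.h for the operations. A minimal, hypothetical example of the leftmost-cached tree that the split isolates:

#include <linux/rbtree.h>

/* Hypothetical container built on the leftmost-cached rbtree */
struct example_queue {
	struct rb_root_cached head;
};

static void example_queue_init(struct example_queue *q)
{
	q->head = RB_ROOT_CACHED;
}

/* The cached leftmost pointer makes "first element" an O(1) lookup */
static struct rb_node *example_queue_peek(struct example_queue *q)
{
	return rb_first_cached(&q->head);
}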
diff --git a/features/rt/md-raid5-Make-raid5_percpu-handling-RT-aware.patch b/features/rt/md-raid5-Make-raid5_percpu-handling-RT-aware.patch
new file mode 100644
index 00000000..43380924
--- /dev/null
+++ b/features/rt/md-raid5-Make-raid5_percpu-handling-RT-aware.patch
@@ -0,0 +1,68 @@
+From 28893cffb6465718d2196b201ec4dc9e41aca776 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 6 Apr 2010 16:51:31 +0200
+Subject: [PATCH 135/191] md: raid5: Make raid5_percpu handling RT aware
+
+__raid_run_ops() disables preemption with get_cpu() around the access
+to the raid5_percpu variables. That causes scheduling while atomic
+spews on RT.
+
+Serialize the access to the percpu data with a lock and keep the code
+preemptible.
+
+Reported-by: Udo van den Heuvel <udovdh@xs4all.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
+---
+ drivers/md/raid5.c | 7 +++++--
+ drivers/md/raid5.h | 1 +
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
+index 5d57a5bd171f..86d6a676a509 100644
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -2216,8 +2216,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+
+- cpu = get_cpu();
++ cpu = get_cpu_light();
+ percpu = per_cpu_ptr(conf->percpu, cpu);
++ spin_lock(&percpu->lock);
+ if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
+ ops_run_biofill(sh);
+ overlap_clear++;
+@@ -2276,7 +2277,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+ if (test_and_clear_bit(R5_Overlap, &dev->flags))
+ wake_up(&sh->raid_conf->wait_for_overlap);
+ }
+- put_cpu();
++ spin_unlock(&percpu->lock);
++ put_cpu_light();
+ }
+
+ static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+@@ -7079,6 +7081,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
+ __func__, cpu);
+ return -ENOMEM;
+ }
++ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
+ return 0;
+ }
+
+diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
+index 5c05acf20e1f..665fe138ab4f 100644
+--- a/drivers/md/raid5.h
++++ b/drivers/md/raid5.h
+@@ -635,6 +635,7 @@ struct r5conf {
+ int recovery_disabled;
+ /* per cpu variables */
+ struct raid5_percpu {
++ spinlock_t lock; /* Protection for -RT */
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+ void *scribble; /* space for constructing buffer
+ * lists and performing address
+--
+2.19.1
+
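The same conversion recurs in several later patches of this series: get_cpu(), which disables preemption, becomes the RT-only helper get_cpu_light(), and a regular spinlock serializes the per-CPU data so the section stays preemptible. A hypothetical sketch of that pattern (example_percpu/example_run are made up; get_cpu_light()/put_cpu_light() come from another patch in this series):

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct example_percpu {
	spinlock_t lock;	/* sleeping lock on PREEMPT_RT */
	unsigned long scratch;
};

/* Each per-CPU lock must be spin_lock_init()ed during setup, as the
 * patch above does in raid456_cpu_up_prepare(). */
static DEFINE_PER_CPU(struct example_percpu, example_data);

static void example_run(void)
{
	struct example_percpu *p;
	int cpu;

	cpu = get_cpu_light();	/* pins the CPU without disabling preemption on RT */
	p = per_cpu_ptr(&example_data, cpu);
	spin_lock(&p->lock);
	p->scratch++;
	spin_unlock(&p->lock);
	put_cpu_light();
}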
diff --git a/features/rt/mm-Allow-only-SLUB-on-RT.patch b/features/rt/mm-Allow-only-SLUB-on-RT.patch
new file mode 100644
index 00000000..54012244
--- /dev/null
+++ b/features/rt/mm-Allow-only-SLUB-on-RT.patch
@@ -0,0 +1,46 @@
+From 3049abb997a5dbe4e0c89c1fd51cb372c85ce15a Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:44:03 -0500
+Subject: [PATCH 094/191] mm: Allow only SLUB on RT
+
+Memory allocation disables interrupts as part of the allocation and freeing
+process. For -RT it is important that this section remain short and not
+depend on the size of the request or an internal state of the memory allocator.
+At the beginning the SLAB memory allocator was adopted for RT's needs and it
+required substantial changes. Later, with the addition of the SLUB memory
+allocator we adopted this one as well and the changes were smaller. More
+importantly, due to the design of the SLUB allocator it performs better and its
+worst-case latency was smaller. In the end only SLUB remained supported.
+
+Disable SLAB and SLOB on -RT. Only SLUB is adapted to -RT's needs.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ init/Kconfig | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/init/Kconfig b/init/Kconfig
+index 5f5c776ef192..d51c16a3f355 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1886,6 +1886,7 @@ choice
+
+ config SLAB
+ bool "SLAB"
++ depends on !PREEMPT_RT
+ select HAVE_HARDENED_USERCOPY_ALLOCATOR
+ help
+ The regular slab allocator that is established and known to work
+@@ -1906,6 +1907,7 @@ config SLUB
+ config SLOB
+ depends on EXPERT
+ bool "SLOB (Simple Allocator)"
++ depends on !PREEMPT_RT
+ help
+ SLOB replaces the stock allocator with a drastically simpler
+ allocator. SLOB is generally more space efficient but
+--
+2.19.1
+
diff --git a/features/rt/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch b/features/rt/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch
new file mode 100644
index 00000000..077738b2
--- /dev/null
+++ b/features/rt/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch
@@ -0,0 +1,43 @@
+From 96e36ffab463f8f5823778279568a9cdd349c6c3 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 28 Oct 2020 18:15:32 +0100
+Subject: [PATCH 086/191] mm/memcontrol: Disable preemption in
+ __mod_memcg_lruvec_state()
+
+The callers expect disabled preemption/interrupts while invoking
+__mod_memcg_lruvec_state(). This works in mainline because a lock of
+some kind is acquired.
+
+Use preempt_disable_rt() where per-CPU variables are accessed and a
+stable pointer is expected. This is also done in __mod_zone_page_state()
+for the same reason.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/memcontrol.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index e064ac0d850a..24091f7a64e2 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -809,6 +809,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ memcg = pn->memcg;
+
++ preempt_disable_rt();
+ /* Update memcg */
+ __mod_memcg_state(memcg, idx, val);
+
+@@ -828,6 +829,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ x = 0;
+ }
+ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
++ preempt_enable_rt();
+ }
+
+ /**
+--
+2.19.1
+
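preempt_disable_rt()/preempt_enable_rt() are helpers provided by this patch series; they compile to nothing on !PREEMPT_RT, where the callers already run with preemption or interrupts disabled. A minimal, hypothetical sketch of the pattern:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_stat);

static void example_stat_inc(void)
{
	/* Disables preemption only on PREEMPT_RT so the per-CPU pointer
	 * stays stable; a no-op otherwise. */
	preempt_disable_rt();
	__this_cpu_inc(example_stat);
	preempt_enable_rt();
}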
diff --git a/features/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/features/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch
new file mode 100644
index 00000000..bc68ff78
--- /dev/null
+++ b/features/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch
@@ -0,0 +1,73 @@
+From 0951cba216f7340f7bdb138df663828817b3261a Mon Sep 17 00:00:00 2001
+From: Yang Shi <yang.shi@windriver.com>
+Date: Wed, 30 Oct 2013 11:48:33 -0700
+Subject: [PATCH 114/191] mm/memcontrol: Don't call schedule_work_on in
+ preemption disabled context
+
+The following trace is triggered when running ltp oom test cases:
+
+BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
+in_atomic(): 1, irqs_disabled(): 0, pid: 17188, name: oom03
+Preemption disabled at:[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
+
+CPU: 2 PID: 17188 Comm: oom03 Not tainted 3.10.10-rt3 #2
+Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
+ffff88007684d730 ffff880070df9b58 ffffffff8169918d ffff880070df9b70
+ffffffff8106db31 ffff88007688b4a0 ffff880070df9b88 ffffffff8169d9c0
+ffff88007688b4a0 ffff880070df9bc8 ffffffff81059da1 0000000170df9bb0
+Call Trace:
+[<ffffffff8169918d>] dump_stack+0x19/0x1b
+[<ffffffff8106db31>] __might_sleep+0xf1/0x170
+[<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
+[<ffffffff81059da1>] queue_work_on+0x61/0x100
+[<ffffffff8112b361>] drain_all_stock+0xe1/0x1c0
+[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
+[<ffffffff8112beda>] __mem_cgroup_try_charge+0x41a/0xc40
+[<ffffffff810f1c91>] ? release_pages+0x1b1/0x1f0
+[<ffffffff8106f200>] ? sched_exec+0x40/0xb0
+[<ffffffff8112cc87>] mem_cgroup_charge_common+0x37/0x70
+[<ffffffff8112e2c6>] mem_cgroup_newpage_charge+0x26/0x30
+[<ffffffff8110af68>] handle_pte_fault+0x618/0x840
+[<ffffffff8103ecf6>] ? unpin_current_cpu+0x16/0x70
+[<ffffffff81070f94>] ? migrate_enable+0xd4/0x200
+[<ffffffff8110cde5>] handle_mm_fault+0x145/0x1e0
+[<ffffffff810301e1>] __do_page_fault+0x1a1/0x4c0
+[<ffffffff8169c9eb>] ? preempt_schedule_irq+0x4b/0x70
+[<ffffffff8169e3b7>] ? retint_kernel+0x37/0x40
+[<ffffffff8103053e>] do_page_fault+0xe/0x10
+[<ffffffff8169e4c2>] page_fault+0x22/0x30
+
+So, to prevent schedule_work_on from being called in preempt-disabled context,
+replace the pair of get/put_cpu() with get/put_cpu_light().
+
+Signed-off-by: Yang Shi <yang.shi@windriver.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/memcontrol.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 74a65d87866c..0d741fcf9f53 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2357,7 +2357,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ * as well as workers from this path always operate on the local
+ * per-cpu data. CPU up doesn't touch memcg_stock at all.
+ */
+- curcpu = get_cpu();
++ curcpu = get_cpu_light();
+ for_each_online_cpu(cpu) {
+ struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
+ struct mem_cgroup *memcg;
+@@ -2380,7 +2380,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ schedule_work_on(cpu, &stock->work);
+ }
+ }
+- put_cpu();
++ put_cpu_light();
+ mutex_unlock(&percpu_charge_mutex);
+ }
+
+--
+2.19.1
+
diff --git a/features/rt/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch b/features/rt/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch
new file mode 100644
index 00000000..d85633ab
--- /dev/null
+++ b/features/rt/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch
@@ -0,0 +1,143 @@
+From a597d1d521af35edd6fbb86d1934a0f92bc9d54e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 18 Aug 2020 10:30:00 +0200
+Subject: [PATCH 113/191] mm: memcontrol: Provide a local_lock for per-CPU
+ memcg_stock
+
+The interrupts are disabled to ensure CPU-local access to the per-CPU
+variable `memcg_stock'.
+As the code inside the interrupt disabled section acquires regular
+spinlocks, which are converted to 'sleeping' spinlocks on a PREEMPT_RT
+kernel, this conflicts with the RT semantics.
+
+Convert it to a local_lock which allows RT kernels to substitute it with
+a real per-CPU lock. On non-RT kernels this maps to local_irq_save() as
+before, but also provides lockdep coverage of the critical region.
+No functional change.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/memcontrol.c | 31 ++++++++++++++++++-------------
+ 1 file changed, 18 insertions(+), 13 deletions(-)
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 24091f7a64e2..74a65d87866c 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2210,6 +2210,7 @@ void unlock_page_memcg(struct page *page)
+ EXPORT_SYMBOL(unlock_page_memcg);
+
+ struct memcg_stock_pcp {
++ local_lock_t lock;
+ struct mem_cgroup *cached; /* this never be root cgroup */
+ unsigned int nr_pages;
+
+@@ -2261,7 +2262,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ if (nr_pages > MEMCG_CHARGE_BATCH)
+ return ret;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&memcg_stock.lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
+ if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+@@ -2269,7 +2270,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ ret = true;
+ }
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&memcg_stock.lock, flags);
+
+ return ret;
+ }
+@@ -2304,14 +2305,14 @@ static void drain_local_stock(struct work_struct *dummy)
+ * The only protection from memory hotplug vs. drain_stock races is
+ * that we always operate on local CPU stock here with IRQ disabled
+ */
+- local_irq_save(flags);
++ local_lock_irqsave(&memcg_stock.lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
+ drain_obj_stock(stock);
+ drain_stock(stock);
+ clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&memcg_stock.lock, flags);
+ }
+
+ /*
+@@ -2323,7 +2324,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&memcg_stock.lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
+ if (stock->cached != memcg) { /* reset if necessary */
+@@ -2336,7 +2337,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+ drain_stock(stock);
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&memcg_stock.lock, flags);
+ }
+
+ /*
+@@ -3158,7 +3159,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ unsigned long flags;
+ bool ret = false;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&memcg_stock.lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
+ if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+@@ -3166,7 +3167,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ ret = true;
+ }
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&memcg_stock.lock, flags);
+
+ return ret;
+ }
+@@ -3225,7 +3226,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&memcg_stock.lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
+ if (stock->cached_objcg != objcg) { /* reset if necessary */
+@@ -3239,7 +3240,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ if (stock->nr_bytes > PAGE_SIZE)
+ drain_obj_stock(stock);
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&memcg_stock.lock, flags);
+ }
+
+ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
+@@ -7065,9 +7066,13 @@ static int __init mem_cgroup_init(void)
+ cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
+ memcg_hotplug_cpu_dead);
+
+- for_each_possible_cpu(cpu)
+- INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
+- drain_local_stock);
++ for_each_possible_cpu(cpu) {
++ struct memcg_stock_pcp *stock;
++
++ stock = per_cpu_ptr(&memcg_stock, cpu);
++ INIT_WORK(&stock->work, drain_local_stock);
++ local_lock_init(&stock->lock);
++ }
+
+ for_each_node(node) {
+ struct mem_cgroup_tree_per_node *rtpn;
+--
+2.19.1
+
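local_lock_t is the upstream primitive this and several following patches rely on: on non-RT kernels local_lock_irqsave() maps to local_irq_save() plus lockdep annotation, while on PREEMPT_RT it becomes a per-CPU sleeping lock. A minimal sketch with hypothetical names (the memcg patch above initializes its lock at runtime with local_lock_init() instead of the static initializer shown here):

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct example_pcp {
	local_lock_t lock;
	unsigned int count;
};

static DEFINE_PER_CPU(struct example_pcp, example_pcp) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void example_refill(unsigned int nr)
{
	struct example_pcp *pcp;
	unsigned long flags;

	/* local_irq_save() on !RT, a per-CPU sleeping lock on PREEMPT_RT */
	local_lock_irqsave(&example_pcp.lock, flags);
	pcp = this_cpu_ptr(&example_pcp);
	pcp->count += nr;
	local_unlock_irqrestore(&example_pcp.lock, flags);
}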
diff --git a/features/rt/mm-memcontrol-Replace-local_irq_disable-with-local-l.patch b/features/rt/mm-memcontrol-Replace-local_irq_disable-with-local-l.patch
new file mode 100644
index 00000000..0fd436ea
--- /dev/null
+++ b/features/rt/mm-memcontrol-Replace-local_irq_disable-with-local-l.patch
@@ -0,0 +1,122 @@
+From d4e7b31f217ee00026ee090133e88a9bcc33826b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 28 Jan 2015 17:14:16 +0100
+Subject: [PATCH 115/191] mm/memcontrol: Replace local_irq_disable with local
+ locks
+
+There are a few local_irq_disable() sections which then take sleeping locks.
+This patch converts them to local locks.
+
+[bigeasy: Move unlock after memcg_check_events() in mem_cgroup_swapout(),
+ pointed out by Matt Fleming <matt@codeblueprint.co.uk>]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/memcontrol.c | 29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 0d741fcf9f53..ae52cebfecfc 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -66,6 +66,7 @@
+ #include <net/sock.h>
+ #include <net/ip.h>
+ #include "slab.h"
++#include <linux/local_lock.h>
+
+ #include <linux/uaccess.h>
+
+@@ -96,6 +97,13 @@ bool cgroup_memory_noswap __read_mostly;
+ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
+ #endif
+
++struct event_lock {
++ local_lock_t l;
++};
++static DEFINE_PER_CPU(struct event_lock, event_lock) = {
++ .l = INIT_LOCAL_LOCK(l),
++};
++
+ /* Whether legacy memory+swap accounting is active */
+ static bool do_memsw_account(void)
+ {
+@@ -5677,12 +5685,12 @@ static int mem_cgroup_move_account(struct page *page,
+
+ ret = 0;
+
+- local_irq_disable();
++ local_lock_irq(&event_lock.l);
+ mem_cgroup_charge_statistics(to, page, nr_pages);
+ memcg_check_events(to, page);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
+ memcg_check_events(from, page);
+- local_irq_enable();
++ local_unlock_irq(&event_lock.l);
+ out_unlock:
+ unlock_page(page);
+ out:
+@@ -6739,10 +6747,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+ css_get(&memcg->css);
+ commit_charge(page, memcg);
+
+- local_irq_disable();
++ local_lock_irq(&event_lock.l);
+ mem_cgroup_charge_statistics(memcg, page, nr_pages);
+ memcg_check_events(memcg, page);
+- local_irq_enable();
++ local_unlock_irq(&event_lock.l);
+
+ /*
+ * Cgroup1's unified memory+swap counter has been charged with the
+@@ -6798,11 +6806,11 @@ static void uncharge_batch(const struct uncharge_gather *ug)
+ memcg_oom_recover(ug->memcg);
+ }
+
+- local_irq_save(flags);
++ local_lock_irqsave(&event_lock.l, flags);
+ __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
+ __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages);
+ memcg_check_events(ug->memcg, ug->dummy_page);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&event_lock.l, flags);
+
+ /* drop reference from uncharge_page */
+ css_put(&ug->memcg->css);
+@@ -6935,10 +6943,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
+ css_get(&memcg->css);
+ commit_charge(newpage, memcg);
+
+- local_irq_save(flags);
++ local_lock_irqsave(&event_lock.l, flags);
+ mem_cgroup_charge_statistics(memcg, newpage, nr_pages);
+ memcg_check_events(memcg, newpage);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&event_lock.l, flags);
+ }
+
+ DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
+@@ -7121,6 +7129,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ struct mem_cgroup *memcg, *swap_memcg;
+ unsigned int nr_entries;
+ unsigned short oldid;
++ unsigned long flags;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+@@ -7169,9 +7178,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ * important here to have the interrupts disabled because it is the
+ * only synchronisation we have for updating the per-CPU variables.
+ */
++ local_lock_irqsave(&event_lock.l, flags);
++#ifndef CONFIG_PREEMPT_RT
+ VM_BUG_ON(!irqs_disabled());
++#endif
+ mem_cgroup_charge_statistics(memcg, page, -nr_entries);
+ memcg_check_events(memcg, page);
++ local_unlock_irqrestore(&event_lock.l, flags);
+
+ css_put(&memcg->css);
+ }
+--
+2.19.1
+
diff --git a/features/rt/mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch b/features/rt/mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch
new file mode 100644
index 00000000..619ba657
--- /dev/null
+++ b/features/rt/mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch
@@ -0,0 +1,210 @@
+From 1398d3c431f6fd53914f1b71dd30732092e6baa7 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:37 -0500
+Subject: [PATCH 111/191] mm: page_alloc: Use a local_lock instead of explicit
+ local_irq_save().
+
+The page-allocator disables interrupts for a few reasons:
+- Decouple the irqsave operation from spin_lock() so it can be
+  extended over the actual lock region and cover other areas, such as
+  counter increments where the preemptible version can be avoided.
+
+- Access to the per-CPU pcp from struct zone.
+
+Replace the irqsave with a local-lock. The counters are expected to always
+be modified with preemption disabled and never from interrupt
+context.
+
+Contains fixes from:
+ Peter Zijlstra <a.p.zijlstra@chello.nl>
+ Thomas Gleixner <tglx@linutronix.de>
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index db713dd3e08e..72993fb19c99 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -62,6 +62,7 @@
+ #include <linux/hugetlb.h>
+ #include <linux/sched/rt.h>
+ #include <linux/sched/mm.h>
++#include <linux/local_lock.h>
+ #include <linux/page_owner.h>
+ #include <linux/kthread.h>
+ #include <linux/memcontrol.h>
+@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes);
+
+ int page_group_by_mobility_disabled __read_mostly;
+
++struct pa_lock {
++ local_lock_t l;
++};
++static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
++ .l = INIT_LOCAL_LOCK(l),
++};
++
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+@@ -1541,11 +1549,11 @@ static void __free_pages_ok(struct page *page, unsigned int order,
+ return;
+
+ migratetype = get_pfnblock_migratetype(page, pfn);
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ __count_vm_events(PGFREE, 1 << order);
+ free_one_page(page_zone(page), page, pfn, order, migratetype,
+ fpi_flags);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+
+ void __free_pages_core(struct page *page, unsigned int order)
+@@ -2962,12 +2970,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
+ unsigned long flags;
+ int to_drain, batch;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ batch = READ_ONCE(pcp->batch);
+ to_drain = min(pcp->count, batch);
+ if (to_drain > 0)
+ free_pcppages_bulk(zone, to_drain, pcp);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+ #endif
+
+@@ -2984,13 +2992,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ pset = per_cpu_ptr(zone->pageset, cpu);
+
+ pcp = &pset->pcp;
+ if (pcp->count)
+ free_pcppages_bulk(zone, pcp->count, pcp);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+
+ /*
+@@ -3253,9 +3261,9 @@ void free_unref_page(struct page *page)
+ if (!free_unref_page_prepare(page, pfn))
+ return;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ free_unref_page_commit(page, pfn);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+
+ /*
+@@ -3275,7 +3283,7 @@ void free_unref_page_list(struct list_head *list)
+ set_page_private(page, pfn);
+ }
+
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ list_for_each_entry_safe(page, next, list, lru) {
+ unsigned long pfn = page_private(page);
+
+@@ -3288,12 +3296,12 @@ void free_unref_page_list(struct list_head *list)
+ * a large list of pages to free.
+ */
+ if (++batch_count == SWAP_CLUSTER_MAX) {
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ batch_count = 0;
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ }
+ }
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+
+ /*
+@@ -3449,7 +3457,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+ struct page *page;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ list = &pcp->lists[migratetype];
+ page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
+@@ -3457,7 +3465,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
+ zone_statistics(preferred_zone, zone);
+ }
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ return page;
+ }
+
+@@ -3491,7 +3499,9 @@ struct page *rmqueue(struct zone *preferred_zone,
+ * allocate greater than order-1 page units with __GFP_NOFAIL.
+ */
+ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+- spin_lock_irqsave(&zone->lock, flags);
++
++ local_lock_irqsave(&pa_lock.l, flags);
++ spin_lock(&zone->lock);
+
+ do {
+ page = NULL;
+@@ -3512,12 +3522,13 @@ struct page *rmqueue(struct zone *preferred_zone,
+ spin_unlock(&zone->lock);
+ if (!page)
+ goto failed;
++
+ __mod_zone_freepage_state(zone, -(1 << order),
+ get_pcppage_migratetype(page));
+
+ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+ zone_statistics(preferred_zone, zone);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+
+ out:
+ /* Separate test+clear to avoid unnecessary atomics */
+@@ -3530,7 +3541,7 @@ struct page *rmqueue(struct zone *preferred_zone,
+ return page;
+
+ failed:
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ return NULL;
+ }
+
+@@ -8810,7 +8821,7 @@ void zone_pcp_reset(struct zone *zone)
+ struct per_cpu_pageset *pset;
+
+ /* avoid races with drain_pages() */
+- local_irq_save(flags);
++ local_lock_irqsave(&pa_lock.l, flags);
+ if (zone->pageset != &boot_pageset) {
+ for_each_online_cpu(cpu) {
+ pset = per_cpu_ptr(zone->pageset, cpu);
+@@ -8819,7 +8830,7 @@ void zone_pcp_reset(struct zone *zone)
+ free_percpu(zone->pageset);
+ zone->pageset = &boot_pageset;
+ }
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+
+ #ifdef CONFIG_MEMORY_HOTREMOVE
+--
+2.19.1
+
diff --git a/features/rt/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/features/rt/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
new file mode 100644
index 00000000..3d068182
--- /dev/null
+++ b/features/rt/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
@@ -0,0 +1,38 @@
+From 6e618c2aeb0d445978c2cbdec84fc15eea6edb82 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 2 Jul 2020 14:27:23 +0200
+Subject: [PATCH 110/191] mm: page_alloc: Use migrate_disable() in
+ drain_local_pages_wq()
+
+drain_local_pages_wq() disables preemption to avoid CPU migration during
+CPU hotplug and can't use cpus_read_lock().
+
+Using migrate_disable() works here, too. The scheduler won't take the
+CPU offline until the task has left the migrate-disable section.
+
+Use migrate_disable() in drain_local_pages_wq().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/page_alloc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index cfc72873961d..db713dd3e08e 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3038,9 +3038,9 @@ static void drain_local_pages_wq(struct work_struct *work)
+ * cpu which is allright but we also have to make sure to not move to
+ * a different one.
+ */
+- preempt_disable();
++ migrate_disable();
+ drain_local_pages(drain->zone);
+- preempt_enable();
++ migrate_enable();
+ }
+
+ /*
+--
+2.19.1
+
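migrate_disable()/migrate_enable() pin the current task to its CPU while leaving it preemptible; a one-function sketch of the substitution made above (the function name is hypothetical):

#include <linux/preempt.h>

static void example_drain_this_cpu(void)
{
	/* The task cannot migrate and the CPU cannot go offline until
	 * migrate_enable(), yet the section remains preemptible. */
	migrate_disable();
	/* ... operate on this CPU's per-CPU state ... */
	migrate_enable();
}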
diff --git a/features/rt/mm-scatterlist-Do-not-disable-irqs-on-RT.patch b/features/rt/mm-scatterlist-Do-not-disable-irqs-on-RT.patch
new file mode 100644
index 00000000..de9d9047
--- /dev/null
+++ b/features/rt/mm-scatterlist-Do-not-disable-irqs-on-RT.patch
@@ -0,0 +1,29 @@
+From e548a9846277b0cf05cd81fc6441d578728d20f3 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:34 -0500
+Subject: [PATCH 162/191] mm/scatterlist: Do not disable irqs on RT
+
+For -RT it is enough to keep page faults disabled (which is currently handled by
+kmap_atomic()).
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/scatterlist.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/scatterlist.c b/lib/scatterlist.c
+index a59778946404..907f59045998 100644
+--- a/lib/scatterlist.c
++++ b/lib/scatterlist.c
+@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
+ flush_kernel_dcache_page(miter->page);
+
+ if (miter->__flags & SG_MITER_ATOMIC) {
+- WARN_ON_ONCE(preemptible());
++ WARN_ON_ONCE(!pagefault_disabled());
+ kunmap_atomic(miter->addr);
+ } else
+ kunmap(miter->page);
+--
+2.19.1
+
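The reworked assertion relies on kmap_atomic() still disabling page faults on PREEMPT_RT even though it no longer disables preemption there. A hypothetical sketch of a caller checking the same invariant:

#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/uaccess.h>

static void example_clear_page(struct page *page)
{
	char *addr = kmap_atomic(page);

	/* On RT, preemptible() may be true here, but page faults stay
	 * disabled for the lifetime of the atomic mapping. */
	WARN_ON_ONCE(!pagefault_disabled());
	memset(addr, 0, PAGE_SIZE);
	kunmap_atomic(addr);
}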
diff --git a/features/rt/mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch b/features/rt/mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch
new file mode 100644
index 00000000..c6c7ab42
--- /dev/null
+++ b/features/rt/mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch
@@ -0,0 +1,602 @@
+From 8583607fe6b30d9952332612f6093f40282d0906 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 28 May 2018 15:24:22 +0200
+Subject: [PATCH 104/191] mm: sl[au]b: Change list_lock to raw_spinlock_t
+
+The list_lock is used with IRQs off on PREEMPT_RT. Make it a
+raw_spinlock_t, otherwise interrupts won't be disabled on PREEMPT_RT.
+The locking rules remain unchanged.
+The lock is updated for SLAB and SLUB since both share the same header
+file for the struct kmem_cache_node definition.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slab.c | 90 +++++++++++++++++++++++++++----------------------------
+ mm/slab.h | 2 +-
+ mm/slub.c | 50 +++++++++++++++----------------
+ 3 files changed, 71 insertions(+), 71 deletions(-)
+
+diff --git a/mm/slab.c b/mm/slab.c
+index ae651bf540b7..afe196fcf579 100644
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
+ parent->shared = NULL;
+ parent->alien = NULL;
+ parent->colour_next = 0;
+- spin_lock_init(&parent->list_lock);
++ raw_spin_lock_init(&parent->list_lock);
+ parent->free_objects = 0;
+ parent->free_touched = 0;
+ }
+@@ -559,9 +559,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
+ page_node = page_to_nid(page);
+ n = get_node(cachep, page_node);
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ free_block(cachep, &objp, 1, page_node, &list);
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ slabs_destroy(cachep, &list);
+ }
+@@ -699,7 +699,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
+ struct kmem_cache_node *n = get_node(cachep, node);
+
+ if (ac->avail) {
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ /*
+ * Stuff objects into the remote nodes shared array first.
+ * That way we could avoid the overhead of putting the objects
+@@ -710,7 +710,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
+
+ free_block(cachep, ac->entry, ac->avail, node, list);
+ ac->avail = 0;
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ }
+ }
+
+@@ -783,9 +783,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
+ slabs_destroy(cachep, &list);
+ } else {
+ n = get_node(cachep, page_node);
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ free_block(cachep, &objp, 1, page_node, &list);
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
+ }
+ return 1;
+@@ -826,10 +826,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+ */
+ n = get_node(cachep, node);
+ if (n) {
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
+ cachep->num;
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+
+ return 0;
+ }
+@@ -908,7 +908,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
+ goto fail;
+
+ n = get_node(cachep, node);
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ if (n->shared && force_change) {
+ free_block(cachep, n->shared->entry,
+ n->shared->avail, node, &list);
+@@ -926,7 +926,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
+ new_alien = NULL;
+ }
+
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
+
+ /*
+@@ -965,7 +965,7 @@ static void cpuup_canceled(long cpu)
+ if (!n)
+ continue;
+
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+
+ /* Free limit for this kmem_cache_node */
+ n->free_limit -= cachep->batchcount;
+@@ -976,7 +976,7 @@ static void cpuup_canceled(long cpu)
+ nc->avail = 0;
+
+ if (!cpumask_empty(mask)) {
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ goto free_slab;
+ }
+
+@@ -990,7 +990,7 @@ static void cpuup_canceled(long cpu)
+ alien = n->alien;
+ n->alien = NULL;
+
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+
+ kfree(shared);
+ if (alien) {
+@@ -1174,7 +1174,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *
+ /*
+ * Do not assume that spinlocks can be initialized via memcpy:
+ */
+- spin_lock_init(&ptr->list_lock);
++ raw_spin_lock_init(&ptr->list_lock);
+
+ MAKE_ALL_LISTS(cachep, ptr, nodeid);
+ cachep->node[nodeid] = ptr;
+@@ -1345,11 +1345,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
+ for_each_kmem_cache_node(cachep, node, n) {
+ unsigned long total_slabs, free_slabs, free_objs;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ total_slabs = n->total_slabs;
+ free_slabs = n->free_slabs;
+ free_objs = n->free_objects;
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+
+ pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
+ node, total_slabs - free_slabs, total_slabs,
+@@ -2107,7 +2107,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
+ {
+ #ifdef CONFIG_SMP
+ check_irq_off();
+- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
++ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
+ #endif
+ }
+
+@@ -2115,7 +2115,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
+ {
+ #ifdef CONFIG_SMP
+ check_irq_off();
+- assert_spin_locked(&get_node(cachep, node)->list_lock);
++ assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
+ #endif
+ }
+
+@@ -2155,9 +2155,9 @@ static void do_drain(void *arg)
+ check_irq_off();
+ ac = cpu_cache_get(cachep);
+ n = get_node(cachep, node);
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node, &list);
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ ac->avail = 0;
+ slabs_destroy(cachep, &list);
+ }
+@@ -2175,9 +2175,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
+ drain_alien_cache(cachep, n->alien);
+
+ for_each_kmem_cache_node(cachep, node, n) {
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ drain_array_locked(cachep, n->shared, node, true, &list);
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+
+ slabs_destroy(cachep, &list);
+ }
+@@ -2199,10 +2199,10 @@ static int drain_freelist(struct kmem_cache *cache,
+ nr_freed = 0;
+ while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
+
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ p = n->slabs_free.prev;
+ if (p == &n->slabs_free) {
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ goto out;
+ }
+
+@@ -2215,7 +2215,7 @@ static int drain_freelist(struct kmem_cache *cache,
+ * to the cache.
+ */
+ n->free_objects -= cache->num;
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ slab_destroy(cache, page);
+ nr_freed++;
+ }
+@@ -2651,7 +2651,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+ INIT_LIST_HEAD(&page->slab_list);
+ n = get_node(cachep, page_to_nid(page));
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ n->total_slabs++;
+ if (!page->active) {
+ list_add_tail(&page->slab_list, &n->slabs_free);
+@@ -2661,7 +2661,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+
+ STATS_INC_GROWN(cachep);
+ n->free_objects += cachep->num - page->active;
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ fixup_objfreelist_debug(cachep, &list);
+ }
+@@ -2827,7 +2827,7 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
+ {
+ struct page *page;
+
+- assert_spin_locked(&n->list_lock);
++ assert_raw_spin_locked(&n->list_lock);
+ page = list_first_entry_or_null(&n->slabs_partial, struct page,
+ slab_list);
+ if (!page) {
+@@ -2854,10 +2854,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
+ if (!gfp_pfmemalloc_allowed(flags))
+ return NULL;
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ page = get_first_slab(n, true);
+ if (!page) {
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ return NULL;
+ }
+
+@@ -2866,7 +2866,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
+
+ fixup_slab_list(cachep, n, page, &list);
+
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ fixup_objfreelist_debug(cachep, &list);
+
+ return obj;
+@@ -2925,7 +2925,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+ if (!n->free_objects && (!shared || !shared->avail))
+ goto direct_grow;
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ shared = READ_ONCE(n->shared);
+
+ /* See if we can refill from the shared array */
+@@ -2949,7 +2949,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+ must_grow:
+ n->free_objects -= ac->avail;
+ alloc_done:
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ fixup_objfreelist_debug(cachep, &list);
+
+ direct_grow:
+@@ -3174,7 +3174,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+ BUG_ON(!n);
+
+ check_irq_off();
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ page = get_first_slab(n, false);
+ if (!page)
+ goto must_grow;
+@@ -3192,12 +3192,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+
+ fixup_slab_list(cachep, n, page, &list);
+
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ fixup_objfreelist_debug(cachep, &list);
+ return obj;
+
+ must_grow:
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
+ if (page) {
+ /* This slab isn't counted yet so don't update free_objects */
+@@ -3385,7 +3385,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
+
+ check_irq_off();
+ n = get_node(cachep, node);
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ if (n->shared) {
+ struct array_cache *shared_array = n->shared;
+ int max = shared_array->limit - shared_array->avail;
+@@ -3414,7 +3414,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
+ STATS_SET_FREEABLE(cachep, i);
+ }
+ #endif
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ ac->avail -= batchcount;
+ memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
+ slabs_destroy(cachep, &list);
+@@ -3870,9 +3870,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+
+ node = cpu_to_mem(cpu);
+ n = get_node(cachep, node);
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node, &list);
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
+ }
+ free_percpu(prev);
+@@ -3967,9 +3967,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
+ return;
+ }
+
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ drain_array_locked(cachep, ac, node, false, &list);
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+
+ slabs_destroy(cachep, &list);
+ }
+@@ -4053,7 +4053,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
+
+ for_each_kmem_cache_node(cachep, node, n) {
+ check_irq_on();
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+
+ total_slabs += n->total_slabs;
+ free_slabs += n->free_slabs;
+@@ -4062,7 +4062,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
+ if (n->shared)
+ shared_avail += n->shared->avail;
+
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+ }
+ num_objs = total_slabs * cachep->num;
+ active_slabs = total_slabs - free_slabs;
+diff --git a/mm/slab.h b/mm/slab.h
+index 076582f58f68..21c670da13ac 100644
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -527,7 +527,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
+ * The slab lists for all objects.
+ */
+ struct kmem_cache_node {
+- spinlock_t list_lock;
++ raw_spinlock_t list_lock;
+
+ #ifdef CONFIG_SLAB
+ struct list_head slabs_partial; /* partial list first, better asm code */
+diff --git a/mm/slub.c b/mm/slub.c
+index 3021ce9bf1b3..07e662d2aef0 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1225,7 +1225,7 @@ static noinline int free_debug_processing(
+ unsigned long flags;
+ int ret = 0;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ slab_lock(page);
+
+ if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+@@ -1260,7 +1260,7 @@ static noinline int free_debug_processing(
+ bulk_cnt, cnt);
+
+ slab_unlock(page);
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ if (!ret)
+ slab_fix(s, "Object at 0x%p not freed", object);
+ return ret;
+@@ -1984,7 +1984,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ if (!n || !n->nr_partial)
+ return NULL;
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
+ void *t;
+
+@@ -2009,7 +2009,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ break;
+
+ }
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ return object;
+ }
+
+@@ -2252,7 +2252,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ * that acquire_slab() will see a slab page that
+ * is frozen
+ */
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+ } else {
+ m = M_FULL;
+@@ -2263,7 +2263,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ * slabs from diagnostic functions will not see
+ * any frozen slabs.
+ */
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+ }
+
+@@ -2287,7 +2287,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ goto redo;
+
+ if (lock)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ if (m == M_PARTIAL)
+ stat(s, tail);
+@@ -2326,10 +2326,10 @@ static void unfreeze_partials(struct kmem_cache *s,
+ n2 = get_node(s, page_to_nid(page));
+ if (n != n2) {
+ if (n)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ n = n2;
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+
+ do {
+@@ -2358,7 +2358,7 @@ static void unfreeze_partials(struct kmem_cache *s,
+ }
+
+ if (n)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ while (discard_page) {
+ page = discard_page;
+@@ -2525,10 +2525,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
+ unsigned long x = 0;
+ struct page *page;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, slab_list)
+ x += get_count(page);
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+ }
+ #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
+@@ -2997,7 +2997,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+
+ do {
+ if (unlikely(n)) {
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ n = NULL;
+ }
+ prior = page->freelist;
+@@ -3029,7 +3029,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+ * Otherwise the list_lock will synchronize with
+ * other processors updating the list of slabs.
+ */
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ }
+ }
+@@ -3071,7 +3071,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+ add_partial(n, page, DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
+ }
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return;
+
+ slab_empty:
+@@ -3086,7 +3086,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+ remove_full(s, n, page);
+ }
+
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ stat(s, FREE_SLAB);
+ discard_slab(s, page);
+ }
+@@ -3518,7 +3518,7 @@ static void
+ init_kmem_cache_node(struct kmem_cache_node *n)
+ {
+ n->nr_partial = 0;
+- spin_lock_init(&n->list_lock);
++ raw_spin_lock_init(&n->list_lock);
+ INIT_LIST_HEAD(&n->partial);
+ #ifdef CONFIG_SLUB_DEBUG
+ atomic_long_set(&n->nr_slabs, 0);
+@@ -3918,7 +3918,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
+ struct page *page, *h;
+
+ BUG_ON(irqs_disabled());
+- spin_lock_irq(&n->list_lock);
++ raw_spin_lock_irq(&n->list_lock);
+ list_for_each_entry_safe(page, h, &n->partial, slab_list) {
+ if (!page->inuse) {
+ remove_partial(n, page);
+@@ -3928,7 +3928,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
+ "Objects remaining in %s on __kmem_cache_shutdown()");
+ }
+ }
+- spin_unlock_irq(&n->list_lock);
++ raw_spin_unlock_irq(&n->list_lock);
+
+ list_for_each_entry_safe(page, h, &discard, slab_list)
+ discard_slab(s, page);
+@@ -4243,7 +4243,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
+ for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
+ INIT_LIST_HEAD(promote + i);
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ /*
+ * Build lists of slabs to discard or promote.
+@@ -4274,7 +4274,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
+ for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
+ list_splice(promote + i, &n->partial);
+
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+
+ /* Release empty slabs */
+ list_for_each_entry_safe(page, t, &discard, slab_list)
+@@ -4644,7 +4644,7 @@ static int validate_slab_node(struct kmem_cache *s,
+ struct page *page;
+ unsigned long flags;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ list_for_each_entry(page, &n->partial, slab_list) {
+ validate_slab(s, page);
+@@ -4666,7 +4666,7 @@ static int validate_slab_node(struct kmem_cache *s,
+ s->name, count, atomic_long_read(&n->nr_slabs));
+
+ out:
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return count;
+ }
+
+@@ -4845,12 +4845,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
+ if (!atomic_long_read(&n->nr_slabs))
+ continue;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, slab_list)
+ process_slab(&t, s, page, alloc);
+ list_for_each_entry(page, &n->full, slab_list)
+ process_slab(&t, s, page, alloc);
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+
+ for (i = 0; i < t.count; i++) {
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch b/features/rt/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch
new file mode 100644
index 00000000..d3c75a7e
--- /dev/null
+++ b/features/rt/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch
@@ -0,0 +1,32 @@
+From b49dbaa0391f05adefc6384ea61cb9e60c4a452a Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 2 Mar 2021 18:58:04 +0100
+Subject: [PATCH 112/191] mm: slub: Don't enable partial CPU caches on
+ PREEMPT_RT by default
+
+SLUB's partial CPU caches lead to higher latencies in a hackbench
+benchmark.
+
+Don't enable partial CPU caches by default on PREEMPT_RT.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ init/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/init/Kconfig b/init/Kconfig
+index 37686a22a769..45f2231c7131 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1974,7 +1974,7 @@ config SHUFFLE_PAGE_ALLOCATOR
+ Say Y if unsure.
+
+ config SLUB_CPU_PARTIAL
+- default y
++ default y if !PREEMPT_RT
+ depends on SLUB && SMP
+ bool "SLUB per cpu partial cache"
+ help
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch b/features/rt/mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch
new file mode 100644
index 00000000..601403f6
--- /dev/null
+++ b/features/rt/mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch
@@ -0,0 +1,35 @@
+From cc489539ca4480363e75c1f71bf9e47f835fb2ef Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 17:26:04 +0100
+Subject: [PATCH 109/191] mm: slub: Don't resize the location tracking cache on
+ PREEMPT_RT
+
+The location tracking cache has a size of a page and is resized if its
+current size is too small.
+This allocation happens with interrupts disabled and therefore can't happen on
+PREEMPT_RT.
+Should one page be too small, we have to allocate more at the
+beginning. The only downside is that fewer callers will be visible.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index ec608c1d5fdb..26cf2872a7ff 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -4819,6 +4819,9 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
+ struct location *l;
+ int order;
+
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
++ return 0;
++
+ order = get_order(sizeof(struct location) * max);
+
+ l = (void *)__get_free_pages(flags, order);
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Enable-irqs-for-__GFP_WAIT.patch b/features/rt/mm-slub-Enable-irqs-for-__GFP_WAIT.patch
new file mode 100644
index 00000000..82d38ebf
--- /dev/null
+++ b/features/rt/mm-slub-Enable-irqs-for-__GFP_WAIT.patch
@@ -0,0 +1,76 @@
+From 9a1bfa7cf125b54a5ad856c32adf4a21359db777 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Jan 2013 12:08:15 +0100
+Subject: [PATCH 106/191] mm: slub: Enable irqs for __GFP_WAIT
+
+SYSTEM_RUNNING might be too late for enabling interrupts. Allocations
+with GFP_WAIT can happen before that. So use this as an indicator.
+
+[bigeasy: Add warning on RT for allocations in atomic context.
+ Don't enable interrupts on allocations during SYSTEM_SUSPEND. This is done
+ during suspend by ACPI, noticed by Liwei Song <liwei.song@windriver.com>
+]
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 863b0304b229..1382845c3802 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1759,10 +1759,18 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+ void *start, *p, *next;
+ int idx;
+ bool shuffle;
++ bool enableirqs = false;
+
+ flags &= gfp_allowed_mask;
+
+ if (gfpflags_allow_blocking(flags))
++ enableirqs = true;
++
++#ifdef CONFIG_PREEMPT_RT
++ if (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)
++ enableirqs = true;
++#endif
++ if (enableirqs)
+ local_irq_enable();
+
+ flags |= s->allocflags;
+@@ -1823,7 +1831,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+ page->frozen = 1;
+
+ out:
+- if (gfpflags_allow_blocking(flags))
++ if (enableirqs)
+ local_irq_disable();
+ if (!page)
+ return NULL;
+@@ -2823,6 +2831,10 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
+ unsigned long tid;
+ struct obj_cgroup *objcg = NULL;
+
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
++ WARN_ON_ONCE(!preemptible() &&
++ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND));
++
+ s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
+ if (!s)
+ return NULL;
+@@ -3304,6 +3316,10 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ int i;
+ struct obj_cgroup *objcg = NULL;
+
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
++ WARN_ON_ONCE(!preemptible() &&
++ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND));
++
+ /* memcg and kmem_cache debug support */
+ s = slab_pre_alloc_hook(s, &objcg, size, flags);
+ if (unlikely(!s))
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch b/features/rt/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
new file mode 100644
index 00000000..c2597842
--- /dev/null
+++ b/features/rt/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
@@ -0,0 +1,49 @@
+From ebdeb55b60cffd57cc349a1a5c657c5905bc01ab Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 16 Jul 2020 18:47:50 +0200
+Subject: [PATCH 105/191] mm: slub: Make object_map_lock a raw_spinlock_t
+
+The variable object_map is protected by object_map_lock. The lock is always
+acquired in debug code and from an already atomic context, in which a
+spinlock_t (a sleeping lock on PREEMPT_RT) must not be taken.
+
+Make object_map_lock a raw_spinlock_t.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 07e662d2aef0..863b0304b229 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -445,7 +445,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+
+ #ifdef CONFIG_SLUB_DEBUG
+ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
+-static DEFINE_SPINLOCK(object_map_lock);
++static DEFINE_RAW_SPINLOCK(object_map_lock);
+
+ /*
+ * Determine a map of object in use on a page.
+@@ -461,7 +461,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page)
+
+ VM_BUG_ON(!irqs_disabled());
+
+- spin_lock(&object_map_lock);
++ raw_spin_lock(&object_map_lock);
+
+ bitmap_zero(object_map, page->objects);
+
+@@ -474,7 +474,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page)
+ static void put_map(unsigned long *map) __releases(&object_map_lock)
+ {
+ VM_BUG_ON(map != object_map);
+- spin_unlock(&object_map_lock);
++ raw_spin_unlock(&object_map_lock);
+ }
+
+ static inline unsigned int size_from_object(struct kmem_cache *s)
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
new file mode 100644
index 00000000..9f53a2dd
--- /dev/null
+++ b/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
@@ -0,0 +1,416 @@
+From a554a721d714cba4bf3c8eb17e25913fa593a6bf Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 15:14:15 +0100
+Subject: [PATCH 107/191] mm: slub: Move discard_slab() invocations out of
+ IRQ-off sections
+
+discard_slab() gives the memory back to the page allocator. Some of its
+invocations occur from sections in which SLUB itself disabled interrupts.
+An example is the deactivate_slab() invocation from within
+___slab_alloc() or put_cpu_partial().
+
+Instead of giving the memory back directly, put the pages on a list and
+process it once the caller is out of the known IRQ-off region.
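+
+A condensed sketch of the calling convention this introduces (illustrative
+only; the real helpers are in the hunks below):
+
+	LIST_HEAD(delayed_free);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	/* IRQ-off section: pages to be freed are only queued */
+	p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free);
+	local_irq_restore(flags);
+
+	/* interrupts are enabled again: hand the queued pages back now */
+	discard_delayed(&delayed_free);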
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 114 +++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 78 insertions(+), 36 deletions(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 1382845c3802..af9c0fbe2cf5 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1889,12 +1889,29 @@ static void free_slab(struct kmem_cache *s, struct page *page)
+ __free_slab(s, page);
+ }
+
++static void discard_slab_delayed(struct kmem_cache *s, struct page *page,
++ struct list_head *delayed_free)
++{
++ dec_slabs_node(s, page_to_nid(page), page->objects);
++ list_add(&page->lru, delayed_free);
++}
++
+ static void discard_slab(struct kmem_cache *s, struct page *page)
+ {
+ dec_slabs_node(s, page_to_nid(page), page->objects);
+ free_slab(s, page);
+ }
+
++static void discard_delayed(struct list_head *l)
++{
++ while (!list_empty(l)) {
++ struct page *page = list_first_entry(l, struct page, lru);
++
++ list_del(&page->lru);
++ __free_slab(page->slab_cache, page);
++ }
++}
++
+ /*
+ * Management of partially allocated slabs.
+ */
+@@ -1968,15 +1985,16 @@ static inline void *acquire_slab(struct kmem_cache *s,
+ WARN_ON(!freelist);
+ return freelist;
+ }
+-
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++ struct list_head *delayed_free);
+ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
+
+ /*
+ * Try to allocate a partial slab from a specific node.
+ */
+ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+- struct kmem_cache_cpu *c, gfp_t flags)
++ struct kmem_cache_cpu *c, gfp_t flags,
++ struct list_head *delayed_free)
+ {
+ struct page *page, *page2;
+ void *object = NULL;
+@@ -2009,7 +2027,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ stat(s, ALLOC_FROM_PARTIAL);
+ object = t;
+ } else {
+- put_cpu_partial(s, page, 0);
++ put_cpu_partial(s, page, 0, delayed_free);
+ stat(s, CPU_PARTIAL_NODE);
+ }
+ if (!kmem_cache_has_cpu_partial(s)
+@@ -2025,7 +2043,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ * Get a page from somewhere. Search in increasing NUMA distances.
+ */
+ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+- struct kmem_cache_cpu *c)
++ struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_NUMA
+ struct zonelist *zonelist;
+@@ -2067,7 +2086,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+
+ if (n && cpuset_zone_allowed(zone, flags) &&
+ n->nr_partial > s->min_partial) {
+- object = get_partial_node(s, n, c, flags);
++ object = get_partial_node(s, n, c, flags, delayed_free);
+ if (object) {
+ /*
+ * Don't check read_mems_allowed_retry()
+@@ -2089,7 +2108,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+ * Get a partial page, lock it and return it.
+ */
+ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+- struct kmem_cache_cpu *c)
++ struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ void *object;
+ int searchnode = node;
+@@ -2097,11 +2117,12 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+ if (node == NUMA_NO_NODE)
+ searchnode = numa_mem_id();
+
+- object = get_partial_node(s, get_node(s, searchnode), c, flags);
++ object = get_partial_node(s, get_node(s, searchnode), c, flags,
++ delayed_free);
+ if (object || node != NUMA_NO_NODE)
+ return object;
+
+- return get_any_partial(s, flags, c);
++ return get_any_partial(s, flags, c, delayed_free);
+ }
+
+ #ifdef CONFIG_PREEMPTION
+@@ -2177,7 +2198,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
+ * Remove the cpu slab
+ */
+ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+- void *freelist, struct kmem_cache_cpu *c)
++ void *freelist, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+@@ -2303,7 +2325,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ stat(s, DEACTIVATE_FULL);
+ else if (m == M_FREE) {
+ stat(s, DEACTIVATE_EMPTY);
+- discard_slab(s, page);
++ discard_slab_delayed(s, page, delayed_free);
+ stat(s, FREE_SLAB);
+ }
+
+@@ -2318,8 +2340,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ * for the cpu using c (or some other guarantee must be there
+ * to guarantee no concurrent accesses).
+ */
+-static void unfreeze_partials(struct kmem_cache *s,
+- struct kmem_cache_cpu *c)
++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct kmem_cache_node *n = NULL, *n2 = NULL;
+@@ -2373,7 +2395,7 @@ static void unfreeze_partials(struct kmem_cache *s,
+ discard_page = discard_page->next;
+
+ stat(s, DEACTIVATE_EMPTY);
+- discard_slab(s, page);
++ discard_slab_delayed(s, page, delayed_free);
+ stat(s, FREE_SLAB);
+ }
+ #endif /* CONFIG_SLUB_CPU_PARTIAL */
+@@ -2386,7 +2408,8 @@ static void unfreeze_partials(struct kmem_cache *s,
+ * If we did not find a slot then simply move all the partials to the
+ * per node partial list.
+ */
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct page *oldpage;
+@@ -2409,7 +2432,8 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+ * set to the per node partial list.
+ */
+ local_irq_save(flags);
+- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab),
++ delayed_free);
+ local_irq_restore(flags);
+ oldpage = NULL;
+ pobjects = 0;
+@@ -2431,17 +2455,18 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+ unsigned long flags;
+
+ local_irq_save(flags);
+- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+ #endif /* CONFIG_SLUB_CPU_PARTIAL */
+ }
+
+-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ stat(s, CPUSLAB_FLUSH);
+- deactivate_slab(s, c->page, c->freelist, c);
++ deactivate_slab(s, c->page, c->freelist, c, delayed_free);
+
+ c->tid = next_tid(c->tid);
+ }
+@@ -2451,21 +2476,24 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+ *
+ * Called from IPI handler with interrupts disabled.
+ */
+-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
++ struct list_head *delayed_free)
+ {
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ if (c->page)
+- flush_slab(s, c);
++ flush_slab(s, c, delayed_free);
+
+- unfreeze_partials(s, c);
++ unfreeze_partials(s, c, delayed_free);
+ }
+
+ static void flush_cpu_slab(void *d)
+ {
+ struct kmem_cache *s = d;
++ LIST_HEAD(delayed_free);
+
+- __flush_cpu_slab(s, smp_processor_id());
++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ discard_delayed(&delayed_free);
+ }
+
+ static bool has_cpu_slab(int cpu, void *info)
+@@ -2489,13 +2517,15 @@ static int slub_cpu_dead(unsigned int cpu)
+ {
+ struct kmem_cache *s;
+ unsigned long flags;
++ LIST_HEAD(delayed_free);
+
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(s, &slab_caches, list) {
+ local_irq_save(flags);
+- __flush_cpu_slab(s, cpu);
++ __flush_cpu_slab(s, cpu, &delayed_free);
+ local_irq_restore(flags);
+ }
++ discard_delayed(&delayed_free);
+ mutex_unlock(&slab_mutex);
+ return 0;
+ }
+@@ -2579,7 +2609,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+ }
+
+ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+- int node, struct kmem_cache_cpu **pc)
++ int node, struct kmem_cache_cpu **pc,
++ struct list_head *delayed_free)
+ {
+ void *freelist;
+ struct kmem_cache_cpu *c = *pc;
+@@ -2587,7 +2618,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+
+ WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+
+- freelist = get_partial(s, flags, node, c);
++ freelist = get_partial(s, flags, node, c, delayed_free);
+
+ if (freelist)
+ return freelist;
+@@ -2596,7 +2627,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+ if (page) {
+ c = raw_cpu_ptr(s->cpu_slab);
+ if (c->page)
+- flush_slab(s, c);
++ flush_slab(s, c, delayed_free);
+
+ /*
+ * No other reference to the page yet so we can
+@@ -2675,7 +2706,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+ * already disabled (which is the case for bulk allocation).
+ */
+ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+- unsigned long addr, struct kmem_cache_cpu *c)
++ unsigned long addr, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ void *freelist;
+ struct page *page;
+@@ -2705,7 +2737,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ goto redo;
+ } else {
+ stat(s, ALLOC_NODE_MISMATCH);
+- deactivate_slab(s, page, c->freelist, c);
++ deactivate_slab(s, page, c->freelist, c, delayed_free);
+ goto new_slab;
+ }
+ }
+@@ -2716,7 +2748,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ * information when the page leaves the per-cpu allocator
+ */
+ if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+- deactivate_slab(s, page, c->freelist, c);
++ deactivate_slab(s, page, c->freelist, c, delayed_free);
+ goto new_slab;
+ }
+
+@@ -2755,7 +2787,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ goto redo;
+ }
+
+- freelist = new_slab_objects(s, gfpflags, node, &c);
++ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free);
+
+ if (unlikely(!freelist)) {
+ slab_out_of_memory(s, gfpflags, node);
+@@ -2771,7 +2803,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ !alloc_debug_processing(s, page, freelist, addr))
+ goto new_slab; /* Slab failed checks. Next slab needed */
+
+- deactivate_slab(s, page, get_freepointer(s, freelist), c);
++ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free);
+ return freelist;
+ }
+
+@@ -2784,6 +2816,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ {
+ void *p;
+ unsigned long flags;
++ LIST_HEAD(delayed_free);
+
+ local_irq_save(flags);
+ #ifdef CONFIG_PREEMPTION
+@@ -2795,8 +2828,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ c = this_cpu_ptr(s->cpu_slab);
+ #endif
+
+- p = ___slab_alloc(s, gfpflags, node, addr, c);
++ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free);
+ local_irq_restore(flags);
++ discard_delayed(&delayed_free);
+ return p;
+ }
+
+@@ -3060,11 +3094,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+ */
+ stat(s, FREE_FROZEN);
+ } else if (new.frozen) {
++ LIST_HEAD(delayed_free);
+ /*
+ * If we just froze the page then put it onto the
+ * per cpu partial list.
+ */
+- put_cpu_partial(s, page, 1);
++ put_cpu_partial(s, page, 1, &delayed_free);
++ discard_delayed(&delayed_free);
+ stat(s, CPU_PARTIAL_FREE);
+ }
+
+@@ -3315,6 +3351,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ struct kmem_cache_cpu *c;
+ int i;
+ struct obj_cgroup *objcg = NULL;
++ LIST_HEAD(delayed_free);
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
+ WARN_ON_ONCE(!preemptible() &&
+@@ -3356,7 +3393,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ * of re-populating per CPU c->freelist
+ */
+ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
+- _RET_IP_, c);
++ _RET_IP_, c, &delayed_free);
+ if (unlikely(!p[i]))
+ goto error;
+
+@@ -3372,6 +3409,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ c->tid = next_tid(c->tid);
+ local_irq_enable();
+
++ discard_delayed(&delayed_free);
++
+ /* Clear memory outside IRQ disabled fastpath loop */
+ if (unlikely(slab_want_init_on_alloc(flags, s))) {
+ int j;
+@@ -3385,6 +3424,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ return i;
+ error:
+ local_irq_enable();
++ discard_delayed(&delayed_free);
+ slab_post_alloc_hook(s, objcg, flags, i, p);
+ __kmem_cache_free_bulk(s, i, p);
+ return 0;
+@@ -4437,6 +4477,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
+ int node;
+ struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ struct kmem_cache_node *n;
++ LIST_HEAD(delayed_free);
+
+ memcpy(s, static_cache, kmem_cache->object_size);
+
+@@ -4445,7 +4486,8 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
+ * up. Even if it weren't true, IRQs are not up so we couldn't fire
+ * IPIs around.
+ */
+- __flush_cpu_slab(s, smp_processor_id());
++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ discard_delayed(&delayed_free);
+ for_each_kmem_cache_node(s, node, n) {
+ struct page *p;
+
+--
+2.19.1
+
diff --git a/features/rt/mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch b/features/rt/mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch
new file mode 100644
index 00000000..119f020a
--- /dev/null
+++ b/features/rt/mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch
@@ -0,0 +1,120 @@
+From 3bd87df897f0a51299da79bf24091cc937f1706e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 17:11:55 +0100
+Subject: [PATCH 108/191] mm: slub: Move flush_cpu_slab() invocations
+ __free_slab() invocations out of IRQ context
+
+flush_all() flushes a specific SLAB cache on each CPU (where the cache
+is present). The discard_delayed()/__free_slab() invocation happens
+within the IPI handler and is problematic for PREEMPT_RT.
+
+The flush operation is neither frequent nor a hot path. The per-CPU
+flush operation can therefore be moved into a workqueue.
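+
+Condensed, the scheme in the hunks below looks like this (the skip
+handling for CPUs without cached objects is omitted here):
+
+	/* queue one flush work item per CPU that caches objects of s */
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		INIT_WORK(&sfw->work, flush_cpu_slab);
+		sfw->s = s;
+		schedule_work_on(cpu, &sfw->work);
+	}
+
+	/* wait for completion; the work runs in task context, not in an IPI */
+	for_each_online_cpu(cpu)
+		flush_work(&per_cpu(slub_flush, cpu).work);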
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 52 insertions(+), 8 deletions(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index af9c0fbe2cf5..ec608c1d5fdb 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2487,26 +2487,70 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
+ unfreeze_partials(s, c, delayed_free);
+ }
+
+-static void flush_cpu_slab(void *d)
++struct slub_flush_work {
++ struct work_struct work;
++ struct kmem_cache *s;
++ bool skip;
++};
++
++static void flush_cpu_slab(struct work_struct *w)
+ {
+- struct kmem_cache *s = d;
++ struct slub_flush_work *sfw;
+ LIST_HEAD(delayed_free);
+
+- __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ sfw = container_of(w, struct slub_flush_work, work);
++
++ local_irq_disable();
++ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free);
++ local_irq_enable();
++
+ discard_delayed(&delayed_free);
+ }
+
+-static bool has_cpu_slab(int cpu, void *info)
++static bool has_cpu_slab(int cpu, struct kmem_cache *s)
+ {
+- struct kmem_cache *s = info;
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ return c->page || slub_percpu_partial(c);
+ }
+
++static DEFINE_MUTEX(flush_lock);
++static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
++
++static void flush_all_locked(struct kmem_cache *s)
++{
++ struct slub_flush_work *sfw;
++ unsigned int cpu;
++
++ mutex_lock(&flush_lock);
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++ if (!has_cpu_slab(cpu, s)) {
++ sfw->skip = true;
++ continue;
++ }
++ INIT_WORK(&sfw->work, flush_cpu_slab);
++ sfw->skip = false;
++ sfw->s = s;
++ schedule_work_on(cpu, &sfw->work);
++ }
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++ if (sfw->skip)
++ continue;
++ flush_work(&sfw->work);
++ }
++
++ mutex_unlock(&flush_lock);
++}
++
+ static void flush_all(struct kmem_cache *s)
+ {
+- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
++ cpus_read_lock();
++ flush_all_locked(s);
++ cpus_read_unlock();
+ }
+
+ /*
+@@ -4009,7 +4053,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
+ int node;
+ struct kmem_cache_node *n;
+
+- flush_all(s);
++ flush_all_locked(s);
+ /* Attempt to free all objects */
+ for_each_kmem_cache_node(s, node, n) {
+ free_partial(s, n);
+@@ -4293,7 +4337,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
+ unsigned long flags;
+ int ret = 0;
+
+- flush_all(s);
++ flush_all_locked(s);
+ for_each_kmem_cache_node(s, node, n) {
+ INIT_LIST_HEAD(&discard);
+ for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
+--
+2.19.1
+
diff --git a/features/rt/mm-vmalloc-Another-preempt-disable-region-which-suck.patch b/features/rt/mm-vmalloc-Another-preempt-disable-region-which-suck.patch
new file mode 100644
index 00000000..18978e06
--- /dev/null
+++ b/features/rt/mm-vmalloc-Another-preempt-disable-region-which-suck.patch
@@ -0,0 +1,72 @@
+From 9834be9d8d752c94f6c88a3a380898da34cf003b Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2011 11:39:36 +0200
+Subject: [PATCH 133/191] mm/vmalloc: Another preempt disable region which
+ sucks
+
+Avoid the preempt-disabling get_cpu_var(). The inner lock should
+provide enough serialisation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/vmalloc.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 4f5f8c907897..e6cd482a11b9 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1558,7 +1558,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
+ struct vmap_block *vb;
+ struct vmap_area *va;
+ unsigned long vb_idx;
+- int node, err;
++ int node, err, cpu;
+ void *vaddr;
+
+ node = numa_node_id();
+@@ -1595,11 +1595,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
+ return ERR_PTR(err);
+ }
+
+- vbq = &get_cpu_var(vmap_block_queue);
++ cpu = get_cpu_light();
++ vbq = this_cpu_ptr(&vmap_block_queue);
+ spin_lock(&vbq->lock);
+ list_add_tail_rcu(&vb->free_list, &vbq->free);
+ spin_unlock(&vbq->lock);
+- put_cpu_var(vmap_block_queue);
++ put_cpu_light();
+
+ return vaddr;
+ }
+@@ -1664,6 +1665,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ struct vmap_block *vb;
+ void *vaddr = NULL;
+ unsigned int order;
++ int cpu;
+
+ BUG_ON(offset_in_page(size));
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -1678,7 +1680,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ order = get_order(size);
+
+ rcu_read_lock();
+- vbq = &get_cpu_var(vmap_block_queue);
++ cpu = get_cpu_light();
++ vbq = this_cpu_ptr(&vmap_block_queue);
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ unsigned long pages_off;
+
+@@ -1701,7 +1704,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ break;
+ }
+
+- put_cpu_var(vmap_block_queue);
++ put_cpu_light();
+ rcu_read_unlock();
+
+ /* Allocate new block if nothing was found */
+--
+2.19.1
+
diff --git a/features/rt/mm-vmstat-Protect-per-cpu-variables-with-preempt-dis.patch b/features/rt/mm-vmstat-Protect-per-cpu-variables-with-preempt-dis.patch
new file mode 100644
index 00000000..cdffd95d
--- /dev/null
+++ b/features/rt/mm-vmstat-Protect-per-cpu-variables-with-preempt-dis.patch
@@ -0,0 +1,144 @@
+From 3032c5b7a5a91389d663bd54367df7ed1a92a7a6 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:13 -0500
+Subject: [PATCH 085/191] mm/vmstat: Protect per cpu variables with preempt
+ disable on RT
+
+Disable preemption on -RT for the vmstat code. On vanilla the code runs
+in IRQ-off regions while on -RT it does not. "preempt_disable" ensures
+that the same resource is not updated in parallel due to preemption.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/vmstat.h | 4 ++++
+ mm/vmstat.c | 12 ++++++++++++
+ 2 files changed, 16 insertions(+)
+
+diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
+index 506d625163a1..09a7e4c714b8 100644
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+ */
+ static inline void __count_vm_event(enum vm_event_item item)
+ {
++ preempt_disable_rt();
+ raw_cpu_inc(vm_event_states.event[item]);
++ preempt_enable_rt();
+ }
+
+ static inline void count_vm_event(enum vm_event_item item)
+@@ -73,7 +75,9 @@ static inline void count_vm_event(enum vm_event_item item)
+
+ static inline void __count_vm_events(enum vm_event_item item, long delta)
+ {
++ preempt_disable_rt();
+ raw_cpu_add(vm_event_states.event[item], delta);
++ preempt_enable_rt();
+ }
+
+ static inline void count_vm_events(enum vm_event_item item, long delta)
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 74b2c374b86c..4ef7b8a8f5ce 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -321,6 +321,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ long x;
+ long t;
+
++ preempt_disable_rt();
+ x = delta + __this_cpu_read(*p);
+
+ t = __this_cpu_read(pcp->stat_threshold);
+@@ -330,6 +331,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ x = 0;
+ }
+ __this_cpu_write(*p, x);
++ preempt_enable_rt();
+ }
+ EXPORT_SYMBOL(__mod_zone_page_state);
+
+@@ -352,6 +354,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
+ delta >>= PAGE_SHIFT;
+ }
+
++ preempt_disable_rt();
+ x = delta + __this_cpu_read(*p);
+
+ t = __this_cpu_read(pcp->stat_threshold);
+@@ -361,6 +364,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
+ x = 0;
+ }
+ __this_cpu_write(*p, x);
++ preempt_enable_rt();
+ }
+ EXPORT_SYMBOL(__mod_node_page_state);
+
+@@ -393,6 +397,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
+
++ preempt_disable_rt();
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v > t)) {
+@@ -401,6 +406,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+ zone_page_state_add(v + overstep, zone, item);
+ __this_cpu_write(*p, -overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+@@ -411,6 +417,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+
++ preempt_disable_rt();
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v > t)) {
+@@ -419,6 +426,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+ node_page_state_add(v + overstep, pgdat, item);
+ __this_cpu_write(*p, -overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+@@ -439,6 +447,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
+
++ preempt_disable_rt();
+ v = __this_cpu_dec_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v < - t)) {
+@@ -447,6 +456,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
+ zone_page_state_add(v - overstep, zone, item);
+ __this_cpu_write(*p, overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+@@ -457,6 +467,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+
++ preempt_disable_rt();
+ v = __this_cpu_dec_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v < - t)) {
+@@ -465,6 +476,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+ node_page_state_add(v - overstep, pgdat, item);
+ __this_cpu_write(*p, overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+--
+2.19.1
+
diff --git a/features/rt/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch b/features/rt/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch
new file mode 100644
index 00000000..d2cd6abd
--- /dev/null
+++ b/features/rt/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch
@@ -0,0 +1,48 @@
+From f8847b3d3ebc1ea59be319e5833c585bacd2d5f2 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 11 Feb 2019 10:40:46 +0100
+Subject: [PATCH 036/191] mm: workingset: replace IRQ-off check with a lockdep
+ assert.
+
+Commit
+
+ 68d48e6a2df57 ("mm: workingset: add vmstat counter for shadow nodes")
+
+introduced an IRQ-off check to ensure that a lock is held which also
+disables interrupts. This does not work the same way on -RT because none
+of the locks that are held disable interrupts.
+Replace this check with a lockdep assert which ensures that the lock is
+held.
+
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/workingset.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/mm/workingset.c b/mm/workingset.c
+index cd39902c1062..3d34c1309f54 100644
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -430,6 +430,8 @@ static struct list_lru shadow_nodes;
+
+ void workingset_update_node(struct xa_node *node)
+ {
++ struct address_space *mapping;
++
+ /*
+ * Track non-empty nodes that contain only shadow entries;
+ * unlink those that contain pages or are being freed.
+@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_node *node)
+ * already where they should be. The list_empty() test is safe
+ * as node->private_list is protected by the i_pages lock.
+ */
+- VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */
++ mapping = container_of(node->array, struct address_space, i_pages);
++ lockdep_assert_held(&mapping->i_pages.xa_lock);
+
+ if (node->count && node->count == node->nr_values) {
+ if (list_empty(&node->private_list)) {
+--
+2.19.1
+
diff --git a/features/rt/mm-zsmalloc-copy-with-get_cpu_var-and-locking.patch b/features/rt/mm-zsmalloc-copy-with-get_cpu_var-and-locking.patch
new file mode 100644
index 00000000..20da73a1
--- /dev/null
+++ b/features/rt/mm-zsmalloc-copy-with-get_cpu_var-and-locking.patch
@@ -0,0 +1,211 @@
+From 58041154aa39fc127b4c5d0f6179c606403ced5d Mon Sep 17 00:00:00 2001
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Tue, 22 Mar 2016 11:16:09 +0100
+Subject: [PATCH 116/191] mm/zsmalloc: copy with get_cpu_var() and locking
+
+get_cpu_var() disables preemption and triggers a might_sleep() splat later.
+This is replaced with a local_lock.
+The bit spinlocks are replaced with a proper mutex, which requires a
+slightly larger struct to allocate.
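+
+In other words, on PREEMPT_RT a handle no longer carries a pin bit in its
+low bit; it points to a small per-handle structure instead. A condensed
+view of the layout and of pinning (the full code is in the hunks below):
+
+	struct zsmalloc_handle {
+		unsigned long addr;	/* encoded object location */
+		struct mutex lock;	/* replaces the HANDLE_PIN bit-spinlock */
+	};
+
+	static void pin_tag(unsigned long handle)
+	{
+		struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+		mutex_lock(&zh->lock);	/* may sleep, which is fine on RT */
+	}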
+
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
+fixed the size magic]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 79 insertions(+), 6 deletions(-)
+
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index 30c358b72025..6177b736d018 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -57,6 +57,7 @@
+ #include <linux/wait.h>
+ #include <linux/pagemap.h>
+ #include <linux/fs.h>
++#include <linux/local_lock.h>
+
+ #define ZSPAGE_MAGIC 0x58
+
+@@ -77,6 +78,20 @@
+
+ #define ZS_HANDLE_SIZE (sizeof(unsigned long))
+
++#ifdef CONFIG_PREEMPT_RT
++
++struct zsmalloc_handle {
++ unsigned long addr;
++ struct mutex lock;
++};
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
++
++#else
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
++#endif
++
+ /*
+ * Object location (<PFN>, <obj_idx>) is encoded as
+ * a single (unsigned long) handle value.
+@@ -293,6 +308,7 @@ struct zspage {
+ };
+
+ struct mapping_area {
++ local_lock_t lock;
+ char *vm_buf; /* copy buffer for objects that span pages */
+ char *vm_addr; /* address of kmap_atomic()'ed pages */
+ enum zs_mapmode vm_mm; /* mapping mode */
+@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
+
+ static int create_cache(struct zs_pool *pool)
+ {
+- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
++ pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+ 0, 0, NULL);
+ if (!pool->handle_cachep)
+ return 1;
+@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool *pool)
+
+ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
+ {
+- return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+- gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++ void *p;
++
++ p = kmem_cache_alloc(pool->handle_cachep,
++ gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++#ifdef CONFIG_PREEMPT_RT
++ if (p) {
++ struct zsmalloc_handle *zh = p;
++
++ mutex_init(&zh->lock);
++ }
++#endif
++ return (unsigned long)p;
++}
++
++#ifdef CONFIG_PREEMPT_RT
++static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
++{
++ return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
+ }
++#endif
+
+ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+ {
+@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+
+ static void record_obj(unsigned long handle, unsigned long obj)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ WRITE_ONCE(zh->addr, obj);
++#else
+ /*
+ * lsb of @obj represents handle lock while other bits
+ * represent object value the handle is pointing so
+ * updating shouldn't do store tearing.
+ */
+ WRITE_ONCE(*(unsigned long *)handle, obj);
++#endif
+ }
+
+ /* zpool driver */
+@@ -455,7 +494,10 @@ MODULE_ALIAS("zpool-zsmalloc");
+ #endif /* CONFIG_ZPOOL */
+
+ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
+-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
++static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
++ /* XXX remove this and use a spin_lock_t in pin_tag() */
++ .lock = INIT_LOCAL_LOCK(lock),
++};
+
+ static bool is_zspage_isolated(struct zspage *zspage)
+ {
+@@ -862,7 +904,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
+
+ static unsigned long handle_to_obj(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ return zh->addr;
++#else
+ return *(unsigned long *)handle;
++#endif
+ }
+
+ static unsigned long obj_to_head(struct page *page, void *obj)
+@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
+
+ static inline int testpin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ return mutex_is_locked(&zh->lock);
++#else
+ return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+
+ static inline int trypin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ return mutex_trylock(&zh->lock);
++#else
+ return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+
+ static void pin_tag(unsigned long handle) __acquires(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ return mutex_lock(&zh->lock);
++#else
+ bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+
+ static void unpin_tag(unsigned long handle) __releases(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++ return mutex_unlock(&zh->lock);
++#else
+ bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+
+ static void reset_page(struct page *page)
+@@ -1274,7 +1346,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ class = pool->size_class[class_idx];
+ off = (class->size * obj_idx) & ~PAGE_MASK;
+
+- area = &get_cpu_var(zs_map_area);
++ local_lock(&zs_map_area.lock);
++ area = this_cpu_ptr(&zs_map_area);
+ area->vm_mm = mm;
+ if (off + class->size <= PAGE_SIZE) {
+ /* this object is contained entirely within a page */
+@@ -1328,7 +1401,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+
+ __zs_unmap_object(area, pages, off, class->size);
+ }
+- put_cpu_var(zs_map_area);
++ local_unlock(&zs_map_area.lock);
+
+ migrate_read_unlock(zspage);
+ unpin_tag(handle);
+--
+2.19.1
+
diff --git a/features/rt/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch b/features/rt/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch
new file mode 100644
index 00000000..81ff8d7a
--- /dev/null
+++ b/features/rt/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch
@@ -0,0 +1,36 @@
+From 792214a4ddc6c2bca52779e1665f52c5587868f9 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 16 Sep 2020 16:15:39 +0200
+Subject: [PATCH 142/191] net: Dequeue in dev_cpu_dead() without the lock
+
+Upstream uses skb_dequeue() to acquire the lock of `input_pkt_queue'. The
+reason is to synchronize against a remote CPU which, still thinking that
+this CPU is online, enqueues packets to it.
+There is no guarantee that the packet is enqueued before the callback is
+run; it is merely hoped for.
+RT however complains about an uninitialized lock because it uses another
+lock for `input_pkt_queue' due to the IRQ-off nature of the context.
+
+Use the unlocked dequeue version for `input_pkt_queue'.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 16c9aa19ede2..aab963be5655 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -11162,7 +11162,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
+ netif_rx_ni(skb);
+ input_queue_head_incr(oldsd);
+ }
+- while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
++ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+ netif_rx_ni(skb);
+ input_queue_head_incr(oldsd);
+ }
+--
+2.19.1
+
diff --git a/features/rt/net-Move-lockdep-where-it-belongs.patch b/features/rt/net-Move-lockdep-where-it-belongs.patch
new file mode 100644
index 00000000..d6d1ff0d
--- /dev/null
+++ b/features/rt/net-Move-lockdep-where-it-belongs.patch
@@ -0,0 +1,46 @@
+From abf7b145f704064b5de40759c313fe711a89234f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 8 Sep 2020 07:32:20 +0200
+Subject: [PATCH 038/191] net: Move lockdep where it belongs
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/sock.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 0ed98f20448a..f1d1cee0dddd 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -3064,12 +3064,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
+ if (sk->sk_lock.owned)
+ __lock_sock(sk);
+ sk->sk_lock.owned = 1;
+- spin_unlock(&sk->sk_lock.slock);
++ spin_unlock_bh(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+- local_bh_enable();
+ }
+ EXPORT_SYMBOL(lock_sock_nested);
+
+@@ -3118,13 +3117,12 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+
+ __lock_sock(sk);
+ sk->sk_lock.owned = 1;
+- spin_unlock(&sk->sk_lock.slock);
++ spin_unlock_bh(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ __acquire(&sk->sk_lock.slock);
+- local_bh_enable();
+ return true;
+ }
+ EXPORT_SYMBOL(lock_sock_fast);
+--
+2.19.1
+
diff --git a/features/rt/net-Properly-annotate-the-try-lock-for-the-seqlock.patch b/features/rt/net-Properly-annotate-the-try-lock-for-the-seqlock.patch
new file mode 100644
index 00000000..59f37318
--- /dev/null
+++ b/features/rt/net-Properly-annotate-the-try-lock-for-the-seqlock.patch
@@ -0,0 +1,69 @@
+From 363d9082c806cbde11d4468cfaaac47e4b529c82 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 8 Sep 2020 16:57:11 +0200
+Subject: [PATCH 092/191] net: Properly annotate the try-lock for the seqlock
+
+In patch
+ ("net/Qdisc: use a seqlock instead seqcount")
+
+the seqcount has been replaced with a seqlock to allow the reader to
+boost the preempted writer.
+The try_write_seqlock() acquired the lock with a try-lock but the
+seqcount annotation was "lock".
+
+Opencode write_seqcount_t_begin() and use the try-lock annotation for
+lockdep.
+
+Reported-by: Mike Galbraith <efault@gmx.de>
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/net/net_seq_lock.h | 9 ---------
+ include/net/sch_generic.h | 10 +++++++++-
+ 2 files changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h
+index 95a497a72e51..67710bace741 100644
+--- a/include/net/net_seq_lock.h
++++ b/include/net/net_seq_lock.h
+@@ -6,15 +6,6 @@
+ # define net_seq_begin(__r) read_seqbegin(__r)
+ # define net_seq_retry(__r, __s) read_seqretry(__r, __s)
+
+-static inline int try_write_seqlock(seqlock_t *sl)
+-{
+- if (spin_trylock(&sl->lock)) {
+- write_seqcount_begin(&sl->seqcount);
+- return 1;
+- }
+- return 0;
+-}
+-
+ #else
+ # define net_seqlock_t seqcount_t
+ # define net_seq_begin(__r) read_seqcount_begin(__r)
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 1650446d3bfa..bf0865d642f5 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -171,8 +171,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ return false;
+ }
+ #ifdef CONFIG_PREEMPT_RT
+- if (try_write_seqlock(&qdisc->running))
++ if (spin_trylock(&qdisc->running.lock)) {
++ seqcount_t *s = &qdisc->running.seqcount.seqcount;
++ /*
++ * Variant of write_seqcount_t_begin() telling lockdep that a
++ * trylock was attempted.
++ */
++ do_raw_write_seqcount_begin(s);
++ seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_);
+ return true;
++ }
+ return false;
+ #else
+ /* Variant of write_seqcount_begin() telling lockdep a trylock
+--
+2.19.1
+
diff --git a/features/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch b/features/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch
new file mode 100644
index 00000000..7ef9f7ad
--- /dev/null
+++ b/features/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch
@@ -0,0 +1,298 @@
+From 0532faba565e7c8cf62950d7f1a9a7bc7ab9840b Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 14 Sep 2016 17:36:35 +0200
+Subject: [PATCH 091/191] net/Qdisc: use a seqlock instead seqcount
+
+The seqcount disables preemption on -RT while it is held, which we can't
+remove. Also we don't want the reader to spin for ages if the writer is
+scheduled out. The seqlock on the other hand will serialize / sleep on
+the lock while the writer is active.
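+
+The reader side keeps its usual retry loop in both configurations; only
+the primitive behind the net_seq_*() wrappers changes. A minimal sketch
+of a reader, with `running' being the net_seqlock_t pointer and `b' the
+per-qdisc counters (illustrative only):
+
+	unsigned int seq;
+	u64 bytes;
+
+	do {
+		seq = net_seq_begin(running);
+		bytes = b->bytes;
+	} while (net_seq_retry(running, seq));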
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/net/gen_stats.h | 11 ++++++-----
+ include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++
+ include/net/sch_generic.h | 19 +++++++++++++++++--
+ net/core/gen_estimator.c | 6 +++---
+ net/core/gen_stats.c | 12 ++++++------
+ net/sched/sch_api.c | 2 +-
+ net/sched/sch_generic.c | 10 ++++++++++
+ 7 files changed, 67 insertions(+), 17 deletions(-)
+ create mode 100644 include/net/net_seq_lock.h
+
+diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
+index 1424e02cef90..163f8415e5db 100644
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -6,6 +6,7 @@
+ #include <linux/socket.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/pkt_sched.h>
++#include <net/net_seq_lock.h>
+
+ /* Note: this used to be in include/uapi/linux/gen_stats.h */
+ struct gnet_stats_basic_packed {
+@@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
+ spinlock_t *lock, struct gnet_dump *d,
+ int padattr);
+
+-int gnet_stats_copy_basic(const seqcount_t *running,
++int gnet_stats_copy_basic(net_seqlock_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b);
+-void __gnet_stats_copy_basic(const seqcount_t *running,
++void __gnet_stats_copy_basic(net_seqlock_t *running,
+ struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b);
+-int gnet_stats_copy_basic_hw(const seqcount_t *running,
++int gnet_stats_copy_basic_hw(net_seqlock_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b);
+@@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ net_seqlock_t *running, struct nlattr *opt);
+ void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
+ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **ptr,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ net_seqlock_t *running, struct nlattr *opt);
+ bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
+ bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
+ struct gnet_stats_rate_est64 *sample);
+diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h
+new file mode 100644
+index 000000000000..95a497a72e51
+--- /dev/null
++++ b/include/net/net_seq_lock.h
+@@ -0,0 +1,24 @@
++#ifndef __NET_NET_SEQ_LOCK_H__
++#define __NET_NET_SEQ_LOCK_H__
++
++#ifdef CONFIG_PREEMPT_RT
++# define net_seqlock_t seqlock_t
++# define net_seq_begin(__r) read_seqbegin(__r)
++# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
++
++static inline int try_write_seqlock(seqlock_t *sl)
++{
++ if (spin_trylock(&sl->lock)) {
++ write_seqcount_begin(&sl->seqcount);
++ return 1;
++ }
++ return 0;
++}
++
++#else
++# define net_seqlock_t seqcount_t
++# define net_seq_begin(__r) read_seqcount_begin(__r)
++# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
++#endif
++
++#endif
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 2d6eb60c58c8..1650446d3bfa 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -10,6 +10,7 @@
+ #include <linux/percpu.h>
+ #include <linux/dynamic_queue_limits.h>
+ #include <linux/list.h>
++#include <net/net_seq_lock.h>
+ #include <linux/refcount.h>
+ #include <linux/workqueue.h>
+ #include <linux/mutex.h>
+@@ -100,7 +101,7 @@ struct Qdisc {
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
+ struct qdisc_skb_head q;
+ struct gnet_stats_basic_packed bstats;
+- seqcount_t running;
++ net_seqlock_t running;
+ struct gnet_stats_queue qstats;
+ unsigned long state;
+ struct Qdisc *next_sched;
+@@ -141,7 +142,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
+ {
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ return spin_is_locked(&qdisc->seqlock);
++#ifdef CONFIG_PREEMPT_RT
++ return spin_is_locked(&qdisc->running.lock) ? true : false;
++#else
+ return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
++#endif
+ }
+
+ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+@@ -165,17 +170,27 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ } else if (qdisc_is_running(qdisc)) {
+ return false;
+ }
++#ifdef CONFIG_PREEMPT_RT
++ if (try_write_seqlock(&qdisc->running))
++ return true;
++ return false;
++#else
+ /* Variant of write_seqcount_begin() telling lockdep a trylock
+ * was attempted.
+ */
+ raw_write_seqcount_begin(&qdisc->running);
+ seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
+ return true;
++#endif
+ }
+
+ static inline void qdisc_run_end(struct Qdisc *qdisc)
+ {
++#ifdef CONFIG_PREEMPT_RT
++ write_sequnlock(&qdisc->running);
++#else
+ write_seqcount_end(&qdisc->running);
++#endif
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ spin_unlock(&qdisc->seqlock);
+ }
+@@ -540,7 +555,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
+ return qdisc_lock(root);
+ }
+
+-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
++static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+ {
+ struct Qdisc *root = qdisc_root_sleeping(qdisc);
+
+diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
+index 8e582e29a41e..e51f4854d8b2 100644
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -42,7 +42,7 @@
+ struct net_rate_estimator {
+ struct gnet_stats_basic_packed *bstats;
+ spinlock_t *stats_lock;
+- seqcount_t *running;
++ net_seqlock_t *running;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ u8 ewma_log;
+ u8 intvl_log; /* period : (250ms << intvl_log) */
+@@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running,
++ net_seqlock_t *running,
+ struct nlattr *opt)
+ {
+ struct gnet_estimator *parm = nla_data(opt);
+@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt)
++ net_seqlock_t *running, struct nlattr *opt)
+ {
+ return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ lock, running, opt);
+diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
+index e491b083b348..ef432cea2e10 100644
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -137,7 +137,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+ }
+
+ void
+-__gnet_stats_copy_basic(const seqcount_t *running,
++__gnet_stats_copy_basic(net_seqlock_t *running,
+ struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
+@@ -150,15 +150,15 @@ __gnet_stats_copy_basic(const seqcount_t *running,
+ }
+ do {
+ if (running)
+- seq = read_seqcount_begin(running);
++ seq = net_seq_begin(running);
+ bstats->bytes = b->bytes;
+ bstats->packets = b->packets;
+- } while (running && read_seqcount_retry(running, seq));
++ } while (running && net_seq_retry(running, seq));
+ }
+ EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
+ static int
+-___gnet_stats_copy_basic(const seqcount_t *running,
++___gnet_stats_copy_basic(net_seqlock_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b,
+@@ -204,7 +204,7 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic(const seqcount_t *running,
++gnet_stats_copy_basic(net_seqlock_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
+@@ -228,7 +228,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic_hw(const seqcount_t *running,
++gnet_stats_copy_basic_hw(net_seqlock_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index f87d07736a14..7a627b208393 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
+ rcu_assign_pointer(sch->stab, stab);
+ }
+ if (tca[TCA_RATE]) {
+- seqcount_t *running;
++ net_seqlock_t *running;
+
+ err = -EOPNOTSUPP;
+ if (sch->flags & TCQ_F_MQROOT) {
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 49eae93d1489..512a39d6edec 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -553,7 +553,11 @@ struct Qdisc noop_qdisc = {
+ .ops = &noop_qdisc_ops,
+ .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .dev_queue = &noop_netdev_queue,
++#ifdef CONFIG_PREEMPT_RT
++ .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
++#else
+ .running = SEQCNT_ZERO(noop_qdisc.running),
++#endif
+ .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+ .gso_skb = {
+ .next = (struct sk_buff *)&noop_qdisc.gso_skb,
+@@ -845,9 +849,15 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
++#ifdef CONFIG_PREEMPT_RT
++ seqlock_init(&sch->running);
++ lockdep_set_class(&sch->running.lock,
++ dev->qdisc_running_key ?: &qdisc_running_key);
++#else
+ seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
++#endif
+
+ sch->ops = ops;
+ sch->flags = ops->static_flags;
+--
+2.19.1
+
diff --git a/features/rt/net-Remove-preemption-disabling-in-netif_rx.patch b/features/rt/net-Remove-preemption-disabling-in-netif_rx.patch
new file mode 100644
index 00000000..b92de5fc
--- /dev/null
+++ b/features/rt/net-Remove-preemption-disabling-in-netif_rx.patch
@@ -0,0 +1,67 @@
+From efa7c96f4b8dec2a51711a6eef658093b42a6d91 Mon Sep 17 00:00:00 2001
+From: Priyanka Jain <Priyanka.Jain@freescale.com>
+Date: Thu, 17 May 2012 09:35:11 +0530
+Subject: [PATCH 150/191] net: Remove preemption disabling in netif_rx()
+
+1) enqueue_to_backlog() (called from netif_rx) should be
+ bound to a particular CPU. This can be achieved by
+ disabling migration. There is no need to disable preemption.
+
+2) Fixes the crash "BUG: scheduling while atomic: ksoftirqd"
+ in case of RT.
+ If preemption is disabled, enqueue_to_backlog() is called
+ in atomic context. And if the backlog exceeds its count,
+ kfree_skb() is called. But on RT, kfree_skb() might
+ get scheduled out, so it expects a non-atomic context.
+
+-Replace preempt_enable(), preempt_disable() with
+ migrate_enable(), migrate_disable() respectively
+-Replace get_cpu(), put_cpu() with get_cpu_light(),
+ put_cpu_light() respectively
+
+Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com>
+Acked-by: Rajan Srivastava <Rajan.Srivastava@freescale.com>
+Cc: <rostedt@goodmis.orgn>
+Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@freescale.com
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bigeasy: Remove assumption about migrate_disable() from the description.]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 52c928940167..ca150f02eed1 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4837,7 +4837,7 @@ static int netif_rx_internal(struct sk_buff *skb)
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu;
+
+- preempt_disable();
++ migrate_disable();
+ rcu_read_lock();
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
+@@ -4847,14 +4847,14 @@ static int netif_rx_internal(struct sk_buff *skb)
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+ rcu_read_unlock();
+- preempt_enable();
++ migrate_enable();
+ } else
+ #endif
+ {
+ unsigned int qtail;
+
+- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+- put_cpu();
++ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
++ put_cpu_light();
+ }
+ return ret;
+ }
+--
+2.19.1
+
diff --git a/features/rt/net-Use-skbufhead-with-raw-lock.patch b/features/rt/net-Use-skbufhead-with-raw-lock.patch
new file mode 100644
index 00000000..3309dda6
--- /dev/null
+++ b/features/rt/net-Use-skbufhead-with-raw-lock.patch
@@ -0,0 +1,73 @@
+From d7ca8b65392672151fbaaa0c0ec93b3be0dd2d51 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2011 15:38:34 +0200
+Subject: [PATCH 141/191] net: Use skbufhead with raw lock
+
+Use the rps lock as a raw lock so we can keep the irq-off regions. It looks
+low latency. However we can't kfree() from this context, therefore we defer
+this to the softirq and use the tofree_queue list for it (similar to
+process_queue).
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/skbuff.h | 7 +++++++
+ net/core/dev.c | 6 +++---
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 6d0a33d1c0db..3402903b5cf0 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -295,6 +295,7 @@ struct sk_buff_head {
+
+ __u32 qlen;
+ spinlock_t lock;
++ raw_spinlock_t raw_lock;
+ };
+
+ struct sk_buff;
+@@ -1902,6 +1903,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
+ __skb_queue_head_init(list);
+ }
+
++static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
++{
++ raw_spin_lock_init(&list->raw_lock);
++ __skb_queue_head_init(list);
++}
++
+ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
+ struct lock_class_key *class)
+ {
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 2d6ee43e02b8..16c9aa19ede2 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -223,14 +223,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
+ static inline void rps_lock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+- spin_lock(&sd->input_pkt_queue.lock);
++ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+
+ static inline void rps_unlock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+- spin_unlock(&sd->input_pkt_queue.lock);
++ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+
+@@ -11478,7 +11478,7 @@ static int __init net_dev_init(void)
+
+ INIT_WORK(flush, flush_backlog);
+
+- skb_queue_head_init(&sd->input_pkt_queue);
++ skb_queue_head_init_raw(&sd->input_pkt_queue);
+ skb_queue_head_init(&sd->process_queue);
+ #ifdef CONFIG_XFRM_OFFLOAD
+ skb_queue_head_init(&sd->xfrm_backlog);
+--
+2.19.1
+
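As a reference for the raw-lock pattern used above, here is a minimal sketch (hypothetical structure and list names, not from the patch) of a queue protected by a raw_spinlock_t, with the actual freeing deferred to a later, preemptible context:

    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct item {
            struct list_head node;
    };

    static LIST_HEAD(tofree);
    /* raw_spinlock_t stays a real spinning lock on PREEMPT_RT */
    static DEFINE_RAW_SPINLOCK(tofree_lock);

    static void defer_free(struct item *it)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&tofree_lock, flags);
            list_add_tail(&it->node, &tofree);
            raw_spin_unlock_irqrestore(&tofree_lock, flags);
            /* no kfree() here: freeing happens later from softirq/thread
             * context, outside the raw-lock/irq-off region */
    }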
diff --git a/features/rt/net-core-disable-NET_RX_BUSY_POLL-on-RT.patch b/features/rt/net-core-disable-NET_RX_BUSY_POLL-on-RT.patch
new file mode 100644
index 00000000..4612f762
--- /dev/null
+++ b/features/rt/net-core-disable-NET_RX_BUSY_POLL-on-RT.patch
@@ -0,0 +1,43 @@
+From 116e028b98d7a297eccf815f0a734e80154b53c3 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Sat, 27 May 2017 19:02:06 +0200
+Subject: [PATCH 096/191] net/core: disable NET_RX_BUSY_POLL on RT
+
+napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire
+sleeping locks with preemption disabled, so we would have to work around this
+and add explicit locking for synchronisation against ksoftirqd.
+Without explicit synchronisation a low-priority process would "own" the NAPI
+state (by setting NAPIF_STATE_SCHED) and could be scheduled out (no
+preempt_disable(), and BH is preemptible on RT).
+If a network packet then arrives, the interrupt handler would set
+NAPIF_STATE_MISSED and the system would wait until the task owning the NAPI
+state is scheduled in again.
+Should a task with RT priority busy poll, it would consume the CPU instead
+of allowing tasks with lower priority to run.
+
+Busy polling is disabled by default (the system-wide sysctls for
+poll/read are set to zero), so disable NET_RX_BUSY_POLL on RT to avoid the
+wrong locking context. Should this feature be considered useful on RT systems,
+it could be enabled again with proper locking and synchronisation.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/Kconfig b/net/Kconfig
+index 8cea808ad9e8..32f101108361 100644
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -286,7 +286,7 @@ config CGROUP_NET_CLASSID
+
+ config NET_RX_BUSY_POLL
+ bool
+- default y
++ default y if !PREEMPT_RT
+
+ config BQL
+ bool
+--
+2.19.1
+
diff --git a/features/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch b/features/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch
new file mode 100644
index 00000000..ba6256a3
--- /dev/null
+++ b/features/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch
@@ -0,0 +1,40 @@
+From 44a59612611dac7147b1bc47494ed5e8aa95855e Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 16 Jun 2017 19:03:16 +0200
+Subject: [PATCH 126/191] net/core: use local_bh_disable() in netif_rx_ni()
+
+In 2004 netif_rx_ni() gained a preempt_disable() section around
+netif_rx() and its do_softirq() + testing for it. The do_softirq() part
+is required because netif_rx() raises the softirq but does not invoke
+it. The preempt_disable() is required to remain on the same CPU that added
+the skb to the per-CPU list.
+All this can be avoided by putting it into a local_bh_disable()ed
+section. The local_bh_enable() part will invoke do_softirq() if
+required.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 86a599a41062..2d6ee43e02b8 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4889,11 +4889,9 @@ int netif_rx_ni(struct sk_buff *skb)
+
+ trace_netif_rx_ni_entry(skb);
+
+- preempt_disable();
++ local_bh_disable();
+ err = netif_rx_internal(skb);
+- if (local_softirq_pending())
+- do_softirq();
+- preempt_enable();
++ local_bh_enable();
+ trace_netif_rx_ni_exit(err);
+
+ return err;
+--
+2.19.1
+
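A minimal sketch of the local_bh_disable() pattern described above (the caller is hypothetical and not part of the patch):

    #include <linux/interrupt.h>

    /* Hypothetical example: raise a softirq and have it processed on the
     * same CPU once bottom halves are re-enabled. */
    static void deliver_locally(void)
    {
            local_bh_disable();             /* stay on this CPU, BHs deferred */
            raise_softirq(NET_RX_SOFTIRQ);  /* mark the softirq pending */
            local_bh_enable();              /* runs pending softirqs here */
    }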
diff --git a/features/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/features/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch
new file mode 100644
index 00000000..75ccf34f
--- /dev/null
+++ b/features/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch
@@ -0,0 +1,41 @@
+From 2e3722be67eda134ba0ed9bc8507e6f3ca3e0011 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 30 Mar 2016 13:36:29 +0200
+Subject: [PATCH 143/191] net: dev: always take qdisc's busylock in
+ __dev_xmit_skb()
+
+The root lock is dropped before dev_hard_start_xmit() is invoked and after
+setting the __QDISC___STATE_RUNNING bit. If this task is now preempted
+by a task with a higher priority, then the higher-priority task
+won't be able to submit packets to the NIC directly; instead they will be
+enqueued into the Qdisc. The NIC will remain idle until the task(s) with
+higher priority leave the CPU and the task with lower priority gets back
+and finishes the job.
+
+If we always take the busylock, we ensure that the RT task can boost the
+low-priority task and submit the packet.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index aab963be5655..52c928940167 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3807,7 +3807,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+ * This permits qdisc->running owner to get the lock more
+ * often and dequeue packets faster.
+ */
++#ifdef CONFIG_PREEMPT_RT
++ contended = true;
++#else
+ contended = qdisc_is_running(q);
++#endif
+ if (unlikely(contended))
+ spin_lock(&q->busylock);
+
+--
+2.19.1
+
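The busylock idea can be summarized with a small sketch. qdisc_is_running() is replaced here by a hypothetical stand-in, and both locks are plain placeholders rather than the real qdisc locks:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(busylock);       /* sleeping lock on RT: subject to PI */
    static DEFINE_SPINLOCK(root_lock);

    /* hypothetical stand-in for qdisc_is_running() */
    static bool queue_is_running(void)
    {
            return false;
    }

    static void xmit_one(void)
    {
            bool contended;

    #ifdef CONFIG_PREEMPT_RT
            /* always funnel through busylock so a waiting RT task can
             * priority-boost the current lock owner */
            contended = true;
    #else
            contended = queue_is_running();
    #endif
            if (contended)
                    spin_lock(&busylock);
            spin_lock(&root_lock);
            /* ... enqueue or transmit ... */
            spin_unlock(&root_lock);
            if (contended)
                    spin_unlock(&busylock);
    }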
diff --git a/features/rt/net-jme-Replace-link-change-tasklet-with-work.patch b/features/rt/net-jme-Replace-link-change-tasklet-with-work.patch
new file mode 100644
index 00000000..8373eb7f
--- /dev/null
+++ b/features/rt/net-jme-Replace-link-change-tasklet-with-work.patch
@@ -0,0 +1,87 @@
+From f5a683610df7d5b147cf18ed631f98f1813afec5 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:11 +0100
+Subject: [PATCH 048/191] net: jme: Replace link-change tasklet with work
+
+The link change tasklet disables the tasklets for tx/rx processing while
+updating hw parameters and then enables the tasklets again.
+
+This update can also be pushed into a workqueue where it can be performed
+in preemptible context. This allows tasklet_disable() to become a sleeping call.
+
+Replace the linkch_task tasklet with a work item.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/net/ethernet/jme.c | 10 +++++-----
+ drivers/net/ethernet/jme.h | 2 +-
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
+index e9efe074edc1..f1b9284e0bea 100644
+--- a/drivers/net/ethernet/jme.c
++++ b/drivers/net/ethernet/jme.c
+@@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme)
+ jwrite32f(jme, JME_APMC, apmc);
+ }
+
+-static void jme_link_change_tasklet(struct tasklet_struct *t)
++static void jme_link_change_work(struct work_struct *work)
+ {
+- struct jme_adapter *jme = from_tasklet(jme, t, linkch_task);
++ struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task);
+ struct net_device *netdev = jme->dev;
+ int rc;
+
+@@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u32 intrstat)
+ * all other events are ignored
+ */
+ jwrite32(jme, JME_IEVE, intrstat);
+- tasklet_schedule(&jme->linkch_task);
++ schedule_work(&jme->linkch_task);
+ goto out_reenable;
+ }
+
+@@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev)
+ jme_clear_pm_disable_wol(jme);
+ JME_NAPI_ENABLE(jme);
+
+- tasklet_setup(&jme->linkch_task, jme_link_change_tasklet);
+ tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet);
+ tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet);
+ tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet);
+@@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev)
+
+ JME_NAPI_DISABLE(jme);
+
+- tasklet_kill(&jme->linkch_task);
++ cancel_work_sync(&jme->linkch_task);
+ tasklet_kill(&jme->txclean_task);
+ tasklet_kill(&jme->rxclean_task);
+ tasklet_kill(&jme->rxempty_task);
+@@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev,
+ atomic_set(&jme->rx_empty, 1);
+
+ tasklet_setup(&jme->pcc_task, jme_pcc_tasklet);
++ INIT_WORK(&jme->linkch_task, jme_link_change_work);
+ jme->dpi.cur = PCC_P1;
+
+ jme->reg_ghc = 0;
+diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h
+index a2c3b00d939d..2af76329b4a2 100644
+--- a/drivers/net/ethernet/jme.h
++++ b/drivers/net/ethernet/jme.h
+@@ -411,7 +411,7 @@ struct jme_adapter {
+ struct tasklet_struct rxempty_task;
+ struct tasklet_struct rxclean_task;
+ struct tasklet_struct txclean_task;
+- struct tasklet_struct linkch_task;
++ struct work_struct linkch_task;
+ struct tasklet_struct pcc_task;
+ unsigned long flags;
+ u32 reg_txcs;
+--
+2.19.1
+
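The tasklet-to-work conversion follows a generic pattern. A minimal sketch with a hypothetical device structure (only the workqueue API mirrors the patch):

    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    struct mydev {                          /* hypothetical device structure */
            struct work_struct link_work;
    };

    static void link_change_work(struct work_struct *work)
    {
            struct mydev *dev = container_of(work, struct mydev, link_work);

            /* runs in preemptible process context: sleeping is allowed,
             * e.g. while other tasklets are disabled and hw is reprogrammed */
            (void)dev;
    }

    static void mydev_setup(struct mydev *dev)
    {
            INIT_WORK(&dev->link_work, link_change_work);
    }

    /* from the interrupt handler:   schedule_work(&dev->link_work);
     * on teardown:                  cancel_work_sync(&dev->link_work); */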
diff --git a/features/rt/net-sundance-Use-tasklet_disable_in_atomic.patch b/features/rt/net-sundance-Use-tasklet_disable_in_atomic.patch
new file mode 100644
index 00000000..93713951
--- /dev/null
+++ b/features/rt/net-sundance-Use-tasklet_disable_in_atomic.patch
@@ -0,0 +1,38 @@
+From 8281edf5e8fa71507a09c7511ea11da232e373af Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 9 Mar 2021 09:42:12 +0100
+Subject: [PATCH 049/191] net: sundance: Use tasklet_disable_in_atomic().
+
+tasklet_disable() is used in the timer callback. This might be disentangled,
+but without access to the hardware that's a bit risky.
+
+Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be
+changed to a sleep wait once all remaining atomic users are converted.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Denis Kirjanov <kda@linux-powerpc.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: netdev@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/net/ethernet/dlink/sundance.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
+index e3a8858915b3..df0eab479d51 100644
+--- a/drivers/net/ethernet/dlink/sundance.c
++++ b/drivers/net/ethernet/dlink/sundance.c
+@@ -963,7 +963,7 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue)
+ unsigned long flag;
+
+ netif_stop_queue(dev);
+- tasklet_disable(&np->tx_tasklet);
++ tasklet_disable_in_atomic(&np->tx_tasklet);
+ iowrite16(0, ioaddr + IntrEnable);
+ printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x "
+ "TxFrameId %2.2x,"
+--
+2.19.1
+
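A minimal sketch of the tasklet_disable_in_atomic() usage shown above, with a hypothetical tasklet and callback; it assumes the _in_atomic() variant introduced earlier in this series:

    #include <linux/interrupt.h>

    static struct tasklet_struct tx_tasklet;    /* assumed set up elsewhere */

    /* Hypothetical watchdog callback running in atomic context. */
    static void my_tx_timeout(void)
    {
            /* plain tasklet_disable() may become a sleeping wait; the
             * _in_atomic() variant keeps the spin-wait semantics */
            tasklet_disable_in_atomic(&tx_tasklet);
            /* ... reset the hardware transmit path ... */
            tasklet_enable(&tx_tasklet);
    }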
diff --git a/features/rt/notifier-Make-atomic_notifiers-use-raw_spinlock.patch b/features/rt/notifier-Make-atomic_notifiers-use-raw_spinlock.patch
new file mode 100644
index 00000000..9defbe15
--- /dev/null
+++ b/features/rt/notifier-Make-atomic_notifiers-use-raw_spinlock.patch
@@ -0,0 +1,131 @@
+From 074172587a9dfb0bae37a5663abb7e439376abfc Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Sun, 22 Nov 2020 20:19:04 +0000
+Subject: [PATCH 005/191] notifier: Make atomic_notifiers use raw_spinlock
+
+Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno
+leads to the idle task blocking on an RT sleeping spinlock down some
+notifier path:
+
+ [ 1.809101] BUG: scheduling while atomic: swapper/5/0/0x00000002
+ [ 1.809116] Modules linked in:
+ [ 1.809123] Preemption disabled at:
+ [ 1.809125] secondary_start_kernel (arch/arm64/kernel/smp.c:227)
+ [ 1.809146] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.10.0-rc3-rt7 #168
+ [ 1.809153] Hardware name: ARM Juno development board (r0) (DT)
+ [ 1.809158] Call trace:
+ [ 1.809160] dump_backtrace (arch/arm64/kernel/stacktrace.c:100 (discriminator 1))
+ [ 1.809170] show_stack (arch/arm64/kernel/stacktrace.c:198)
+ [ 1.809178] dump_stack (lib/dump_stack.c:122)
+ [ 1.809188] __schedule_bug (kernel/sched/core.c:4886)
+ [ 1.809197] __schedule (./arch/arm64/include/asm/preempt.h:18 kernel/sched/core.c:4913 kernel/sched/core.c:5040)
+ [ 1.809204] preempt_schedule_lock (kernel/sched/core.c:5365 (discriminator 1))
+ [ 1.809210] rt_spin_lock_slowlock_locked (kernel/locking/rtmutex.c:1072)
+ [ 1.809217] rt_spin_lock_slowlock (kernel/locking/rtmutex.c:1110)
+ [ 1.809224] rt_spin_lock (./include/linux/rcupdate.h:647 kernel/locking/rtmutex.c:1139)
+ [ 1.809231] atomic_notifier_call_chain_robust (kernel/notifier.c:71 kernel/notifier.c:118 kernel/notifier.c:186)
+ [ 1.809240] cpu_pm_enter (kernel/cpu_pm.c:39 kernel/cpu_pm.c:93)
+ [ 1.809249] psci_enter_idle_state (drivers/cpuidle/cpuidle-psci.c:52 drivers/cpuidle/cpuidle-psci.c:129)
+ [ 1.809258] cpuidle_enter_state (drivers/cpuidle/cpuidle.c:238)
+ [ 1.809267] cpuidle_enter (drivers/cpuidle/cpuidle.c:353)
+ [ 1.809275] do_idle (kernel/sched/idle.c:132 kernel/sched/idle.c:213 kernel/sched/idle.c:273)
+ [ 1.809282] cpu_startup_entry (kernel/sched/idle.c:368 (discriminator 1))
+ [ 1.809288] secondary_start_kernel (arch/arm64/kernel/smp.c:273)
+
+Two points worth noting:
+
+1) That this is conceptually the same issue as pointed out in:
+ 313c8c16ee62 ("PM / CPU: replace raw_notifier with atomic_notifier")
+2) Only the _robust() variant of atomic_notifier call chains suffers from
+ this
+
+AFAICT only the cpu_pm_notifier_chain really needs to be changed, but
+singling it out would mean introducing a new (truly) non-blocking API. At
+the same time, callers that are fine with any blocking within the call
+chain should use blocking notifiers, so patching up all atomic_notifiers
+doesn't seem *too* crazy to me.
+
+Fixes: 70d932985757 ("notifier: Fix broken error handling pattern")
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/notifier.h | 6 +++---
+ kernel/notifier.c | 12 ++++++------
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/notifier.h b/include/linux/notifier.h
+index 2fb373a5c1ed..723bc2df6388 100644
+--- a/include/linux/notifier.h
++++ b/include/linux/notifier.h
+@@ -58,7 +58,7 @@ struct notifier_block {
+ };
+
+ struct atomic_notifier_head {
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ struct notifier_block __rcu *head;
+ };
+
+@@ -78,7 +78,7 @@ struct srcu_notifier_head {
+ };
+
+ #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
+- spin_lock_init(&(name)->lock); \
++ raw_spin_lock_init(&(name)->lock); \
+ (name)->head = NULL; \
+ } while (0)
+ #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \
+@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
+ cleanup_srcu_struct(&(name)->srcu);
+
+ #define ATOMIC_NOTIFIER_INIT(name) { \
+- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .head = NULL }
+ #define BLOCKING_NOTIFIER_INIT(name) { \
+ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \
+diff --git a/kernel/notifier.c b/kernel/notifier.c
+index 1b019cbca594..c20782f07643 100644
+--- a/kernel/notifier.c
++++ b/kernel/notifier.c
+@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+ unsigned long flags;
+ int ret;
+
+- spin_lock_irqsave(&nh->lock, flags);
++ raw_spin_lock_irqsave(&nh->lock, flags);
+ ret = notifier_chain_register(&nh->head, n);
+- spin_unlock_irqrestore(&nh->lock, flags);
++ raw_spin_unlock_irqrestore(&nh->lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+ unsigned long flags;
+ int ret;
+
+- spin_lock_irqsave(&nh->lock, flags);
++ raw_spin_lock_irqsave(&nh->lock, flags);
+ ret = notifier_chain_unregister(&nh->head, n);
+- spin_unlock_irqrestore(&nh->lock, flags);
++ raw_spin_unlock_irqrestore(&nh->lock, flags);
+ synchronize_rcu();
+ return ret;
+ }
+@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
+ * Musn't use RCU; because then the notifier list can
+ * change between the up and down traversal.
+ */
+- spin_lock_irqsave(&nh->lock, flags);
++ raw_spin_lock_irqsave(&nh->lock, flags);
+ ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
+- spin_unlock_irqrestore(&nh->lock, flags);
++ raw_spin_unlock_irqrestore(&nh->lock, flags);
+
+ return ret;
+ }
+--
+2.19.1
+
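For context, a minimal sketch of an atomic notifier chain user (hypothetical names); after this patch the chain's internal lock is a raw_spinlock_t, so registration and invocation stay usable from atomic context on RT:

    #include <linux/notifier.h>

    static ATOMIC_NOTIFIER_HEAD(my_chain);

    static int my_event_cb(struct notifier_block *nb, unsigned long action,
                           void *data)
    {
            /* must not sleep: may be invoked from atomic context */
            return NOTIFY_OK;
    }

    static struct notifier_block my_nb = {
            .notifier_call = my_event_cb,
    };

    static void my_setup_and_fire(void)
    {
            atomic_notifier_chain_register(&my_chain, &my_nb);
            /* may be called from atomic context, e.g. the idle/PM path */
            atomic_notifier_call_chain(&my_chain, 0, NULL);
    }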
diff --git a/features/rt/panic-skip-get_random_bytes-for-RT_FULL-in-init_oops.patch b/features/rt/panic-skip-get_random_bytes-for-RT_FULL-in-init_oops.patch
new file mode 100644
index 00000000..b8f71482
--- /dev/null
+++ b/features/rt/panic-skip-get_random_bytes-for-RT_FULL-in-init_oops.patch
@@ -0,0 +1,33 @@
+From 487bb91d3835707e8324dfddf3a2a524072e62ef Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 14 Jul 2015 14:26:34 +0200
+Subject: [PATCH 147/191] panic: skip get_random_bytes for RT_FULL in
+ init_oops_id
+
+Disable on -RT. If this is invoked from irq context, we will have problems
+acquiring the sleeping lock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/panic.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/kernel/panic.c b/kernel/panic.c
+index 678529917d5d..14017d1b77b0 100644
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -544,9 +544,11 @@ static u64 oops_id;
+
+ static int init_oops_id(void)
+ {
++#ifndef CONFIG_PREEMPT_RT
+ if (!oops_id)
+ get_random_bytes(&oops_id, sizeof(oops_id));
+ else
++#endif
+ oops_id++;
+
+ return 0;
+--
+2.19.1
+
diff --git a/features/rt/pid.h-include-atomic.h.patch b/features/rt/pid.h-include-atomic.h.patch
new file mode 100644
index 00000000..abe97a5c
--- /dev/null
+++ b/features/rt/pid.h-include-atomic.h.patch
@@ -0,0 +1,42 @@
+From e1d5c19b4e1fffa27d74e71fc8e15d416b2e15ea Mon Sep 17 00:00:00 2001
+From: Grygorii Strashko <Grygorii.Strashko@linaro.org>
+Date: Tue, 21 Jul 2015 19:43:56 +0300
+Subject: [PATCH 127/191] pid.h: include atomic.h
+
+This patch fixes a build error:
+ CC kernel/pid_namespace.o
+In file included from kernel/pid_namespace.c:11:0:
+include/linux/pid.h: In function 'get_pid':
+include/linux/pid.h:78:3: error: implicit declaration of function 'atomic_inc' [-Werror=implicit-function-declaration]
+ atomic_inc(&pid->count);
+ ^
+which happens when
+ CONFIG_PROVE_LOCKING=n
+ CONFIG_DEBUG_SPINLOCK=n
+ CONFIG_DEBUG_MUTEXES=n
+ CONFIG_DEBUG_LOCK_ALLOC=n
+ CONFIG_PID_NS=y
+
+Vanilla gets this via spinlock.h.
+
+Signed-off-by: Grygorii Strashko <Grygorii.Strashko@linaro.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/pid.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/include/linux/pid.h b/include/linux/pid.h
+index fa10acb8d6a4..2f86f84e9fc1 100644
+--- a/include/linux/pid.h
++++ b/include/linux/pid.h
+@@ -3,6 +3,7 @@
+ #define _LINUX_PID_H
+
+ #include <linux/rculist.h>
++#include <linux/atomic.h>
+ #include <linux/wait.h>
+ #include <linux/refcount.h>
+
+--
+2.19.1
+
diff --git a/features/rt/powerpc-Add-support-for-lazy-preemption.patch b/features/rt/powerpc-Add-support-for-lazy-preemption.patch
new file mode 100644
index 00000000..4cc73824
--- /dev/null
+++ b/features/rt/powerpc-Add-support-for-lazy-preemption.patch
@@ -0,0 +1,215 @@
+From 2525cb41cb38be3c1639aed5deb00823f5a472f4 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 1 Nov 2012 10:14:11 +0100
+Subject: [PATCH 167/191] powerpc: Add support for lazy preemption
+
+Implement the powerpc pieces for lazy preempt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/powerpc/Kconfig | 1 +
+ arch/powerpc/include/asm/thread_info.h | 7 +++++++
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/entry_32.S | 11 +++++++++--
+ arch/powerpc/kernel/exceptions-64e.S | 16 ++++++++++++----
+ arch/powerpc/kernel/interrupt.c | 10 +++++++---
+ 6 files changed, 37 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 386ae12d8523..bbee9b2f2bc7 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -231,6 +231,7 @@ config PPC
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
++ select HAVE_PREEMPT_LAZY
+ select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_PAGE_SIZE
+ select HAVE_REGS_AND_STACK_ACCESS_API
+diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
+index 386d576673a1..730ce15944ce 100644
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -48,6 +48,8 @@
+ struct thread_info {
+ int preempt_count; /* 0 => preemptable,
+ <0 => BUG */
++ int preempt_lazy_count; /* 0 => preemptable,
++ <0 => BUG */
+ unsigned long local_flags; /* private flags for thread */
+ #ifdef CONFIG_LIVEPATCH
+ unsigned long *livepatch_sp;
+@@ -94,6 +96,7 @@ void arch_setup_new_exec(void);
+ #define TIF_PATCH_PENDING 6 /* pending live patching update */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SINGLESTEP 8 /* singlestepping active */
++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
+ #define TIF_SECCOMP 10 /* secure computing */
+ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
+ #define TIF_NOERROR 12 /* Force successful syscall return */
+@@ -109,6 +112,7 @@ void arch_setup_new_exec(void);
+ #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+ #define TIF_32BIT 20 /* 32 bit binary */
+
++
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
+@@ -120,6 +124,7 @@ void arch_setup_new_exec(void);
+ #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
+ #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
++#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
+ #define _TIF_SECCOMP (1<<TIF_SECCOMP)
+ #define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
+ #define _TIF_NOERROR (1<<TIF_NOERROR)
+@@ -133,10 +138,12 @@ void arch_setup_new_exec(void);
+ _TIF_SYSCALL_EMU)
+
+ #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
++ _TIF_NEED_RESCHED_LAZY | \
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
+ _TIF_NOTIFY_SIGNAL)
+ #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
+ /* Bits in local_flags */
+ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index f3a662201a9f..1202c9c2e5b5 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -191,6 +191,7 @@ int main(void)
+ OFFSET(TI_FLAGS, thread_info, flags);
+ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
+ OFFSET(TI_PREEMPT, thread_info, preempt_count);
++ OFFSET(TI_PREEMPT_LAZY, thread_info, preempt_lazy_count);
+
+ #ifdef CONFIG_PPC64
+ OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
+diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
+index 78c430b7f9d9..b778938b4a5b 100644
+--- a/arch/powerpc/kernel/entry_32.S
++++ b/arch/powerpc/kernel/entry_32.S
+@@ -674,7 +674,14 @@ resume_kernel:
+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
+ bne restore_kuap
+ andi. r8,r8,_TIF_NEED_RESCHED
++ bne+ 1f
++ lwz r0,TI_PREEMPT_LAZY(r2)
++ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
++ bne restore_kuap
++ lwz r0,TI_FLAGS(r2)
++ andi. r0,r0,_TIF_NEED_RESCHED_LAZY
+ beq+ restore_kuap
++1:
+ lwz r3,_MSR(r1)
+ andi. r0,r3,MSR_EE /* interrupts off? */
+ beq restore_kuap /* don't schedule if so */
+@@ -989,7 +996,7 @@ global_dbcr0:
+ #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+
+ do_work: /* r10 contains MSR_KERNEL here */
+- andi. r0,r9,_TIF_NEED_RESCHED
++ andi. r0,r9,_TIF_NEED_RESCHED_MASK
+ beq do_user_signal
+
+ do_resched: /* r10 contains MSR_KERNEL here */
+@@ -1008,7 +1015,7 @@ recheck:
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
+ mtmsr r10 /* disable interrupts */
+ lwz r9,TI_FLAGS(r2)
+- andi. r0,r9,_TIF_NEED_RESCHED
++ andi. r0,r9,_TIF_NEED_RESCHED_MASK
+ bne- do_resched
+ andi. r0,r9,_TIF_USER_WORK_MASK
+ beq restore_user
+diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
+index e8eb9992a270..6eb9599a3262 100644
+--- a/arch/powerpc/kernel/exceptions-64e.S
++++ b/arch/powerpc/kernel/exceptions-64e.S
+@@ -1074,7 +1074,7 @@ _GLOBAL(ret_from_except_lite)
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+-1: andi. r0,r4,_TIF_NEED_RESCHED
++1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
+ beq 2f
+ bl restore_interrupts
+ SCHEDULE_USER
+@@ -1126,12 +1126,20 @@ resume_kernel:
+ bne- 0b
+ 1:
+
+-#ifdef CONFIG_PREEMPT
++#ifdef CONFIG_PREEMPTION
+ /* Check if we need to preempt */
++ lwz r8,TI_PREEMPT(r9)
++ cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
++ bne restore
+ andi. r0,r4,_TIF_NEED_RESCHED
++ bne+ check_count
++
++ andi. r0,r4,_TIF_NEED_RESCHED_LAZY
+ beq+ restore
++ lwz r8,TI_PREEMPT_LAZY(r9)
++
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+- lwz r8,TI_PREEMPT(r9)
++check_count:
+ cmpwi cr0,r8,0
+ bne restore
+ ld r0,SOFTE(r1)
+@@ -1152,7 +1160,7 @@ resume_kernel:
+ * interrupted after loading SRR0/1.
+ */
+ wrteei 0
+-#endif /* CONFIG_PREEMPT */
++#endif /* CONFIG_PREEMPTION */
+
+ restore:
+ /*
+diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
+index c475a229a42a..d6b69de1284c 100644
+--- a/arch/powerpc/kernel/interrupt.c
++++ b/arch/powerpc/kernel/interrupt.c
+@@ -286,7 +286,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ ti_flags = READ_ONCE(current_thread_info()->flags);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+- if (ti_flags & _TIF_NEED_RESCHED) {
++ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
+ schedule();
+ } else {
+ /*
+@@ -381,7 +381,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
+ ti_flags = READ_ONCE(current_thread_info()->flags);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable(); /* returning to user: may enable */
+- if (ti_flags & _TIF_NEED_RESCHED) {
++ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
+ schedule();
+ } else {
+ if (ti_flags & _TIF_SIGPENDING)
+@@ -473,11 +473,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+ again:
+- if (IS_ENABLED(CONFIG_PREEMPT)) {
++ if (IS_ENABLED(CONFIG_PREEMPTION)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
++ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
++ if ((preempt_count() == 0) &&
++ (current_thread_info()->preempt_lazy_count == 0))
++ preempt_schedule_irq();
+ }
+ }
+
+--
+2.19.1
+
diff --git a/features/rt/powerpc-Avoid-recursive-header-includes.patch b/features/rt/powerpc-Avoid-recursive-header-includes.patch
new file mode 100644
index 00000000..cfecd330
--- /dev/null
+++ b/features/rt/powerpc-Avoid-recursive-header-includes.patch
@@ -0,0 +1,47 @@
+From b8c2c7090ec3f3373205ff4b223ac447159c4c96 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 8 Jan 2021 19:48:21 +0100
+Subject: [PATCH 184/191] powerpc: Avoid recursive header includes
+
+- The include of bug.h leads to an include of printk.h which gets back
+  to spinlock.h and then complains about missing xchg().
+  Remove bug.h and add bits.h, which is needed for BITS_PER_BYTE.
+
+- Avoid the "please don't include this file directly" error from
+  rwlock-rt. Allow an include from/with rtmutex.h.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/include/asm/cmpxchg.h | 2 +-
+ arch/powerpc/include/asm/simple_spinlock_types.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
+index cf091c4c22e5..7371f7e23c35 100644
+--- a/arch/powerpc/include/asm/cmpxchg.h
++++ b/arch/powerpc/include/asm/cmpxchg.h
+@@ -5,7 +5,7 @@
+ #ifdef __KERNEL__
+ #include <linux/compiler.h>
+ #include <asm/synch.h>
+-#include <linux/bug.h>
++#include <linux/bits.h>
+
+ #ifdef __BIG_ENDIAN
+ #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE)
+diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
+index 0f3cdd8faa95..d45561e9e6ba 100644
+--- a/arch/powerpc/include/asm/simple_spinlock_types.h
++++ b/arch/powerpc/include/asm/simple_spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+ #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H)
+ # error "please don't include this file directly"
+ #endif
+
+--
+2.19.1
+
diff --git a/features/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/features/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch
new file mode 100644
index 00000000..bc6061ec
--- /dev/null
+++ b/features/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch
@@ -0,0 +1,44 @@
+From 4496674f6fc7117a30d0c2ea5c994c9675808e69 Mon Sep 17 00:00:00 2001
+From: Bogdan Purcareata <bogdan.purcareata@freescale.com>
+Date: Fri, 24 Apr 2015 15:53:13 +0000
+Subject: [PATCH 182/191] powerpc/kvm: Disable in-kernel MPIC emulation for
+ PREEMPT_RT
+
+While converting the openpic emulation code to use a raw_spinlock_t enables
+guests to run on RT, there's still a performance issue. For interrupts sent in
+directed delivery mode with a multiple-CPU mask, the emulated openpic will loop
+through all of the VCPUs, and for each VCPU it calls IRQ_check, which will loop
+through all the pending interrupts for that VCPU. This is done while holding the
+raw lock, meaning that during all this time interrupts and preemption are
+disabled on the host. A malicious user app can max out both these numbers and
+cause a DoS.
+
+This temporary fix is sent for two reasons. The first is so that users who want
+to use the in-kernel MPIC emulation are aware of the potential latencies, thus
+making sure that the hardware MPIC and their usage scenario do not involve
+interrupts sent in directed delivery mode, and that the number of possible
+pending interrupts is kept small. Secondly, this should incentivize the
+development of a proper openpic emulation better suited for RT.
+
+Acked-by: Scott Wood <scottwood@freescale.com>
+Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/kvm/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
+index e45644657d49..b826174ce983 100644
+--- a/arch/powerpc/kvm/Kconfig
++++ b/arch/powerpc/kvm/Kconfig
+@@ -179,6 +179,7 @@ config KVM_E500MC
+ config KVM_MPIC
+ bool "KVM in-kernel MPIC emulation"
+ depends on KVM && E500
++ depends on !PREEMPT_RT
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
+ select HAVE_KVM_IRQ_ROUTING
+--
+2.19.1
+
diff --git a/features/rt/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch b/features/rt/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch
new file mode 100644
index 00000000..9f28057d
--- /dev/null
+++ b/features/rt/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch
@@ -0,0 +1,43 @@
+From 36035164cb4bbf6cc044ff5d6f481c528ec34fb3 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 19 Feb 2021 17:51:07 +0100
+Subject: [PATCH 006/191] powerpc/mm: Move the linear_mapping_mutex to the
+ ifdef where it is used
+
+The mutex linear_mapping_mutex is defined at the top of the file while its
+only two users are within the CONFIG_MEMORY_HOTPLUG block.
+A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because
+its mutex implementation is smart enough to realize that it is unused.
+
+Move the definition of linear_mapping_mutex to the ifdef block where it is
+used.
+
+Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/mm/mem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
+index 4e8ce6d85232..7a59a5c9aa5d 100644
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -54,7 +54,6 @@
+
+ #include <mm/mmu_decl.h>
+
+-static DEFINE_MUTEX(linear_mapping_mutex);
+ unsigned long long memory_limit;
+ bool init_mem_is_free;
+
+@@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ EXPORT_SYMBOL(phys_mem_access_prot);
+
+ #ifdef CONFIG_MEMORY_HOTPLUG
++static DEFINE_MUTEX(linear_mapping_mutex);
+
+ #ifdef CONFIG_NUMA
+ int memory_add_physaddr_to_nid(u64 start)
+--
+2.19.1
+
diff --git a/features/rt/powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch b/features/rt/powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch
new file mode 100644
index 00000000..d3850690
--- /dev/null
+++ b/features/rt/powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch
@@ -0,0 +1,116 @@
+From 4478a8edce91a9ecc083e0a1c892bc61e03debb5 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 26 Mar 2019 18:31:54 +0100
+Subject: [PATCH 181/191] powerpc/pseries/iommu: Use a locallock instead of
+ local_irq_save()
+
+The locallock protects the per-CPU variable tce_page. The function
+attempts to allocate memory while tce_page is protected (by disabling
+interrupts).
+
+Use local_irq_save() instead of local_irq_disable().
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/platforms/pseries/iommu.c | 31 +++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index 9fc5217f0c8e..4fdb9370b913 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -24,6 +24,7 @@
+ #include <linux/of.h>
+ #include <linux/iommu.h>
+ #include <linux/rculist.h>
++#include <linux/local_lock.h>
+ #include <asm/io.h>
+ #include <asm/prom.h>
+ #include <asm/rtas.h>
+@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+ return ret;
+ }
+
+-static DEFINE_PER_CPU(__be64 *, tce_page);
++struct tce_page {
++ __be64 * page;
++ local_lock_t lock;
++};
++static DEFINE_PER_CPU(struct tce_page, tce_page) = {
++ .lock = INIT_LOCAL_LOCK(lock),
++};
+
+ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ long npages, unsigned long uaddr,
+@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ direction, attrs);
+ }
+
+- local_irq_save(flags); /* to protect tcep and the page behind it */
++ /* to protect tcep and the page behind it */
++ local_lock_irqsave(&tce_page.lock, flags);
+
+- tcep = __this_cpu_read(tce_page);
++ tcep = __this_cpu_read(tce_page.page);
+
+ /* This is safe to do since interrupts are off when we're called
+ * from iommu_alloc{,_sg}()
+@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ /* If allocation fails, fall back to the loop implementation */
+ if (!tcep) {
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&tce_page.lock, flags);
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift,
+ npages, uaddr, direction, attrs);
+ }
+- __this_cpu_write(tce_page, tcep);
++ __this_cpu_write(tce_page.page, tcep);
+ }
+
+ rpn = __pa(uaddr) >> TCE_SHIFT;
+@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ tcenum += limit;
+ } while (npages > 0 && !rc);
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(&tce_page.lock, flags);
+
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ DMA_BIDIRECTIONAL, 0);
+ }
+
+- local_irq_disable(); /* to protect tcep and the page behind it */
+- tcep = __this_cpu_read(tce_page);
++ /* to protect tcep and the page behind it */
++ local_lock_irq(&tce_page.lock);
++ tcep = __this_cpu_read(tce_page.page);
+
+ if (!tcep) {
+ tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ if (!tcep) {
+- local_irq_enable();
++ local_unlock_irq(&tce_page.lock);
+ return -ENOMEM;
+ }
+- __this_cpu_write(tce_page, tcep);
++ __this_cpu_write(tce_page.page, tcep);
+ }
+
+ proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+
+ /* error cleanup: caller will clear whole range */
+
+- local_irq_enable();
++ local_unlock_irq(&tce_page.lock);
+ return rc;
+ }
+
+--
+2.19.1
+
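The per-CPU local_lock pattern used above can be sketched generically. The structure and function names below are hypothetical; only the local_lock API usage mirrors the patch:

    #include <linux/local_lock.h>
    #include <linux/percpu.h>
    #include <linux/gfp.h>
    #include <linux/errno.h>

    struct pcpu_buf {                       /* hypothetical per-CPU scratch buffer */
            void *page;
            local_lock_t lock;
    };

    static DEFINE_PER_CPU(struct pcpu_buf, pcpu_buf) = {
            .lock = INIT_LOCAL_LOCK(lock),
    };

    static int use_scratch_page(void)
    {
            unsigned long flags;
            void *p;

            /* a named per-CPU scope on RT, plain irq-off otherwise */
            local_lock_irqsave(&pcpu_buf.lock, flags);
            p = __this_cpu_read(pcpu_buf.page);
            if (!p) {
                    p = (void *)__get_free_page(GFP_ATOMIC);
                    if (!p) {
                            local_unlock_irqrestore(&pcpu_buf.lock, flags);
                            return -ENOMEM;
                    }
                    __this_cpu_write(pcpu_buf.page, p);
            }
            /* ... fill and use the page ... */
            local_unlock_irqrestore(&pcpu_buf.lock, flags);
            return 0;
    }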
diff --git a/features/rt/powerpc-stackprotector-work-around-stack-guard-init-.patch b/features/rt/powerpc-stackprotector-work-around-stack-guard-init-.patch
new file mode 100644
index 00000000..4d3d3734
--- /dev/null
+++ b/features/rt/powerpc-stackprotector-work-around-stack-guard-init-.patch
@@ -0,0 +1,35 @@
+From 77000da2f2cf79d495a6d07abfbe32ebf4cd279f Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 26 Mar 2019 18:31: