From cb1f7b18369783e83ecba9f7c6c6708104461527 Mon Sep 17 00:00:00 2001 From: Daniel Dragomir Date: Mon, 16 Nov 2020 21:11:52 +0200 Subject: sched/core: Fix CPU offline/online process This error was introduced in commit d02636e8d3 ("Merge branch 'v4.19/standard/base' into v4.19/standard/preempt-rt/base") when a merge conflict overwrote the changes from commit 373491f1f4 ("sched/core: Fix illegal RCU from offline CPUs"). This BUG occurs when running cpu_hotplug_stress testcase from KTS or when stressing one CPU by switching it online/offline (echo 0 and 1 in /sys/devices/system/cpu/cpu0/online). kernel BUG at kernel/fork.c:633! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: Process sh (pid: 2016, stack limit = 0x00000000ca5ad524) CPU: 10 PID: 2016 Comm: sh Not tainted 4.19.131-rt30 axxia_gen2 Hardware name: AXM56xx Victoria (DT) pstate: 60000005 (nZCv daif -PAN -UAO) pc : __mmdrop+0x120/0x138 lr : finish_cpu+0x68/0x70 sp : ffffff8012153a40 x29: ffffff8012153a40 x28: ffffffc07b389e00 x27: 0000000000000000 x26: 000000407610f000 x25: ffffffc07ee62ee0 x24: 0000000000000000 x23: ffffff80080dfc20 x22: ffffff8008d9a9d8 x21: ffffff8008d53ee0 x20: 0000000000000055 x19: ffffff8008dc1b58 x18: 0000000000000001 x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff x14: ffffff8008d89688 x13: ffffff8088d918d7 x12: ffffff8008d918e0 x11: ffffff8008da4000 x10: ffffff8012153a00 x9 : 00000000ffffffd0 x8 : ffffff80121539d0 x7 : 0000000000000000 x6 : 0000000000000001 x5 : 00000000fffffffb x4 : 0000000000000030 x3 : 0000000000000030 x2 : ffffff8008dc1b58 x1 : ffffff8008dc1ba4 x0 : ffffff8008dc1b58 Call trace: __mmdrop+0x120/0x138 finish_cpu+0x68/0x70 cpuhp_invoke_callback+0xa8/0x800 _cpu_down+0x100/0x220 do_cpu_down+0x48/0x70 cpu_down+0x24/0x30 cpu_subsys_offline+0x20/0x30 device_offline+0xa4/0xd0 online_store+0x60/0xc0 dev_attr_store+0x44/0x60 sysfs_kf_write+0x5c/0x78 kernfs_fop_write+0xd8/0x1d0 __vfs_write+0x60/0x178 vfs_write+0xac/0x1b0 
ksys_write+0x74/0xe8 __arm64_sys_write+0x24/0x30 el0_svc_common+0xa4/0x1a0 el0_svc_compat_handler+0x30/0x40 el0_svc_compat+0x8/0x18 Code: a94153f3 a9425bf5 a8c37bfd d65f03c0 (d4210000) ---[ end trace 0000000000000002 ]--- BUG: Bad rss-counter state mm:000000001136ddcb idx:0 val:1012 BUG: Bad rss-counter state mm:000000001136ddcb idx:1 val:5902 BUG: non-zero pgtables_bytes on freeing mm: 180224 Signed-off-by: Daniel Dragomir Signed-off-by: Bruce Ashfield --- kernel/sched/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 218b93a31000..c62dbf9eebdf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5702,11 +5702,8 @@ void idle_task_exit(void) switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } - /* - * Defer the cleanup to an alive cpu. On RT we can neither - * call mmdrop() nor mmdrop_delayed() from here. - */ - per_cpu(idle_last_mm, smp_processor_id()) = mm; + + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } /* -- cgit v1.2.2-1-g5e49 From 978035cc9ec3395fa8f34daa17ebd1e48bb5a13a Mon Sep 17 00:00:00 2001 From: Daniel Dragomir Date: Mon, 16 Nov 2020 21:11:53 +0200 Subject: net: core: Fix dead loop on virtual device lo Multiple alarming messages are shown when running the TCP bandwidth test from the lmbench suite, and the test fails with a timeout: Dead loop on virtual device lo, fix it urgently! The cause of this failure is commit edbe65322391 ("net: place xmit recursion in softnet data") which overwrites some preempt-rt specific changes (PREEMPT_RT_FULL) from commit 7963359759cb ("net: move xmit_recursion to per-task variable on -RT"). 
The original version for commit edbe65322391 from kernel.org: https://lkml.org/lkml/2020/6/29/1000 Signed-off-by: Daniel Dragomir Signed-off-by: Bruce Ashfield --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 6f7e36ef026b..17343961fa24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3834,7 +3834,11 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ +#ifdef CONFIG_PREEMPT_RT_FULL + if (txq->xmit_lock_owner != current) { +#else if (txq->xmit_lock_owner != cpu) { +#endif if (dev_xmit_recursion()) goto recursion_alert; -- cgit v1.2.2-1-g5e49