diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch new file mode 100644 index 00000000..6e359599 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch @@ -0,0 +1,141 @@ +From 31873c3b437931c67ffc98c7246c15bf213a46b1 Mon Sep 17 00:00:00 2001 +From: Philip Yang <Philip.Yang@amd.com> +Date: Fri, 6 Sep 2019 13:20:40 -0400 +Subject: [PATCH 3737/4256] drm/amdgpu: check if nbio->ras_if exist + +To avoid a NULL pointer dereference of nbio.ras_if. This happens on VG10: +the reboot command hangs and the machine has to be power-cycled to reboot. +This is the serial console log: + +[ OK ] Reached target Unmount All Filesystems. +[ OK ] Reached target Final Step. + Starting Reboot... +[ 305.696271] systemd-shutdown[1]: Syncing filesystems and block +devices. +[ 306.947328] systemd-shutdown[1]: Sending SIGTERM to remaining +processes... +[ 306.963920] systemd-journald[1722]: Received SIGTERM from PID 1 +(systemd-shutdow). +[ 307.322717] systemd-shutdown[1]: Sending SIGKILL to remaining +processes... +[ 307.336472] systemd-shutdown[1]: Unmounting file systems. +[ 307.454202] EXT4-fs (sda2): re-mounted. Opts: errors=remount-ro +[ 307.480523] systemd-shutdown[1]: All filesystems unmounted. +[ 307.486537] systemd-shutdown[1]: Deactivating swaps. +[ 307.491962] systemd-shutdown[1]: All swaps deactivated. +[ 307.497624] systemd-shutdown[1]: Detaching loop devices. +[ 307.504418] systemd-shutdown[1]: All loop devices detached. +[ 307.510418] systemd-shutdown[1]: Detaching DM devices. 
+[ 307.565907] sd 2:0:0:0: [sda] Synchronizing SCSI cache +[ 307.731313] BUG: kernel NULL pointer dereference, address: +0000000000000000 +[ 307.738802] #PF: supervisor read access in kernel mode +[ 307.744326] #PF: error_code(0x0000) - not-present page +[ 307.749850] PGD 0 P4D 0 +[ 307.752568] Oops: 0000 [#1] SMP PTI +[ 307.756314] CPU: 3 PID: 1 Comm: systemd-shutdow Not tainted +5.2.0-rc1-kfd-yangp #453 +[ 307.764644] Hardware name: ASUS All Series/Z97-PRO(Wi-Fi ac)/USB 3.1, +BIOS 9001 03/07/2016 +[ 307.773580] RIP: 0010:soc15_common_hw_fini+0x33/0xc0 [amdgpu] +[ 307.779760] Code: 89 fb e8 60 f5 ff ff f6 83 50 df 01 00 04 75 3d 48 +8b b3 90 7d 00 00 48 c7 c7 17 b8 530 +[ 307.799967] RSP: 0018:ffffac9483153d40 EFLAGS: 00010286 +[ 307.805585] RAX: 0000000000000000 RBX: ffff9eb299da0000 RCX: +0000000000000006 +[ 307.813261] RDX: 0000000000000000 RSI: ffff9eb29e3508a0 RDI: +ffff9eb29e350000 +[ 307.820935] RBP: ffff9eb299da0000 R08: 0000000000000000 R09: +0000000000000000 +[ 307.828609] R10: 0000000000000000 R11: 0000000000000000 R12: +ffff9eb299dbd1f8 +[ 307.836284] R13: ffffffffc04f8368 R14: ffff9eb29cebd130 R15: +0000000000000000 +[ 307.843959] FS: 00007f06721c9940(0000) GS:ffff9eb2a18c0000(0000) +knlGS:0000000000000000 +[ 307.852663] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 307.858842] CR2: 0000000000000000 CR3: 000000081d798005 CR4: +00000000001606e0 +[ 307.866516] Call Trace: +[ 307.869169] amdgpu_device_ip_suspend_phase2+0x80/0x110 [amdgpu] +[ 307.875654] ? amdgpu_device_ip_suspend_phase1+0x4d/0xd0 [amdgpu] +[ 307.882230] amdgpu_device_ip_suspend+0x2e/0x60 [amdgpu] +[ 307.887966] amdgpu_pci_shutdown+0x2f/0x40 [amdgpu] +[ 307.893211] pci_device_shutdown+0x31/0x60 +[ 307.897613] device_shutdown+0x14c/0x1f0 +[ 307.901829] kernel_restart+0xe/0x50 +[ 307.905669] __do_sys_reboot+0x1df/0x210 +[ 307.909884] ? task_work_run+0x73/0xb0 +[ 307.913914] ? 
trace_hardirqs_off_thunk+0x1a/0x1c +[ 307.918970] do_syscall_64+0x4a/0x1c0 +[ 307.922904] entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 307.928336] RIP: 0033:0x7f0671cf8373 +[ 307.932176] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 +00 0f 1f 44 00 00 89 fa be 69 19 128 +[ 307.952384] RSP: 002b:00007ffdd1723d68 EFLAGS: 00000202 ORIG_RAX: +00000000000000a9 +[ 307.960527] RAX: ffffffffffffffda RBX: 0000000001234567 RCX: +00007f0671cf8373 +[ 307.968201] RDX: 0000000001234567 RSI: 0000000028121969 RDI: +00000000fee1dead +[ 307.975875] RBP: 00007ffdd1723dd0 R08: 0000000000000000 R09: +0000000000000000 +[ 307.983550] R10: 0000000000000002 R11: 0000000000000202 R12: +00007ffdd1723dd8 +[ 307.991224] R13: 0000000000000000 R14: 0000001b00000004 R15: +00007ffdd17240c8 +[ 307.998901] Modules linked in: xt_MASQUERADE nfnetlink iptable_nat +xt_addrtype xt_conntrack nf_nat nf_cos +[ 308.026505] CR2: 0000000000000000 +[ 308.039998] RIP: 0010:soc15_common_hw_fini+0x33/0xc0 [amdgpu] +[ 308.046180] Code: 89 fb e8 60 f5 ff ff f6 83 50 df 01 00 04 75 3d 48 +8b b3 90 7d 00 00 48 c7 c7 17 b8 530 +[ 308.066392] RSP: 0018:ffffac9483153d40 EFLAGS: 00010286 +[ 308.072013] RAX: 0000000000000000 RBX: ffff9eb299da0000 RCX: +0000000000000006 +[ 308.079689] RDX: 0000000000000000 RSI: ffff9eb29e3508a0 RDI: +ffff9eb29e350000 +[ 308.087366] RBP: ffff9eb299da0000 R08: 0000000000000000 R09: +0000000000000000 +[ 308.095042] R10: 0000000000000000 R11: 0000000000000000 R12: +ffff9eb299dbd1f8 +[ 308.102717] R13: ffffffffc04f8368 R14: ffff9eb29cebd130 R15: +0000000000000000 +[ 308.110394] FS: 00007f06721c9940(0000) GS:ffff9eb2a18c0000(0000) +knlGS:0000000000000000 +[ 308.119099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 308.125280] CR2: 0000000000000000 CR3: 000000081d798005 CR4: +00000000001606e0 +[ 308.135304] printk: systemd-shutdow: 3 output lines suppressed due to +ratelimiting +[ 308.143518] Kernel panic - not syncing: Attempted to kill init! 
+exitcode=0x00000009 +[ 308.151798] Kernel Offset: 0x15000000 from 0xffffffff81000000 +(relocation range: 0xffffffff80000000-0xff) +[ 308.171775] ---[ end Kernel panic - not syncing: Attempted to kill +init! exitcode=0x00000009 ]--- + +Change-Id: If694f75e893f95c44d594877f552380a77c03ec4 +Signed-off-by: Philip Yang <Philip.Yang@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c +index cb22970c0853..7c7e9f550c02 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.c ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c +@@ -1302,7 +1302,8 @@ static int soc15_common_hw_fini(void *handle) + if (amdgpu_sriov_vf(adev)) + xgpu_ai_mailbox_put_irq(adev); + +- if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { ++ if (adev->nbio.ras_if && ++ amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (adev->nbio.funcs->init_ras_controller_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) +-- +2.17.1 + |