aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch141
1 files changed, 141 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch
new file mode 100644
index 00000000..6e359599
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3737-drm-amdgpu-check-if-nbio-ras_if-exist.patch
@@ -0,0 +1,141 @@
+From 31873c3b437931c67ffc98c7246c15bf213a46b1 Mon Sep 17 00:00:00 2001
+From: Philip Yang <Philip.Yang@amd.com>
+Date: Fri, 6 Sep 2019 13:20:40 -0400
+Subject: [PATCH 3737/4256] drm/amdgpu: check if nbio->ras_if exist
+
+Avoid a NULL pointer dereference of nbio.ras_if. This happens on VG10:
+the reboot command hangs and the machine has to be powered off/on to
+reboot. This is the serial console log:
+
+[ OK ] Reached target Unmount All Filesystems.
+[ OK ] Reached target Final Step.
+ Starting Reboot...
+[ 305.696271] systemd-shutdown[1]: Syncing filesystems and block
+devices.
+[ 306.947328] systemd-shutdown[1]: Sending SIGTERM to remaining
+processes...
+[ 306.963920] systemd-journald[1722]: Received SIGTERM from PID 1
+(systemd-shutdow).
+[ 307.322717] systemd-shutdown[1]: Sending SIGKILL to remaining
+processes...
+[ 307.336472] systemd-shutdown[1]: Unmounting file systems.
+[ 307.454202] EXT4-fs (sda2): re-mounted. Opts: errors=remount-ro
+[ 307.480523] systemd-shutdown[1]: All filesystems unmounted.
+[ 307.486537] systemd-shutdown[1]: Deactivating swaps.
+[ 307.491962] systemd-shutdown[1]: All swaps deactivated.
+[ 307.497624] systemd-shutdown[1]: Detaching loop devices.
+[ 307.504418] systemd-shutdown[1]: All loop devices detached.
+[ 307.510418] systemd-shutdown[1]: Detaching DM devices.
+[ 307.565907] sd 2:0:0:0: [sda] Synchronizing SCSI cache
+[ 307.731313] BUG: kernel NULL pointer dereference, address:
+0000000000000000
+[ 307.738802] #PF: supervisor read access in kernel mode
+[ 307.744326] #PF: error_code(0x0000) - not-present page
+[ 307.749850] PGD 0 P4D 0
+[ 307.752568] Oops: 0000 [#1] SMP PTI
+[ 307.756314] CPU: 3 PID: 1 Comm: systemd-shutdow Not tainted
+5.2.0-rc1-kfd-yangp #453
+[ 307.764644] Hardware name: ASUS All Series/Z97-PRO(Wi-Fi ac)/USB 3.1,
+BIOS 9001 03/07/2016
+[ 307.773580] RIP: 0010:soc15_common_hw_fini+0x33/0xc0 [amdgpu]
+[ 307.779760] Code: 89 fb e8 60 f5 ff ff f6 83 50 df 01 00 04 75 3d 48
+8b b3 90 7d 00 00 48 c7 c7 17 b8 530
+[ 307.799967] RSP: 0018:ffffac9483153d40 EFLAGS: 00010286
+[ 307.805585] RAX: 0000000000000000 RBX: ffff9eb299da0000 RCX:
+0000000000000006
+[ 307.813261] RDX: 0000000000000000 RSI: ffff9eb29e3508a0 RDI:
+ffff9eb29e350000
+[ 307.820935] RBP: ffff9eb299da0000 R08: 0000000000000000 R09:
+0000000000000000
+[ 307.828609] R10: 0000000000000000 R11: 0000000000000000 R12:
+ffff9eb299dbd1f8
+[ 307.836284] R13: ffffffffc04f8368 R14: ffff9eb29cebd130 R15:
+0000000000000000
+[ 307.843959] FS: 00007f06721c9940(0000) GS:ffff9eb2a18c0000(0000)
+knlGS:0000000000000000
+[ 307.852663] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 307.858842] CR2: 0000000000000000 CR3: 000000081d798005 CR4:
+00000000001606e0
+[ 307.866516] Call Trace:
+[ 307.869169] amdgpu_device_ip_suspend_phase2+0x80/0x110 [amdgpu]
+[ 307.875654] ? amdgpu_device_ip_suspend_phase1+0x4d/0xd0 [amdgpu]
+[ 307.882230] amdgpu_device_ip_suspend+0x2e/0x60 [amdgpu]
+[ 307.887966] amdgpu_pci_shutdown+0x2f/0x40 [amdgpu]
+[ 307.893211] pci_device_shutdown+0x31/0x60
+[ 307.897613] device_shutdown+0x14c/0x1f0
+[ 307.901829] kernel_restart+0xe/0x50
+[ 307.905669] __do_sys_reboot+0x1df/0x210
+[ 307.909884] ? task_work_run+0x73/0xb0
+[ 307.913914] ? trace_hardirqs_off_thunk+0x1a/0x1c
+[ 307.918970] do_syscall_64+0x4a/0x1c0
+[ 307.922904] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+[ 307.928336] RIP: 0033:0x7f0671cf8373
+[ 307.932176] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00
+00 0f 1f 44 00 00 89 fa be 69 19 128
+[ 307.952384] RSP: 002b:00007ffdd1723d68 EFLAGS: 00000202 ORIG_RAX:
+00000000000000a9
+[ 307.960527] RAX: ffffffffffffffda RBX: 0000000001234567 RCX:
+00007f0671cf8373
+[ 307.968201] RDX: 0000000001234567 RSI: 0000000028121969 RDI:
+00000000fee1dead
+[ 307.975875] RBP: 00007ffdd1723dd0 R08: 0000000000000000 R09:
+0000000000000000
+[ 307.983550] R10: 0000000000000002 R11: 0000000000000202 R12:
+00007ffdd1723dd8
+[ 307.991224] R13: 0000000000000000 R14: 0000001b00000004 R15:
+00007ffdd17240c8
+[ 307.998901] Modules linked in: xt_MASQUERADE nfnetlink iptable_nat
+xt_addrtype xt_conntrack nf_nat nf_cos
+[ 308.026505] CR2: 0000000000000000
+[ 308.039998] RIP: 0010:soc15_common_hw_fini+0x33/0xc0 [amdgpu]
+[ 308.046180] Code: 89 fb e8 60 f5 ff ff f6 83 50 df 01 00 04 75 3d 48
+8b b3 90 7d 00 00 48 c7 c7 17 b8 530
+[ 308.066392] RSP: 0018:ffffac9483153d40 EFLAGS: 00010286
+[ 308.072013] RAX: 0000000000000000 RBX: ffff9eb299da0000 RCX:
+0000000000000006
+[ 308.079689] RDX: 0000000000000000 RSI: ffff9eb29e3508a0 RDI:
+ffff9eb29e350000
+[ 308.087366] RBP: ffff9eb299da0000 R08: 0000000000000000 R09:
+0000000000000000
+[ 308.095042] R10: 0000000000000000 R11: 0000000000000000 R12:
+ffff9eb299dbd1f8
+[ 308.102717] R13: ffffffffc04f8368 R14: ffff9eb29cebd130 R15:
+0000000000000000
+[ 308.110394] FS: 00007f06721c9940(0000) GS:ffff9eb2a18c0000(0000)
+knlGS:0000000000000000
+[ 308.119099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 308.125280] CR2: 0000000000000000 CR3: 000000081d798005 CR4:
+00000000001606e0
+[ 308.135304] printk: systemd-shutdow: 3 output lines suppressed due to
+ratelimiting
+[ 308.143518] Kernel panic - not syncing: Attempted to kill init!
+exitcode=0x00000009
+[ 308.151798] Kernel Offset: 0x15000000 from 0xffffffff81000000
+(relocation range: 0xffffffff80000000-0xff)
+[ 308.171775] ---[ end Kernel panic - not syncing: Attempted to kill
+init! exitcode=0x00000009 ]---
+
+Change-Id: If694f75e893f95c44d594877f552380a77c03ec4
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
+index cb22970c0853..7c7e9f550c02 100644
+--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
+@@ -1302,7 +1302,8 @@ static int soc15_common_hw_fini(void *handle)
+ if (amdgpu_sriov_vf(adev))
+ xgpu_ai_mailbox_put_irq(adev);
+
+- if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
++ if (adev->nbio.ras_if &&
++ amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (adev->nbio.funcs->init_ras_controller_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+ if (adev->nbio.funcs->init_ras_err_event_athub_interrupt)
+--
+2.17.1
+