aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch198
1 files changed, 198 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch
new file mode 100644
index 00000000..ed441748
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch
@@ -0,0 +1,198 @@
+From c589cc351d97965c3deb3b2719257e69af4343ee Mon Sep 17 00:00:00 2001
+From: ozeng <oak.zeng@amd.com>
+Date: Wed, 27 Sep 2017 17:53:12 -0400
+Subject: [PATCH 1943/4131] drm/amdgpu: Fixed a potential circular lock
+
+The circular locking scenario captured by lockdep is as follows.
+The idea of the fix is to move read_user_wptr outside of the
+acquire_queue...release_queue critical section.
+
+[ 63.477482] WARNING: possible circular locking dependency detected
+[ 63.484091] 4.12.0-kfd-ozeng #3 Not tainted
+[ 63.488531] ------------------------------------------------------
+[ 63.495146] HelloWorldLoop/2526 is trying to acquire lock:
+[ 63.501011] (&mm->mmap_sem){++++++}, at: [<ffffffff911898ce>] __might_fault+0x3e/0x90
+[ 63.509472]
+ but task is already holding lock:
+[ 63.515716] (&adev->srbm_mutex){+.+...}, at: [<ffffffffc0484feb>] lock_srbm+0x2b/0x50 [amdgpu]
+[ 63.525099]
+ which lock already depends on the new lock.
+
+[ 63.533841]
+ the existing dependency chain (in reverse order) is:
+[ 63.541839]
+ -> #2 (&adev->srbm_mutex){+.+...}:
+[ 63.548178] lock_acquire+0x6d/0x90
+[ 63.552461] __mutex_lock+0x70/0x8c0
+[ 63.556826] mutex_lock_nested+0x16/0x20
+[ 63.561603] gfx_v8_0_kiq_resume+0x1039/0x14a0 [amdgpu]
+[ 63.567817] gfx_v8_0_hw_init+0x204d/0x2210 [amdgpu]
+[ 63.573675] amdgpu_device_init+0xdea/0x1790 [amdgpu]
+[ 63.579640] amdgpu_driver_load_kms+0x63/0x220 [amdgpu]
+[ 63.585743] drm_dev_register+0x145/0x1e0
+[ 63.590605] amdgpu_pci_probe+0x11e/0x160 [amdgpu]
+[ 63.596266] local_pci_probe+0x40/0xa0
+[ 63.600803] pci_device_probe+0x134/0x150
+[ 63.605650] driver_probe_device+0x2a1/0x460
+[ 63.610785] __driver_attach+0xdc/0xe0
+[ 63.615321] bus_for_each_dev+0x5f/0x90
+[ 63.619984] driver_attach+0x19/0x20
+[ 63.624337] bus_add_driver+0x40/0x270
+[ 63.628908] driver_register+0x5b/0xe0
+[ 63.633446] __pci_register_driver+0x5b/0x60
+[ 63.638586] rtsx_pci_switch_output_voltage+0x1d/0x20 [rtsx_pci]
+[ 63.645564] do_one_initcall+0x4c/0x1b0
+[ 63.650205] do_init_module+0x56/0x1ea
+[ 63.654767] load_module+0x208c/0x27d0
+[ 63.659335] SYSC_finit_module+0x96/0xd0
+[ 63.664058] SyS_finit_module+0x9/0x10
+[ 63.668629] entry_SYSCALL_64_fastpath+0x1f/0xbe
+[ 63.674088]
+ -> #1 (reservation_ww_class_mutex){+.+.+.}:
+[ 63.681257] lock_acquire+0x6d/0x90
+[ 63.685551] __ww_mutex_lock.constprop.11+0x8c/0xed0
+[ 63.691426] ww_mutex_lock+0x67/0x70
+[ 63.695802] amdgpu_verify_access+0x6d/0x100 [amdgpu]
+[ 63.701743] ttm_bo_mmap+0x8e/0x100 [ttm]
+[ 63.706615] amdgpu_bo_mmap+0xd/0x60 [amdgpu]
+[ 63.711814] amdgpu_mmap+0x35/0x40 [amdgpu]
+[ 63.716904] mmap_region+0x3b5/0x5a0
+[ 63.721255] do_mmap+0x400/0x4d0
+[ 63.725260] vm_mmap_pgoff+0xb0/0xf0
+[ 63.729625] SyS_mmap_pgoff+0x19e/0x260
+[ 63.734292] SyS_mmap+0x1d/0x20
+[ 63.738199] entry_SYSCALL_64_fastpath+0x1f/0xbe
+[ 63.743681]
+ -> #0 (&mm->mmap_sem){++++++}:
+[ 63.749641] __lock_acquire+0x1401/0x1420
+[ 63.754491] lock_acquire+0x6d/0x90
+[ 63.758750] __might_fault+0x6b/0x90
+[ 63.763176] kgd_hqd_load+0x24f/0x270 [amdgpu]
+[ 63.768432] load_mqd+0x4b/0x50 [amdkfd]
+[ 63.773192] create_queue_nocpsch+0x535/0x620 [amdkfd]
+[ 63.779237] pqm_create_queue+0x34d/0x4f0 [amdkfd]
+[ 63.784835] kfd_ioctl_create_queue+0x282/0x670 [amdkfd]
+[ 63.790973] kfd_ioctl+0x310/0x4d0 [amdkfd]
+[ 63.795944] do_vfs_ioctl+0x90/0x6e0
+[ 63.800268] SyS_ioctl+0x74/0x80
+[ 63.804207] entry_SYSCALL_64_fastpath+0x1f/0xbe
+[ 63.809607]
+ other info that might help us debug this:
+
+[ 63.818026] Chain exists of:
+ &mm->mmap_sem --> reservation_ww_class_mutex --> &adev->srbm_mutex
+
+[ 63.830382] Possible unsafe locking scenario:
+
+[ 63.836605] CPU0 CPU1
+[ 63.841364] ---- ----
+[ 63.846123] lock(&adev->srbm_mutex);
+[ 63.850061] lock(reservation_ww_class_mutex);
+[ 63.857475] lock(&adev->srbm_mutex);
+[ 63.864084] lock(&mm->mmap_sem);
+[ 63.867657]
+ *** DEADLOCK ***
+
+[ 63.873884] 3 locks held by HelloWorldLoop/2526:
+[ 63.878739] #0: (&process->mutex){+.+.+.}, at: [<ffffffffc06e1a9a>] kfd_ioctl_create_queue+0x24a/0x670 [amdkfd]
+[ 63.889543] #1: (&dqm->lock){+.+...}, at: [<ffffffffc06eedeb>] create_queue_nocpsch+0x3b/0x620 [amdkfd]
+[ 63.899684] #2: (&adev->srbm_mutex){+.+...}, at: [<ffffffffc0484feb>] lock_srbm+0x2b/0x50 [amdgpu]
+[ 63.909500]
+ stack backtrace:
+[ 63.914187] CPU: 3 PID: 2526 Comm: HelloWorldLoop Not tainted 4.12.0-kfd-ozeng #3
+[ 63.922184] Hardware name: AMD Carrizo/Gardenia, BIOS WGA5819N_Weekly_15_08_1 08/19/2015
+[ 63.930865] Call Trace:
+[ 63.933464] dump_stack+0x85/0xc9
+[ 63.936999] print_circular_bug+0x1f9/0x207
+[ 63.941442] __lock_acquire+0x1401/0x1420
+[ 63.945745] ? lock_srbm+0x2b/0x50 [amdgpu]
+[ 63.950185] lock_acquire+0x6d/0x90
+[ 63.953885] ? __might_fault+0x3e/0x90
+[ 63.957899] __might_fault+0x6b/0x90
+[ 63.961699] ? __might_fault+0x3e/0x90
+[ 63.965755] kgd_hqd_load+0x24f/0x270 [amdgpu]
+[ 63.970577] load_mqd+0x4b/0x50 [amdkfd]
+[ 63.974745] create_queue_nocpsch+0x535/0x620 [amdkfd]
+[ 63.980242] pqm_create_queue+0x34d/0x4f0 [amdkfd]
+[ 63.985320] kfd_ioctl_create_queue+0x282/0x670 [amdkfd]
+[ 63.991021] kfd_ioctl+0x310/0x4d0 [amdkfd]
+[ 63.995499] ? kfd_ioctl_destroy_queue+0x70/0x70 [amdkfd]
+[ 64.001234] do_vfs_ioctl+0x90/0x6e0
+[ 64.005065] ? up_read+0x1a/0x40
+[ 64.008496] SyS_ioctl+0x74/0x80
+[ 64.011955] entry_SYSCALL_64_fastpath+0x1f/0xbe
+[ 64.016863] RIP: 0033:0x7f4b3bd35f07
+[ 64.020696] RSP: 002b:00007ffe7689ec38 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+[ 64.028786] RAX: ffffffffffffffda RBX: 00000000002a2000 RCX: 00007f4b3bd35f07
+[ 64.036414] RDX: 00007ffe7689ecb0 RSI: 00000000c0584b02 RDI: 0000000000000005
+[ 64.044045] RBP: 00007f4a3212d000 R08: 00007f4b3c919000 R09: 0000000000080000
+[ 64.051674] R10: 00007f4b376b64b8 R11: 0000000000000246 R12: 00007f4a3212d000
+[ 64.059324] R13: 0000000000000015 R14: 0000000000000064 R15: 00007ffe7689ef50
+
+Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 11 +++++++++--
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10 +++++++++-
+ 2 files changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+index d636861..936cc59 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+@@ -395,6 +395,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ struct cik_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
++ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+@@ -413,8 +414,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+-
+- if (read_user_wptr(mm, wptr, wptr_val))
++ /* read_user_wptr may take the mm->mmap_sem.
++ * Release srbm_mutex to avoid a circular dependency between
++ * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex.
++ */
++ release_queue(kgd);
++ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
++ acquire_queue(kgd, pipe_id, queue_id);
++ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+index c08909c..4c5d39a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+@@ -380,6 +380,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ struct vi_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
++ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+@@ -427,7 +428,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+- if (read_user_wptr(mm, wptr, wptr_val))
++ /* read_user_wptr may take the mm->mmap_sem.
++ * Release srbm_mutex to avoid a circular dependency between
++ * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex.
++ */
++ release_queue(kgd);
++ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
++ acquire_queue(kgd, pipe_id, queue_id);
++ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+--
+2.7.4
+