diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch new file mode 100644 index 00000000..ed441748 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1943-drm-amdgpu-Fixed-a-potential-circular-lock.patch @@ -0,0 +1,198 @@ +From c589cc351d97965c3deb3b2719257e69af4343ee Mon Sep 17 00:00:00 2001 +From: ozeng <oak.zeng@amd.com> +Date: Wed, 27 Sep 2017 17:53:12 -0400 +Subject: [PATCH 1943/4131] drm/amdgpu: Fixed a potential circular lock + +The circular lock deadlock scenario captured is as follows. +The idea of the fix is moving read_user_wptr outside of +acquire_queue...release_queue critical section. + +[ 63.477482] WARNING: possible circular locking dependency detected +[ 63.484091] 4.12.0-kfd-ozeng #3 Not tainted +[ 63.488531] ------------------------------------------------------ +[ 63.495146] HelloWorldLoop/2526 is trying to acquire lock: +[ 63.501011] (&mm->mmap_sem){++++++}, at: [<ffffffff911898ce>] __might_fault+0x3e/0x90 +[ 63.509472] + but task is already holding lock: +[ 63.515716] (&adev->srbm_mutex){+.+...}, at: [<ffffffffc0484feb>] lock_srbm+0x2b/0x50 [amdgpu] +[ 63.525099] + which lock already depends on the new lock. 
+ +[ 63.533841] + the existing dependency chain (in reverse order) is: +[ 63.541839] + -> #2 (&adev->srbm_mutex){+.+...}: +[ 63.548178] lock_acquire+0x6d/0x90 +[ 63.552461] __mutex_lock+0x70/0x8c0 +[ 63.556826] mutex_lock_nested+0x16/0x20 +[ 63.561603] gfx_v8_0_kiq_resume+0x1039/0x14a0 [amdgpu] +[ 63.567817] gfx_v8_0_hw_init+0x204d/0x2210 [amdgpu] +[ 63.573675] amdgpu_device_init+0xdea/0x1790 [amdgpu] +[ 63.579640] amdgpu_driver_load_kms+0x63/0x220 [amdgpu] +[ 63.585743] drm_dev_register+0x145/0x1e0 +[ 63.590605] amdgpu_pci_probe+0x11e/0x160 [amdgpu] +[ 63.596266] local_pci_probe+0x40/0xa0 +[ 63.600803] pci_device_probe+0x134/0x150 +[ 63.605650] driver_probe_device+0x2a1/0x460 +[ 63.610785] __driver_attach+0xdc/0xe0 +[ 63.615321] bus_for_each_dev+0x5f/0x90 +[ 63.619984] driver_attach+0x19/0x20 +[ 63.624337] bus_add_driver+0x40/0x270 +[ 63.628908] driver_register+0x5b/0xe0 +[ 63.633446] __pci_register_driver+0x5b/0x60 +[ 63.638586] rtsx_pci_switch_output_voltage+0x1d/0x20 [rtsx_pci] +[ 63.645564] do_one_initcall+0x4c/0x1b0 +[ 63.650205] do_init_module+0x56/0x1ea +[ 63.654767] load_module+0x208c/0x27d0 +[ 63.659335] SYSC_finit_module+0x96/0xd0 +[ 63.664058] SyS_finit_module+0x9/0x10 +[ 63.668629] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 63.674088] + -> #1 (reservation_ww_class_mutex){+.+.+.}: +[ 63.681257] lock_acquire+0x6d/0x90 +[ 63.685551] __ww_mutex_lock.constprop.11+0x8c/0xed0 +[ 63.691426] ww_mutex_lock+0x67/0x70 +[ 63.695802] amdgpu_verify_access+0x6d/0x100 [amdgpu] +[ 63.701743] ttm_bo_mmap+0x8e/0x100 [ttm] +[ 63.706615] amdgpu_bo_mmap+0xd/0x60 [amdgpu] +[ 63.711814] amdgpu_mmap+0x35/0x40 [amdgpu] +[ 63.716904] mmap_region+0x3b5/0x5a0 +[ 63.721255] do_mmap+0x400/0x4d0 +[ 63.725260] vm_mmap_pgoff+0xb0/0xf0 +[ 63.729625] SyS_mmap_pgoff+0x19e/0x260 +[ 63.734292] SyS_mmap+0x1d/0x20 +[ 63.738199] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 63.743681] + -> #0 (&mm->mmap_sem){++++++}: +[ 63.749641] __lock_acquire+0x1401/0x1420 +[ 63.754491] lock_acquire+0x6d/0x90 +[ 
63.758750] __might_fault+0x6b/0x90 +[ 63.763176] kgd_hqd_load+0x24f/0x270 [amdgpu] +[ 63.768432] load_mqd+0x4b/0x50 [amdkfd] +[ 63.773192] create_queue_nocpsch+0x535/0x620 [amdkfd] +[ 63.779237] pqm_create_queue+0x34d/0x4f0 [amdkfd] +[ 63.784835] kfd_ioctl_create_queue+0x282/0x670 [amdkfd] +[ 63.790973] kfd_ioctl+0x310/0x4d0 [amdkfd] +[ 63.795944] do_vfs_ioctl+0x90/0x6e0 +[ 63.800268] SyS_ioctl+0x74/0x80 +[ 63.804207] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 63.809607] + other info that might help us debug this: + +[ 63.818026] Chain exists of: + &mm->mmap_sem --> reservation_ww_class_mutex --> &adev->srbm_mutex + +[ 63.830382] Possible unsafe locking scenario: + +[ 63.836605] CPU0 CPU1 +[ 63.841364] ---- ---- +[ 63.846123] lock(&adev->srbm_mutex); +[ 63.850061] lock(reservation_ww_class_mutex); +[ 63.857475] lock(&adev->srbm_mutex); +[ 63.864084] lock(&mm->mmap_sem); +[ 63.867657] + *** DEADLOCK *** + +[ 63.873884] 3 locks held by HelloWorldLoop/2526: +[ 63.878739] #0: (&process->mutex){+.+.+.}, at: [<ffffffffc06e1a9a>] kfd_ioctl_create_queue+0x24a/0x670 [amdkfd] +[ 63.889543] #1: (&dqm->lock){+.+...}, at: [<ffffffffc06eedeb>] create_queue_nocpsch+0x3b/0x620 [amdkfd] +[ 63.899684] #2: (&adev->srbm_mutex){+.+...}, at: [<ffffffffc0484feb>] lock_srbm+0x2b/0x50 [amdgpu] +[ 63.909500] + stack backtrace: +[ 63.914187] CPU: 3 PID: 2526 Comm: HelloWorldLoop Not tainted 4.12.0-kfd-ozeng #3 +[ 63.922184] Hardware name: AMD Carrizo/Gardenia, BIOS WGA5819N_Weekly_15_08_1 08/19/2015 +[ 63.930865] Call Trace: +[ 63.933464] dump_stack+0x85/0xc9 +[ 63.936999] print_circular_bug+0x1f9/0x207 +[ 63.941442] __lock_acquire+0x1401/0x1420 +[ 63.945745] ? lock_srbm+0x2b/0x50 [amdgpu] +[ 63.950185] lock_acquire+0x6d/0x90 +[ 63.953885] ? __might_fault+0x3e/0x90 +[ 63.957899] __might_fault+0x6b/0x90 +[ 63.961699] ? 
__might_fault+0x3e/0x90 +[ 63.965755] kgd_hqd_load+0x24f/0x270 [amdgpu] +[ 63.970577] load_mqd+0x4b/0x50 [amdkfd] +[ 63.974745] create_queue_nocpsch+0x535/0x620 [amdkfd] +[ 63.980242] pqm_create_queue+0x34d/0x4f0 [amdkfd] +[ 63.985320] kfd_ioctl_create_queue+0x282/0x670 [amdkfd] +[ 63.991021] kfd_ioctl+0x310/0x4d0 [amdkfd] +[ 63.995499] ? kfd_ioctl_destroy_queue+0x70/0x70 [amdkfd] +[ 64.001234] do_vfs_ioctl+0x90/0x6e0 +[ 64.005065] ? up_read+0x1a/0x40 +[ 64.008496] SyS_ioctl+0x74/0x80 +[ 64.011955] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 64.016863] RIP: 0033:0x7f4b3bd35f07 +[ 64.020696] RSP: 002b:00007ffe7689ec38 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +[ 64.028786] RAX: ffffffffffffffda RBX: 00000000002a2000 RCX: 00007f4b3bd35f07 +[ 64.036414] RDX: 00007ffe7689ecb0 RSI: 00000000c0584b02 RDI: 0000000000000005 +[ 64.044045] RBP: 00007f4a3212d000 R08: 00007f4b3c919000 R09: 0000000000080000 +[ 64.051674] R10: 00007f4b376b64b8 R11: 0000000000000246 R12: 00007f4a3212d000 +[ 64.059324] R13: 0000000000000015 R14: 0000000000000064 R15: 00007ffe7689ef50 + +Signed-off-by: Oak Zeng <Oak.Zeng@amd.com> +Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 11 +++++++++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10 +++++++++- + 2 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index d636861..936cc59 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -395,6 +395,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + struct cik_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; ++ bool valid_wptr = false; + + m = get_mqd(mqd); + +@@ -413,8 +414,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + 
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + +- +- if (read_user_wptr(mm, wptr, wptr_val)) ++ /* read_user_wptr may take the mm->mmap_sem. ++ * release srbm_mutex to avoid circular dependency between ++ * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex. ++ */ ++ release_queue(kgd); ++ valid_wptr = read_user_wptr(mm, wptr, wptr_val); ++ acquire_queue(kgd, pipe_id, queue_id); ++ if (valid_wptr) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index c08909c..4c5d39a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -380,6 +380,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + struct vi_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; ++ bool valid_wptr = false; + + m = get_mqd(mqd); + +@@ -427,7 +428,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + +- if (read_user_wptr(mm, wptr, wptr_val)) ++ /* read_user_wptr may take the mm->mmap_sem. ++ * release srbm_mutex to avoid circular dependency between ++ * srbm_mutex->mmap_sem->reservation_ww_class_mutex->srbm_mutex. ++ */ ++ release_queue(kgd); ++ valid_wptr = read_user_wptr(mm, wptr, wptr_val); ++ acquire_queue(kgd, pipe_id, queue_id); ++ if (valid_wptr) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); +-- +2.7.4 + |