about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch 79
1 file changed, 79 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch
new file mode 100644
index 00000000..f8b833c0
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1961-drm-amd-sched-fix-deadlock-caused-by-unsignaled-fenc.patch
@@ -0,0 +1,79 @@
+From 5c79e8d34f987f4fdddd6e152845f18d4aecbfde Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
+Date: Thu, 28 Sep 2017 11:57:32 +0200
+Subject: [PATCH 1961/4131] drm/amd/sched: fix deadlock caused by unsignaled
+ fences of deleted jobs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Highly concurrent Piglit runs can trigger a race condition where a pending
+SDMA job on a buffer object is never executed because the corresponding
+process is killed (perhaps due to a crash). Since the job's fences were
+never signaled, the buffer object was effectively leaked. Worse, the
+buffer was stuck wherever it happened to be at the time, possibly in VRAM.
+
+The symptom was user space processes stuck in interruptible waits with
+kernel stacks like:
+
+ [<ffffffffbc5e6722>] dma_fence_default_wait+0x112/0x250
+ [<ffffffffbc5e6399>] dma_fence_wait_timeout+0x39/0xf0
+ [<ffffffffbc5e82d2>] reservation_object_wait_timeout_rcu+0x1c2/0x300
+ [<ffffffffc03ce56f>] ttm_bo_cleanup_refs_and_unlock+0xff/0x1a0 [ttm]
+ [<ffffffffc03cf1ea>] ttm_mem_evict_first+0xba/0x1a0 [ttm]
+ [<ffffffffc03cf611>] ttm_bo_mem_space+0x341/0x4c0 [ttm]
+ [<ffffffffc03cfc54>] ttm_bo_validate+0xd4/0x150 [ttm]
+ [<ffffffffc03cffbd>] ttm_bo_init_reserved+0x2ed/0x420 [ttm]
+ [<ffffffffc042f523>] amdgpu_bo_create_restricted+0x1f3/0x470 [amdgpu]
+ [<ffffffffc042f9fa>] amdgpu_bo_create+0xda/0x220 [amdgpu]
+ [<ffffffffc04349ea>] amdgpu_gem_object_create+0xaa/0x140 [amdgpu]
+ [<ffffffffc0434f97>] amdgpu_gem_create_ioctl+0x97/0x120 [amdgpu]
+ [<ffffffffc037ddba>] drm_ioctl+0x1fa/0x480 [drm]
+ [<ffffffffc041904f>] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu]
+ [<ffffffffbc23db33>] do_vfs_ioctl+0xa3/0x5f0
+ [<ffffffffbc23e0f9>] SyS_ioctl+0x79/0x90
+ [<ffffffffbc864ffb>] entry_SYSCALL_64_fastpath+0x1e/0xad
+ [<ffffffffffffffff>] 0xffffffffffffffff
+
+Note: The correctness of this change depends on the earlier commit
+"drm/amd/sched: move adding finish callback to amd_sched_job_begin"
+
+v2: set an error on the finished fence
+
+Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
+
+ Conflicts:
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+
+Change-Id: I5c0955c44a83db23bd74282e1ae6e0bfd6e71b93
+---
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+index 4693be2..37e8b8d 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -227,9 +227,15 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ */
+ kthread_park(sched->thread);
+ kthread_unpark(sched->thread);
+- while (kfifo_out(&entity->job_queue, &job, sizeof(job)))
++ while (kfifo_out(&entity->job_queue, &job, sizeof(job))) {
++ struct amd_sched_fence *s_fence = job->s_fence;
++ amd_sched_fence_scheduled(s_fence);
++ dma_fence_set_error(&s_fence->finished, -ESRCH);
++ amd_sched_fence_finished(s_fence);
++ dma_fence_put(&s_fence->finished);
+ sched->ops->free_job(job);
+-
++ }
++
+ }
+ kfifo_free(&entity->job_queue);
+ }
+--
+2.7.4
+