aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch | 55
1 files changed, 55 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch
new file mode 100644
index 00000000..6ee25553
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch
@@ -0,0 +1,55 @@
+From 62a47e504af42f37ecf5ffef8fd1350276547cfc Mon Sep 17 00:00:00 2001
+From: Monk Liu <Monk.Liu@amd.com>
+Date: Wed, 8 Nov 2017 14:35:04 +0800
+Subject: [PATCH 2156/4131] drm/amdgpu:fix gpu recover missing skipping(v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+If an app closes its CTX right after an IB submit, GPU recovery
+will fail to find the entity behind the guilty job, which
+leads to no job skipping for this guilty job.
+
+To fix this corner case, just move the increment of
+job->karma out of the entity iteration.
+
+v2:
+only do the karma increment if bad->s_priority != KERNEL,
+because we always consider a KERNEL job to be correct and always
+want to recover an unfinished kernel job (sometimes a kernel
+job is interrupted by a VF FLR or another GPU hang event)
+
+Change-Id: I33e9e959e182d7e002a2108e565cb898acac4f9c
+Signed-off-by: Monk Liu <Monk.Liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Reviewed-By: Xiangliang Yu <Xiangliang.Yu@amd.com>
+---
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+index 74a60c5..537b296 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -464,7 +464,8 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo
+ }
+ spin_unlock(&sched->job_list_lock);
+
+- if (bad) {
++ if (bad && bad->s_priority != AMD_SCHED_PRIORITY_KERNEL) {
++ atomic_inc(&bad->karma);
+ /* don't increase @bad's karma if it's from KERNEL RQ,
+ * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs)
+ * corrupt but keep in mind that kernel jobs always considered good.
+@@ -475,7 +476,7 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo
+ spin_lock(&rq->lock);
+ list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+ if (bad->s_fence->scheduled.context == entity->fence_context) {
+- if (atomic_inc_return(&bad->karma) > bad->sched->hang_limit)
++ if (atomic_read(&bad->karma) > bad->sched->hang_limit)
+ if (entity->guilty)
+ atomic_set(entity->guilty, 1);
+ break;
+--
+2.7.4
+