diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch new file mode 100644 index 00000000..6ee25553 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2156-drm-amdgpu-fix-gpu-recover-missing-skipping-v2.patch @@ -0,0 +1,55 @@ +From 62a47e504af42f37ecf5ffef8fd1350276547cfc Mon Sep 17 00:00:00 2001 +From: Monk Liu <Monk.Liu@amd.com> +Date: Wed, 8 Nov 2017 14:35:04 +0800 +Subject: [PATCH 2156/4131] drm/amdgpu:fix gpu recover missing skipping(v2) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +if app close CTX right after IB submit, gpu recover +will fail to find out the entity behind this guilty +job thus lead to no job skipping for this guilty job. + +to fix this corner case just move the increasement of +job->karma out of the entity iteration. + +v2: +only do karma increasment if bad->s_priority != KERNEL +because we always consider KERNEL job be correct and always +want to recover an unfinished kernel job (sometimes kernel +job is interrupted by VF FLR or other GPU hang event) + +Change-Id: I33e9e959e182d7e002a2108e565cb898acac4f9c +Signed-off-by: Monk Liu <Monk.Liu@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +Reviewed-By: Xiangliang Yu <Xiangliang.Yu@amd.com> +--- + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +index 74a60c5..537b296 100644 +--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c ++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +@@ -464,7 +464,8 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo + } + spin_unlock(&sched->job_list_lock); + +- if (bad) { ++ if (bad && bad->s_priority != AMD_SCHED_PRIORITY_KERNEL) { ++ atomic_inc(&bad->karma); + /* don't increase @bad's karma if it's from KERNEL RQ, + * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs) + * corrupt but keep in mind that kernel jobs always considered good. +@@ -475,7 +476,7 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_jo + spin_lock(&rq->lock); + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { + if (bad->s_fence->scheduled.context == entity->fence_context) { +- if (atomic_inc_return(&bad->karma) > bad->sched->hang_limit) ++ if (atomic_read(&bad->karma) > bad->sched->hang_limit) + if (entity->guilty) + atomic_set(entity->guilty, 1); + break; +-- +2.7.4 + |