diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0192-drm-amdgpu-add-ring-soft-recovery-v4.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0192-drm-amdgpu-add-ring-soft-recovery-v4.patch | 100 |
1 file changed, 100 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0192-drm-amdgpu-add-ring-soft-recovery-v4.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0192-drm-amdgpu-add-ring-soft-recovery-v4.patch new file mode 100644 index 00000000..bf403348 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0192-drm-amdgpu-add-ring-soft-recovery-v4.patch @@ -0,0 +1,100 @@ +From 0ea11f45fe10c0d5c130339f357949b5416d9924 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 21 Aug 2018 11:11:36 +0200 +Subject: [PATCH 0192/2940] drm/amdgpu: add ring soft recovery v4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Instead of hammering hard on the GPU try a soft recovery first. + +v2: reorder code a bit +v3: increase timeout to 10ms, increment GPU reset counter +v4: squash in compile fix (Christian) + +Signed-off-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Huang Rui <ray.huang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 25 ++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++++ + 3 files changed, 35 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +index 6c77c50a404e..755f733bf0d9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -33,6 +33,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) + struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); + struct amdgpu_job *job = to_amdgpu_job(s_job); + ++ if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { ++ DRM_ERROR("ring %s timeout, but soft recovered\n", ++ s_job->sched->name); ++ return; ++ } ++ + DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + 
ring->fence_drv.sync_seq); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +index 5dfd26be1eec..b70e85ec147d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +@@ -383,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); + } + ++/** ++ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup ++ * ++ * @ring: ring to try the recovery on ++ * @vmid: VMID we try to get going again ++ * @fence: timedout fence ++ * ++ * Tries to get a ring proceeding again when it is stuck. ++ */ ++bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, ++ struct dma_fence *fence) ++{ ++ ktime_t deadline = ktime_add_us(ktime_get(), 10000); ++ ++ if (!ring->funcs->soft_recovery) ++ return false; ++ ++ atomic_inc(&ring->adev->gpu_reset_counter); ++ while (!dma_fence_is_signaled(fence) && ++ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ++ ring->funcs->soft_recovery(ring, vmid); ++ ++ return dma_fence_is_signaled(fence); ++} ++ + /* + * Debugfs info + */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +index 409fdd9b9710..9cc239968e40 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +@@ -168,6 +168,8 @@ struct amdgpu_ring_funcs { + /* priority functions */ + void (*set_priority) (struct amdgpu_ring *ring, + enum drm_sched_priority priority); ++ /* Try to soft recover the ring to make the fence signal */ ++ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); + }; + + struct amdgpu_ring { +@@ -260,6 +262,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring); + void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t val0, + uint32_t reg1, uint32_t val1); ++bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
unsigned int vmid, ++ struct dma_fence *fence); + + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) + { +-- +2.17.1 + |