From 69b616387d8353287a6363622dd5b9406d1296e7 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 25 Dec 2017 15:14:58 +0800 Subject: [PATCH 3623/4131] drm/amdgpu: stop all rings before doing gpu recover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit found recover_vram_from_shadow sometimes get executed in paralle with SDMA scheduler, should stop all schedulers before doing gpu reset/recover Change-Id: Ibaef3e3c015f3cf88f84b2eaf95cda95ae1a64e3 Signed-off-by: Monk Liu Reviewed-by: Christian König Tested-by: Andrey Grodzovsky Conflicts: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 +++++++++++------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1619263..5a83045 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2687,18 +2687,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (amdgpu_device_has_dc_support(adev)) state = drm_atomic_helper_suspend(adev->ddev); - /* block scheduler */ + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->sched.thread) continue; - /* only focus on the ring hit timeout if &job not NULL */ + kthread_park(ring->sched.thread); + if (job && job->ring->idx != i) continue; - kthread_park(ring->sched.thread); drm_sched_hw_job_reset(&ring->sched, &job->base); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ @@ -2741,33 +2741,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } dma_fence_put(fence); } + } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; + if (!ring || !ring->sched.thread) + continue; + /* only need recovery sched of the given job's ring + * or all rings (in the case @job is NULL) + * after above amdgpu_reset accomplished + */ + if ((!job || job->ring->idx == i) && !r) drm_sched_job_recovery(&ring->sched); - kthread_unpark(ring->sched.thread); - } - } else { - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; kthread_unpark(adev->rings[i]->sched.thread); - } } if (amdgpu_device_has_dc_support(adev)) { -- 2.7.4