about | summary | refs | log | tree | commit | diff | stats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch 193
1 files changed, 193 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch
new file mode 100644
index 00000000..47bc9ef5
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/5195-drm-amdgpu-cleanup-GPU-recovery-check-a-bit-v2.patch
@@ -0,0 +1,193 @@
+From 2f5d61b081ee4f06981c1596a19ee53bcbbab2e0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Tue, 21 Aug 2018 10:45:29 +0200
+Subject: [PATCH 5195/5725] drm/amdgpu: cleanup GPU recovery check a bit (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Check if we should call the function instead of providing the forced
+flag.
+
+v2: rebase on KFD changes (Alex)
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Raveendra Talabattula <raveendra.talabattula@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++++++----------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 ++--
+ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 ++-
+ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
+ drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 3 ++-
+ 8 files changed, 38 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index b11832a..91be1d4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1286,8 +1286,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
+ #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
+
+ /* Common functions */
++bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
+ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+- struct amdgpu_job* job, bool force);
++ struct amdgpu_job* job);
+ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+ bool amdgpu_device_need_post(struct amdgpu_device *adev);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+index 0adee23..599cb6f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+@@ -280,7 +280,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+- amdgpu_device_gpu_recover(adev, NULL, false);
++ if (amdgpu_device_should_recover_gpu(adev))
++ amdgpu_device_gpu_recover(adev, NULL);
+ }
+
+ u32 pool_to_domain(enum kgd_memory_pool p)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 1acb85e..67adfb4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3304,31 +3304,43 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+ }
+
+ /**
++ * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
++ *
++ * @adev: amdgpu device pointer
++ *
++ * Returns false if amdgpu_device_ip_check_soft_reset() reports no hang, or if
++ * amdgpu_gpu_recovery is 0, or -1 while adev is not an SRIOV VF; else true.
++ */
++bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
++{
++ if (!amdgpu_device_ip_check_soft_reset(adev)) {
++ DRM_INFO("Timeout, but no hardware hang detected.\n");
++ return false;
++ }
++
++ if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
++ !amdgpu_sriov_vf(adev))) {
++ DRM_INFO("GPU recovery disabled.\n");
++ return false;
++ }
++
++ return true;
++}
++
++/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu device pointer
+ * @job: which job trigger hang
+- * @force: forces reset regardless of amdgpu_gpu_recovery
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Returns 0 for success or an error on failure.
+ */
+ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+- struct amdgpu_job *job, bool force)
++ struct amdgpu_job *job)
+ {
+ int i, r, resched;
+
+- if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
+- DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
+- return 0;
+- }
+-
+- if (!force && (amdgpu_gpu_recovery == 0 ||
+- (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
+- DRM_INFO("GPU recovery disabled.\n");
+- return 0;
+- }
+-
+ dev_info(adev->dev, "GPU reset begin!\n");
+
+ mutex_lock(&adev->lock_reset);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index 1ec9590..e7f6389 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -702,7 +702,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
+ struct amdgpu_device *adev = dev->dev_private;
+
+ seq_printf(m, "gpu recover\n");
+- amdgpu_device_gpu_recover(adev, NULL, true);
++ amdgpu_device_gpu_recover(adev, NULL);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+index da8eda8..2d29753 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+@@ -105,8 +105,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+- if (!amdgpu_sriov_vf(adev))
+- amdgpu_device_gpu_recover(adev, NULL, false);
++ if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
++ amdgpu_device_gpu_recover(adev, NULL);
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+index 1250aae..994b569 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+@@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+
+- amdgpu_device_gpu_recover(ring->adev, job, false);
++ if (amdgpu_device_should_recover_gpu(ring->adev))
++ amdgpu_device_gpu_recover(ring->adev, job);
+ }
+
+ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+index 078f70f..8cbb465 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+@@ -266,8 +266,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+ }
+
+ /* Trigger recovery for world switch failure if no TDR */
+- if (amdgpu_lockup_timeout == 0)
+- amdgpu_device_gpu_recover(adev, NULL, true);
++ if (amdgpu_device_should_recover_gpu(adev))
++ amdgpu_device_gpu_recover(adev, NULL);
+ }
+
+ static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+index 9fc1c37..842567b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+@@ -521,7 +521,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
+ }
+
+ /* Trigger recovery due to world switch failure */
+- amdgpu_device_gpu_recover(adev, NULL, false);
++ if (amdgpu_device_should_recover_gpu(adev))
++ amdgpu_device_gpu_recover(adev, NULL);
+ }
+
+ static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+--
+2.7.4
+