about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch 170
1 files changed, 170 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch
new file mode 100644
index 00000000..f0faf4ee
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch
@@ -0,0 +1,170 @@
+From 08e5fb8014201d89613db69476b1888500505519 Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Date: Tue, 12 Dec 2017 14:09:30 -0500
+Subject: [PATCH 2961/4131] drm/amdgpu: Add gpu_recovery parameter
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add new parameter to control GPU recovery procedure.
+
+v2:
+Add auto logic where reset is disabled for bare metal and enabled
+for SR-IOV.
+Allow forced reset from debugfs.
+
+Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 +-
+ 8 files changed, 19 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 81e019a..d827b85 100755
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -128,6 +128,7 @@ extern int amdgpu_param_buf_per_se;
+ extern int amdgpu_job_hang_limit;
+ extern int amdgpu_lbpw;
+ extern int amdgpu_compute_multipipe;
++extern int amdgpu_gpu_recovery;
+
+ #ifdef CONFIG_DRM_AMDGPU_SI
+ extern int amdgpu_si_support;
+@@ -1985,7 +1986,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+ #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
+
+ /* Common functions */
+-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job);
++int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force);
+ bool amdgpu_need_backup(struct amdgpu_device *adev);
+ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
+ bool amdgpu_need_post(struct amdgpu_device *adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 3866998..285aa9f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3039,11 +3039,12 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags,
+ *
+ * @adev: amdgpu device pointer
+ * @job: which job trigger hang
+ * @force: forces reset regardless of amdgpu_gpu_recovery
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Returns 0 for success or an error on failure.
+ */
+-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
++int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force)
+ {
+ struct drm_atomic_state *state = NULL;
+ uint64_t reset_flags = 0;
+@@ -3053,6 +3054,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
+ DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
+ return 0;
+ }
++
++ if (!force && (amdgpu_gpu_recovery == 0 ||
++ (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
++ DRM_INFO("GPU recovery disabled.\n");
++ return 0;
++ }
+
+ dev_info(adev->dev, "GPU reset begin!\n");
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 43c42c7..710e33e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -129,6 +129,7 @@ int amdgpu_param_buf_per_se = 0;
+ int amdgpu_job_hang_limit = 0;
+ int amdgpu_lbpw = -1;
+ int amdgpu_compute_multipipe = -1;
++int amdgpu_gpu_recovery = -1; /* auto */
+
+ MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
+ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
+@@ -284,6 +285,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
+ MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
+ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+
++MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
++module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
++
+ #ifdef CONFIG_DRM_AMDGPU_SI
+
+ int amdgpu_si_support = 1;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index ddd43c6..8e5d055 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -705,7 +705,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
+ struct amdgpu_device *adev = dev->dev_private;
+
+ seq_printf(m, "gpu recover\n");
+- amdgpu_gpu_recover(adev, NULL);
++ amdgpu_gpu_recover(adev, NULL, true);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+index 2ac1d98..c6e2dcd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
+ reset_work);
+
+ if (!amdgpu_sriov_vf(adev))
+- amdgpu_gpu_recover(adev, NULL);
++ amdgpu_gpu_recover(adev, NULL, false);
+ }
+
+ /* Disable *all* interrupts */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+index 9ca22a4..2087d7e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+@@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+ atomic_read(&job->ring->fence_drv.last_seq),
+ job->ring->fence_drv.sync_seq);
+
+- amdgpu_gpu_recover(job->adev, job);
++ amdgpu_gpu_recover(job->adev, job, false);
+ }
+
+ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+index 71f5690..7ade56d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+@@ -253,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+ }
+
+ /* Trigger recovery due to world switch failure */
+- amdgpu_gpu_recover(adev, NULL);
++ amdgpu_gpu_recover(adev, NULL, false);
+ }
+
+ static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+index df52824..e05823d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
+ }
+
+ /* Trigger recovery due to world switch failure */
+- amdgpu_gpu_recover(adev, NULL);
++ amdgpu_gpu_recover(adev, NULL, false);
+ }
+
+ static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+--
+2.7.4
+