diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch new file mode 100644 index 00000000..f0faf4ee --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2961-drm-amdgpu-Add-gpu_recovery-parameter.patch @@ -0,0 +1,170 @@ +From 08e5fb8014201d89613db69476b1888500505519 Mon Sep 17 00:00:00 2001 +From: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Date: Tue, 12 Dec 2017 14:09:30 -0500 +Subject: [PATCH 2961/4131] drm/amdgpu: Add gpu_recovery parameter +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add new parameter to control GPU recovery procedure. + +v2: +Add auto logic where reset is disabled for bare metal and enabled +for SR-IOV. +Allow forced reset from debugfs. + +Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- + drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +- + drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 +- + 8 files changed, 19 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 81e019a..d827b85 100755 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -128,6 +128,7 @@ extern int amdgpu_param_buf_per_se; + extern int amdgpu_job_hang_limit; + extern int amdgpu_lbpw; + extern int amdgpu_compute_multipipe; ++extern int amdgpu_gpu_recovery; + + #ifdef CONFIG_DRM_AMDGPU_SI + extern int amdgpu_si_support; +@@ -1985,7 +1986,7 @@ 
amdgpu_get_sdma_instance(struct amdgpu_ring *ring) + #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) + + /* Common functions */ +-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); ++int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force); + bool amdgpu_need_backup(struct amdgpu_device *adev); + void amdgpu_pci_config_reset(struct amdgpu_device *adev); + bool amdgpu_need_post(struct amdgpu_device *adev); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 3866998..285aa9f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -3039,11 +3039,12 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, + * + * @adev: amdgpu device pointer + * @job: which job trigger hang ++ * @force: forces reset regardless of amdgpu_gpu_recovery + * + * Attempt to reset the GPU if it has hung (all asics). + * Returns 0 for success or an error on failure. + */ +-int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) ++int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) + { + struct drm_atomic_state *state = NULL; + uint64_t reset_flags = 0; +@@ -3053,6 +3054,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) + DRM_INFO("No hardware hang detected. 
Did some blocks stall?\n"); + return 0; + } ++ ++ if (!force && (amdgpu_gpu_recovery == 0 || ++ (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { ++ DRM_INFO("GPU recovery disabled.\n"); ++ return 0; ++ } + + dev_info(adev->dev, "GPU reset begin!\n"); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 43c42c7..710e33e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -129,6 +129,7 @@ int amdgpu_param_buf_per_se = 0; + int amdgpu_job_hang_limit = 0; + int amdgpu_lbpw = -1; + int amdgpu_compute_multipipe = -1; ++int amdgpu_gpu_recovery = -1; /* auto */ + + MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); + module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); +@@ -284,6 +285,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); + MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); + module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); + ++MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); ++module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); ++ + #ifdef CONFIG_DRM_AMDGPU_SI + + int amdgpu_si_support = 1; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index ddd43c6..8e5d055 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -705,7 +705,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "gpu recover\n"); +- amdgpu_gpu_recover(adev, NULL); ++ amdgpu_gpu_recover(adev, NULL, true); + + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +index 2ac1d98..c6e2dcd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) + reset_work); + + if (!amdgpu_sriov_vf(adev)) +- amdgpu_gpu_recover(adev, NULL); ++ amdgpu_gpu_recover(adev, NULL, false); + } + + /* Disable *all* interrupts */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +index 9ca22a4..2087d7e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) + atomic_read(&job->ring->fence_drv.last_seq), + job->ring->fence_drv.sync_seq); + +- amdgpu_gpu_recover(job->adev, job); ++ amdgpu_gpu_recover(job->adev, job, false); + } + + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, +diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +index 71f5690..7ade56d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c ++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +@@ -253,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) + } + + /* Trigger recovery due to world switch failure */ +- amdgpu_gpu_recover(adev, NULL); ++ amdgpu_gpu_recover(adev, NULL, false); + } + + static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, +diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +index df52824..e05823d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c ++++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) + } + + /* Trigger recovery due to world switch failure */ +- amdgpu_gpu_recover(adev, NULL); ++ amdgpu_gpu_recover(adev, NULL, false); + } + + static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, +-- +2.7.4 + |