diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3736-dmr-amdgpu-Avoid-HW-GPU-reset-for-RAS.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3736-dmr-amdgpu-Avoid-HW-GPU-reset-for-RAS.patch | 512 |
1 files changed, 512 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3736-dmr-amdgpu-Avoid-HW-GPU-reset-for-RAS.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3736-dmr-amdgpu-Avoid-HW-GPU-reset-for-RAS.patch new file mode 100644 index 00000000..476de3c3 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3736-dmr-amdgpu-Avoid-HW-GPU-reset-for-RAS.patch @@ -0,0 +1,512 @@ +From 61d5f95e1eca078269c2b3dc74e18b57ad13a064 Mon Sep 17 00:00:00 2001 +From: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Date: Thu, 22 Aug 2019 14:40:00 -0400 +Subject: [PATCH 3736/4256] dmr/amdgpu: Avoid HW GPU reset for RAS. + +Problem: +Under certain conditions, when some IP blocks take a RAS error, +we can get into a situation where a GPU reset is not possible +due to issues in RAS in SMU/PSP. + +Temporary fix until proper solution in PSP/SMU is ready: +When uncorrectable error happens the DF will unconditionally +broadcast error event packets to all its clients/slave upon +receiving fatal error event and freeze all its outbound queues, +err_event_athub interrupt will be triggered. +In such a case we use this interrupt +to issue GPU reset. The GPU reset code is modified for such case to avoid HW +reset, only stops schedulers, detaches all in progress and not yet scheduled +job's fences, set error code on them and signals. +Also reject any new incoming job submissions from user space. +All this is done to notify the applications of the problem. + +v2: +Extract amdgpu_amdkfd_pre/post_reset from amdgpu_device_lock/unlock_adev +Move amdgpu_job_stop_all_jobs_on_sched to amdgpu_job.c +Remove print param from amdgpu_ras_query_error_count + +v3: +Update based on previous bug fixing patch to properly call amdgpu_amdkfd_pre_reset +for other XGMI hive members. 
+ +Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++ + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++------ + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++ + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 38 ++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 ++ + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 +++++++++++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 ++++++ + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++--- + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 +++++++------- + drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 5 +++ + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 32 +++++++++--------- + 12 files changed, 155 insertions(+), 42 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index 55282bfcaa45..901ce33cc481 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -34,6 +34,7 @@ + #include "amdgpu_gmc.h" + #include "amdgpu_gem.h" + #include "amdgpu_display.h" ++#include "amdgpu_ras.h" + + static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *data, +@@ -1278,6 +1279,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + bool reserved_buffers = false; + int i, r; + ++ if (amdgpu_ras_intr_triggered()) ++ return -EHWPOISON; ++ + if (!adev->accel_working) + return -EBUSY; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index be0a06014037..e30f7ba53aab 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -3725,25 +3725,18 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) + adev->mp1_state 
= PP_MP1_STATE_NONE; + break; + } +- /* Block kfd: SRIOV would do it separately */ +- if (!amdgpu_sriov_vf(adev)) +- amdgpu_amdkfd_pre_reset(adev); + + return true; + } + + static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) + { +- /*unlock kfd: SRIOV would do it separately */ +- if (!amdgpu_sriov_vf(adev)) +- amdgpu_amdkfd_post_reset(adev); + amdgpu_vf_error_trans_all(adev); + adev->mp1_state = PP_MP1_STATE_NONE; + adev->in_gpu_reset = 0; + mutex_unlock(&adev->lock_reset); + } + +- + /** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * +@@ -3763,11 +3756,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; ++ bool in_ras_intr = amdgpu_ras_intr_triggered(); + + need_full_reset = job_signaled = false; + INIT_LIST_HEAD(&device_list); + +- dev_info(adev->dev, "GPU reset begin!\n"); ++ dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); + + cancel_delayed_work_sync(&adev->delayed_init_work); + +@@ -3794,9 +3788,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + return 0; + } + ++ /* Block kfd: SRIOV would do it separately */ ++ if (!amdgpu_sriov_vf(adev)) ++ amdgpu_amdkfd_pre_reset(adev); ++ + /* Build list of devices to reset */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!hive) { ++ /*unlock kfd: SRIOV would do it separately */ ++ if (!amdgpu_sriov_vf(adev)) ++ amdgpu_amdkfd_post_reset(adev); + amdgpu_device_unlock_adev(adev); + return -ENODEV; + } +@@ -3814,8 +3815,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { +- if (tmp_adev != adev) ++ if (tmp_adev != adev) { + amdgpu_device_lock_adev(tmp_adev, false); ++ if (!amdgpu_sriov_vf(tmp_adev)) ++ amdgpu_amdkfd_pre_reset(tmp_adev); ++ } ++ + /* + * Mark these ASICs to be reseted as 
untracked first + * And add them back after reset completed +@@ -3823,7 +3828,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + amdgpu_unregister_gpu_instance(tmp_adev); + + /* disable ras on ALL IPs */ +- if (amdgpu_device_ip_need_full_reset(tmp_adev)) ++ if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + amdgpu_ras_suspend(tmp_adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +@@ -3833,10 +3838,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + continue; + + drm_sched_stop(&ring->sched, job ? &job->base : NULL); ++ ++ if (in_ras_intr) ++ amdgpu_job_stop_all_jobs_on_sched(&ring->sched); + } + } + + ++ if (in_ras_intr) ++ goto skip_sched_resume; ++ + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. +@@ -3895,6 +3906,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + + /* Post ASIC reset for all devs .*/ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { ++ + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + +@@ -3921,7 +3933,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + } else { + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); + } ++ } + ++skip_sched_resume: ++ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { ++ /*unlock kfd: SRIOV would do it separately */ ++ if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) ++ amdgpu_amdkfd_post_reset(tmp_adev); + amdgpu_device_unlock_adev(tmp_adev); + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 2de8db5e864c..3aa7c136d2c3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -40,6 +40,8 @@ + + #include "amdgpu_amdkfd.h" + ++#include "amdgpu_ras.h" ++ + /* + * KMS wrapper. 
+ * - 3.0.0 - initial driver +@@ -1144,6 +1146,9 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = dev->dev_private; + ++ if (amdgpu_ras_intr_triggered()) ++ return; ++ + /* if we are running in a VM, make sure the device + * torn down properly on reboot/shutdown. + * unfortunately we can't detect certain +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +index 7ab1241bd9e5..c043d8f6bb8b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -246,6 +246,44 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) + return fence; + } + ++#define to_drm_sched_job(sched_job) \ ++ container_of((sched_job), struct drm_sched_job, queue_node) ++ ++void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) ++{ ++ struct drm_sched_job *s_job; ++ struct drm_sched_entity *s_entity = NULL; ++ int i; ++ ++ /* Signal all jobs not yet scheduled */ ++ for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ ++ if (!rq) ++ continue; ++ ++ spin_lock(&rq->lock); ++ list_for_each_entry(s_entity, &rq->entities, list) { ++ while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { ++ struct drm_sched_fence *s_fence = s_job->s_fence; ++ ++ dma_fence_signal(&s_fence->scheduled); ++ dma_fence_set_error(&s_fence->finished, -EHWPOISON); ++ dma_fence_signal(&s_fence->finished); ++ } ++ } ++ spin_unlock(&rq->lock); ++ } ++ ++ /* Signal all jobs already scheduled to HW */ ++ list_for_each_entry(s_job, &sched->ring_mirror_list, node) { ++ struct drm_sched_fence *s_fence = s_job->s_fence; ++ ++ dma_fence_set_error(&s_fence->finished, -EHWPOISON); ++ dma_fence_signal(&s_fence->finished); ++ } ++} ++ + const struct drm_sched_backend_ops amdgpu_sched_ops = { + .dependency = amdgpu_job_dependency, + .run_job = 
amdgpu_job_run, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +index 51e62504c279..dc7ee9358dcd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +@@ -76,4 +76,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, + void *owner, struct dma_fence **f); + int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, + struct dma_fence **fence); ++ ++void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched); ++ + #endif +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 751c4c8e1cee..9d4e71ee8791 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -1030,6 +1030,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) + /* Ensure IB tests are run on ring */ + flush_delayed_work(&adev->delayed_init_work); + ++ ++ if (amdgpu_ras_intr_triggered()) { ++ DRM_ERROR("RAS Intr triggered, device disabled!!"); ++ return -EHWPOISON; ++ } ++ + file_priv->driver_priv = NULL; + + r = pm_runtime_get_sync(dev->dev); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index 2ca3997d4b3a..01a66559f04e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -24,6 +24,8 @@ + #include <linux/debugfs.h> + #include <linux/list.h> + #include <linux/module.h> ++#include <linux/reboot.h> ++#include <linux/syscalls.h> + #include "amdgpu.h" + #include "amdgpu_ras.h" + #include "amdgpu_atomfirmware.h" +@@ -64,6 +66,9 @@ const char *ras_block_string[] = { + /* inject address is 52 bits */ + #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + ++ ++atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); ++ + static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo 
**bo_ptr); +@@ -188,6 +193,10 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, + + return 0; + } ++ ++static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, ++ struct ras_common_if *head); ++ + /** + * DOC: AMDGPU RAS debugfs control interface + * +@@ -627,12 +636,14 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, + info->ue_count = obj->err_data.ue_count; + info->ce_count = obj->err_data.ce_count; + +- if (err_data.ce_count) ++ if (err_data.ce_count) { + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); +- if (err_data.ue_count) ++ } ++ if (err_data.ue_count) { + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); ++ } + + return 0; + } +@@ -1729,3 +1740,10 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) + + return 0; + } ++ ++void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) ++{ ++ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { ++ DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected! 
Stopping all GPU jobs.\n"); ++ } ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +index 66b71525446e..6fda96b29f1f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +@@ -606,4 +606,14 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, + + int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, + struct ras_dispatch_if *info); ++ ++extern atomic_t amdgpu_ras_in_intr; ++ ++static inline bool amdgpu_ras_intr_triggered(void) ++{ ++ return !!atomic_read(&amdgpu_ras_in_intr); ++} ++ ++void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); ++ + #endif +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index 384fc226ecb5..918eaeedb5b9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -5683,10 +5683,12 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) + { + /* TODO ue will trigger an interrupt. 
*/ +- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); +- if (adev->gfx.funcs->query_ras_error_count) +- adev->gfx.funcs->query_ras_error_count(adev, err_data); +- amdgpu_ras_reset_gpu(adev, 0); ++ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { ++ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); ++ if (adev->gfx.funcs->query_ras_error_count) ++ adev->gfx.funcs->query_ras_error_count(adev, err_data); ++ amdgpu_ras_reset_gpu(adev, 0); ++ } + return AMDGPU_RAS_SUCCESS; + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index 4a19647edfea..617311db7d2e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -243,18 +243,20 @@ static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry) + { +- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); +- if (adev->umc.funcs->query_ras_error_count) +- adev->umc.funcs->query_ras_error_count(adev, err_data); +- /* umc query_ras_error_address is also responsible for clearing +- * error status +- */ +- if (adev->umc.funcs->query_ras_error_address) +- adev->umc.funcs->query_ras_error_address(adev, err_data); ++ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { ++ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); ++ if (adev->umc.funcs->query_ras_error_count) ++ adev->umc.funcs->query_ras_error_count(adev, err_data); ++ /* umc query_ras_error_address is also responsible for clearing ++ * error status ++ */ ++ if (adev->umc.funcs->query_ras_error_address) ++ adev->umc.funcs->query_ras_error_address(adev, err_data); + +- /* only uncorrectable error needs gpu reset */ +- if (err_data->ue_count) +- amdgpu_ras_reset_gpu(adev, 0); ++ /* only uncorrectable error needs gpu reset */ ++ if (err_data->ue_count) ++ amdgpu_ras_reset_gpu(adev, 0); ++ } + + return AMDGPU_RAS_SUCCESS; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +index 
5e784bbd2d7f..27eeab143ad7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +@@ -30,6 +30,7 @@ + #include "nbio/nbio_7_4_0_smn.h" + #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" + #include <uapi/linux/kfd_ioctl.h> ++#include "amdgpu_ras.h" + + #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c + +@@ -329,6 +330,8 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device + BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); ++ ++ amdgpu_ras_global_ras_isr(adev); + } + } + +@@ -344,6 +347,8 @@ static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_d + BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); ++ ++ amdgpu_ras_global_ras_isr(adev); + } + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +index 72840582f716..6424723e1af0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +@@ -1978,24 +1978,26 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + uint32_t err_source; + int instance; + +- instance = sdma_v4_0_irq_id_to_seq(entry->client_id); +- if (instance < 0) +- return 0; ++ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { ++ instance = sdma_v4_0_irq_id_to_seq(entry->client_id); ++ if (instance < 0) ++ return 0; + +- switch (entry->src_id) { +- case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: +- err_source = 0; +- break; +- case SDMA0_4_0__SRCID__SDMA_ECC: +- err_source = 1; +- break; +- default: +- return 0; +- } ++ switch (entry->src_id) { ++ case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: ++ err_source = 0; ++ break; ++ case SDMA0_4_0__SRCID__SDMA_ECC: ++ err_source = 1; ++ break; ++ default: ++ return 0; ++ } + +- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); ++ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 
+ +- amdgpu_ras_reset_gpu(adev, 0); ++ amdgpu_ras_reset_gpu(adev, 0); ++ } + + return AMDGPU_RAS_SUCCESS; + } +-- +2.17.1 + |