From e3a7a6666e4bc09e679b68a0dd43113e595c03c6 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 10 Sep 2019 11:13:39 +0800 Subject: [PATCH 3770/4256] drm/amdgpu: initialize ras structures for xgmi block (v2) init ras common interface and fs node for xgmi block v2: remove unnecessary physical node number check before invoking amdgpu_xgmi_ras_late_init Signed-off-by: Hawking Zhang Reviewed-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 36 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e03351f0ba5a..df012766d907 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -120,6 +120,7 @@ struct amdgpu_xgmi { /* gpu list in the same hive */ struct list_head head; bool supported; + struct ras_common_if *ras_if; }; struct amdgpu_gmc { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 83be108f57bd..0fa1d2416b57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" +#include "amdgpu_ras.h" #include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -436,3 +437,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) mutex_unlock(&hive->hive_lock); } } + +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "xgmi_wafl_err_count", + .debugfs_name = "xgmi_wafl_err_inject", + }; + + if (!adev->gmc.xgmi.supported || + adev->gmc.xgmi.num_physical_nodes == 0) + return 0; + + if (!adev->gmc.xgmi.ras_if) { + adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gmc.xgmi.ras_if) + return -ENOMEM; + adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; + adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gmc.xgmi.ras_if->sub_block_index = 0; + strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); + } + ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; + r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { + kfree(adev->gmc.xgmi.ras_if); + adev->gmc.xgmi.ras_if = NULL; + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index fbcee31788c4..9023789397c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 64cc3068873a..3dde208fa0c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -51,6 +51,7 @@ #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d @@ -804,7 +805,8 @@ static int gmc_v9_0_ecc_late_init(void *handle) if (r) return r; } - return 0; + + return amdgpu_xgmi_ras_late_init(adev); } static int gmc_v9_0_late_init(void *handle) -- 2.17.1