From 4f712c43a25f97247ad8b2aaa32c255b09704004 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 31 Oct 2019 14:15:29 +0800 Subject: [PATCH 4362/4736] drm/amdgpu: fix possible pstate switch race condition Added lock protection so that the p-state switch will be guarded to be sequential. Also update the hive pstate only all device from the hive are in the same state. Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe Signed-off-by: Evan Quan Reviewed-by: Feifei Xu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2eb3a6bcbd6c..715739799383 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1025,6 +1025,9 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 44a0ee91b42d..e58bad7f64c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; if (!hive) return 0; - if (hive->pstate == pstate) + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + mutex_unlock(&hive->hive_lock); return 0; + } dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); @@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, pstate); - if (ret) + if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); return ret; } @@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } + /* Set default device pstate */ + adev->pstate = -1; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); -- 2.17.1