diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1050-drm-amd-amdgpu-add-missing-mutex-lock-to-amdgpu_get_.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1050-drm-amd-amdgpu-add-missing-mutex-lock-to-amdgpu_get_.patch | 161 |
1 file changed, 161 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1050-drm-amd-amdgpu-add-missing-mutex-lock-to-amdgpu_get_.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1050-drm-amd-amdgpu-add-missing-mutex-lock-to-amdgpu_get_.patch new file mode 100644 index 00000000..55d40aa7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1050-drm-amd-amdgpu-add-missing-mutex-lock-to-amdgpu_get_.patch @@ -0,0 +1,161 @@ +From 1c3f7f5e23d4b44cf0dd69f6687630a473a29e03 Mon Sep 17 00:00:00 2001 +From: Tom St Denis <tom.stdenis@amd.com> +Date: Mon, 7 Jan 2019 17:39:10 -0500 +Subject: [PATCH 1050/2940] drm/amd/amdgpu: add missing mutex lock to + amdgpu_get_xgmi_hive() (v3) + +v2: Move locks around in other functions so that this +function can stand on its own. Also only hold the hive +specific lock for add/remove device instead of the driver +global lock so you can't add/remove devices in parallel from +one hive. + +v3: add reset_lock + +Acked-by: Shaoyun.liu < Shaoyun.liu@amd.com> +Signed-off-by: Tom St Denis <tom.stdenis@amd.com> +Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 40 ++++++++++++++-------- + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 5 +-- + 3 files changed, 32 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 1dbd85ee4492..2c9617b7d20f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -3574,9 +3574,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + * by different nodes. No point also since the one node already executing + * reset will also reset all the other nodes in the hive. 
+ */ +- hive = amdgpu_get_xgmi_hive(adev); ++ hive = amdgpu_get_xgmi_hive(adev, 0); + if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && +- !mutex_trylock(&hive->hive_lock)) ++ !mutex_trylock(&hive->reset_lock)) + return 0; + + /* Start with adev pre asic reset first for soft reset check.*/ +@@ -3655,7 +3655,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + } + + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) +- mutex_unlock(&hive->hive_lock); ++ mutex_unlock(&hive->reset_lock); + + if (r) + dev_info(adev->dev, "GPU reset end with ret = %d\n", r); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +index ac57a8767283..dac187454b33 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +@@ -40,26 +40,40 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) + return &hive->device_list; + } + +-struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) ++struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock) + { + int i; + struct amdgpu_hive_info *tmp; + + if (!adev->gmc.xgmi.hive_id) + return NULL; ++ ++ mutex_lock(&xgmi_mutex); ++ + for (i = 0 ; i < hive_count; ++i) { + tmp = &xgmi_hives[i]; +- if (tmp->hive_id == adev->gmc.xgmi.hive_id) ++ if (tmp->hive_id == adev->gmc.xgmi.hive_id) { ++ if (lock) ++ mutex_lock(&tmp->hive_lock); ++ mutex_unlock(&xgmi_mutex); + return tmp; ++ } + } +- if (i >= AMDGPU_MAX_XGMI_HIVE) ++ if (i >= AMDGPU_MAX_XGMI_HIVE) { ++ mutex_unlock(&xgmi_mutex); + return NULL; ++ } + + /* initialize new hive if not exist */ + tmp = &xgmi_hives[hive_count++]; + tmp->hive_id = adev->gmc.xgmi.hive_id; + INIT_LIST_HEAD(&tmp->device_list); + mutex_init(&tmp->hive_lock); ++ mutex_init(&tmp->reset_lock); ++ if (lock) ++ mutex_lock(&tmp->hive_lock); ++ ++ mutex_unlock(&xgmi_mutex); + + return tmp; + } +@@ -111,8 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) + return 
ret; + } + +- mutex_lock(&xgmi_mutex); +- hive = amdgpu_get_xgmi_hive(adev); ++ hive = amdgpu_get_xgmi_hive(adev, 1); + if (!hive) { + ret = -EINVAL; + dev_err(adev->dev, +@@ -147,8 +160,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) + break; + } + ++ mutex_unlock(&hive->hive_lock); + exit: +- mutex_unlock(&xgmi_mutex); + return ret; + } + +@@ -159,15 +172,14 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) + if (!adev->gmc.xgmi.supported) + return; + +- mutex_lock(&xgmi_mutex); +- +- hive = amdgpu_get_xgmi_hive(adev); ++ hive = amdgpu_get_xgmi_hive(adev, 1); + if (!hive) +- goto exit; ++ return; + +- if (!(hive->number_devices--)) ++ if (!(hive->number_devices--)) { + mutex_destroy(&hive->hive_lock); +- +-exit: +- mutex_unlock(&xgmi_mutex); ++ mutex_destroy(&hive->reset_lock); ++ } else { ++ mutex_unlock(&hive->hive_lock); ++ } + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +index 6151eb9c8ad3..14bc60664159 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +@@ -29,10 +29,11 @@ struct amdgpu_hive_info { + struct list_head device_list; + struct psp_xgmi_topology_info topology_info; + int number_devices; +- struct mutex hive_lock; ++ struct mutex hive_lock, ++ reset_lock; + }; + +-struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); ++struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); + int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); + int amdgpu_xgmi_add_device(struct amdgpu_device *adev); + void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); +-- +2.17.1 + |