about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch 266
1 files changed, 266 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch
new file mode 100644
index 00000000..bc9a0c4f
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2040-drm-amdgpu-fix-a-race-in-GPU-reset-with-IB-test-v2.patch
@@ -0,0 +1,266 @@
+From d303ea8dbbb910cd6b02367b063a2a908cf26e15 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Tue, 28 May 2019 14:17:25 -0500
+Subject: [PATCH 2040/2940] drm/amdgpu: fix a race in GPU reset with IB test
+ (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Split late_init into two functions, one (do_late_init) which
+just does the hw init, and late_init which calls do_late_init
+and schedules the IB test work. Call do_late_init in
+the GPU reset code to run the init code, but not schedule
+the IB test code. The IB test code is called directly
+in the gpu reset code so no need to run the IB tests
+in a separate work thread. If we do, we end up racing.
+
+v2: Rework late_init. Pull out the mgpu fan boost and xgmi
+pstate code into late_init so they get called in all cases.
+Rename the late_init worker thread to delayed work since it's
+just the IB tests now which can happen later. Schedule the
+work at init and resume time. It's not needed at reset time
+because the IB tests are called directly.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Cc: Xinhui Pan <xinhui.pan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 116 +++++++++++----------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
+ 3 files changed, 61 insertions(+), 59 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index e79bcfc1499c..b99fef62357f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -961,7 +961,7 @@ struct amdgpu_device {
+ const struct amdgpu_df_funcs *df_funcs;
+
+ /* delayed work_func for deferring clockgating during resume */
+- struct delayed_work late_init_work;
++ struct delayed_work delayed_init_work;
+
+ struct amdgpu_virt virt;
+ /* firmware VRAM reservation */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 1228adcfab49..84a9f0745d77 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -1904,6 +1904,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
+ return 0;
+ }
+
++static int amdgpu_device_enable_mgpu_fan_boost(void)
++{
++ struct amdgpu_gpu_instance *gpu_ins;
++ struct amdgpu_device *adev;
++ int i, ret = 0;
++
++ mutex_lock(&mgpu_info.mutex);
++
++ /*
++ * MGPU fan boost feature should be enabled
++ * only when there are two or more dGPUs in
++ * the system
++ */
++ if (mgpu_info.num_dgpu < 2)
++ goto out;
++
++ for (i = 0; i < mgpu_info.num_dgpu; i++) {
++ gpu_ins = &(mgpu_info.gpu_ins[i]);
++ adev = gpu_ins->adev;
++ if (!(adev->flags & AMD_IS_APU) &&
++ !gpu_ins->mgpu_fan_enabled &&
++ adev->powerplay.pp_funcs &&
++ adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
++ ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
++ if (ret)
++ break;
++
++ gpu_ins->mgpu_fan_enabled = 1;
++ }
++ }
++
++out:
++ mutex_unlock(&mgpu_info.mutex);
++
++ return ret;
++}
++
+ /**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+@@ -1937,11 +1974,15 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+- queue_delayed_work(system_wq, &adev->late_init_work,
+- msecs_to_jiffies(AMDGPU_RESUME_MS));
+-
+ amdgpu_device_fill_reset_magic(adev);
+
++ r = amdgpu_device_enable_mgpu_fan_boost();
++ if (r)
++ DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
++
++ /* set to low pstate by default */
++ amdgpu_xgmi_set_pstate(adev, 0);
++
+ return 0;
+ }
+
+@@ -2040,65 +2081,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+ return 0;
+ }
+
+-static int amdgpu_device_enable_mgpu_fan_boost(void)
+-{
+- struct amdgpu_gpu_instance *gpu_ins;
+- struct amdgpu_device *adev;
+- int i, ret = 0;
+-
+- mutex_lock(&mgpu_info.mutex);
+-
+- /*
+- * MGPU fan boost feature should be enabled
+- * only when there are two or more dGPUs in
+- * the system
+- */
+- if (mgpu_info.num_dgpu < 2)
+- goto out;
+-
+- for (i = 0; i < mgpu_info.num_dgpu; i++) {
+- gpu_ins = &(mgpu_info.gpu_ins[i]);
+- adev = gpu_ins->adev;
+- if (!(adev->flags & AMD_IS_APU) &&
+- !gpu_ins->mgpu_fan_enabled &&
+- adev->powerplay.pp_funcs &&
+- adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
+- ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+- if (ret)
+- break;
+-
+- gpu_ins->mgpu_fan_enabled = 1;
+- }
+- }
+-
+-out:
+- mutex_unlock(&mgpu_info.mutex);
+-
+- return ret;
+-}
+-
+ /**
+- * amdgpu_device_ip_late_init_func_handler - work handler for ib test
++ * amdgpu_device_delayed_init_work_handler - work handler for IB tests
+ *
+ * @work: work_struct.
+ */
+-static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
++static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
+ {
+ struct amdgpu_device *adev =
+- container_of(work, struct amdgpu_device, late_init_work.work);
++ container_of(work, struct amdgpu_device, delayed_init_work.work);
+ int r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+-
+- r = amdgpu_device_enable_mgpu_fan_boost();
+- if (r)
+- DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+-
+- /*set to low pstate by default */
+- amdgpu_xgmi_set_pstate(adev, 0);
+-
+ }
+
+ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
+@@ -2571,8 +2567,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ INIT_LIST_HEAD(&adev->ring_lru_list);
+ spin_lock_init(&adev->ring_lru_list_lock);
+
+- INIT_DELAYED_WORK(&adev->late_init_work,
+- amdgpu_device_ip_late_init_func_handler);
++ INIT_DELAYED_WORK(&adev->delayed_init_work,
++ amdgpu_device_delayed_init_work_handler);
+ INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+ amdgpu_device_delay_enable_gfx_off);
+
+@@ -2806,6 +2802,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ /* must succeed. */
+ amdgpu_ras_resume(adev);
+
++ queue_delayed_work(system_wq, &adev->delayed_init_work,
++ msecs_to_jiffies(AMDGPU_RESUME_MS));
++
+ r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+ if (r) {
+ dev_err(adev->dev, "Could not create pcie_replay_count");
+@@ -2853,7 +2852,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
+ adev->firmware.gpu_info_fw = NULL;
+ }
+ adev->accel_working = false;
+- cancel_delayed_work_sync(&adev->late_init_work);
++ cancel_delayed_work_sync(&adev->delayed_init_work);
+ /* free i2c buses */
+ if (!amdgpu_device_has_dc_support(adev))
+ amdgpu_i2c_fini(adev);
+@@ -2916,7 +2915,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+ if (fbcon)
+ amdgpu_fbdev_set_suspend(adev, 1);
+
+- cancel_delayed_work_sync(&adev->late_init_work);
++ cancel_delayed_work_sync(&adev->delayed_init_work);
+
+ if (!amdgpu_device_has_dc_support(adev)) {
+ /* turn off display hw */
+@@ -3036,6 +3035,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+ if (r)
+ return r;
+
++ queue_delayed_work(system_wq, &adev->delayed_init_work,
++ msecs_to_jiffies(AMDGPU_RESUME_MS));
++
+ if (!amdgpu_device_has_dc_support(adev)) {
+ /* pin cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+@@ -3059,7 +3061,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+ return r;
+
+ /* Make sure IB tests flushed */
+- flush_delayed_work(&adev->late_init_work);
++ flush_delayed_work(&adev->delayed_init_work);
+
+ /* blat the mode back in */
+ if (fbcon) {
+@@ -3647,7 +3649,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+
+ dev_info(adev->dev, "GPU reset begin!\n");
+
+- cancel_delayed_work_sync(&adev->late_init_work);
++ cancel_delayed_work_sync(&adev->delayed_init_work);
+
+ hive = amdgpu_get_xgmi_hive(adev, false);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index dfa59c31e5e0..2cdaacef0226 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -1003,7 +1003,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+ int r, pasid;
+
+ /* Ensure IB tests are run on ring */
+- flush_delayed_work(&adev->late_init_work);
++ flush_delayed_work(&adev->delayed_init_work);
+
+ file_priv->driver_priv = NULL;
+
+--
+2.17.1
+