about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch 266
1 files changed, 266 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch
new file mode 100644
index 00000000..22db5024
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1913-drm-amdgpu-fix-vf-error-handling.patch
@@ -0,0 +1,266 @@
+From 19681fc835b0ded70494c9663fc0a900e93da514 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Thu, 28 Sep 2017 09:47:32 -0400
+Subject: [PATCH 1913/4131] drm/amdgpu: fix vf error handling
+
+The error handling for virtual functions assumed a single
+vf per VM and didn't properly account for bare metal. Make
+the error arrays per device and add locking.
+
+Reviewed-by: Gavin Wan <gavin.wan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 ++++++------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c | 54 +++++++++++++---------------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h | 5 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 13 +++++++
+ 4 files changed, 54 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index cda0a88..b3d12c2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -2119,6 +2119,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ mutex_init(&adev->srbm_mutex);
+ mutex_init(&adev->grbm_idx_mutex);
+ mutex_init(&adev->mn_lock);
++ mutex_init(&adev->virt.vf_errors.lock);
+ hash_init(adev->mn_hash);
+
+ amdgpu_check_arguments(adev);
+@@ -2203,7 +2204,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ goto failed;
+ }
+
+@@ -2214,7 +2215,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ if (amdgpu_vpost_needed(adev)) {
+ if (!adev->bios) {
+ dev_err(adev->dev, "no vBIOS found\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
+ r = -EINVAL;
+ goto failed;
+ }
+@@ -2222,7 +2223,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ if (r) {
+ dev_err(adev->dev, "gpu post error!\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
+ goto failed;
+ }
+ } else {
+@@ -2234,7 +2235,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+@@ -2242,7 +2243,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+@@ -2254,7 +2255,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_fence_driver_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
+ goto failed;
+ }
+
+@@ -2264,7 +2265,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_init failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+ amdgpu_fini(adev);
+ goto failed;
+ }
+@@ -2284,7 +2285,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_ib_pool_init(adev);
+ if (r) {
+ dev_err(adev->dev, "IB initialization failed (%d).\n", r);
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+ goto failed;
+ }
+
+@@ -2337,7 +2338,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_late_init failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+ goto failed;
+ }
+
+@@ -3046,7 +3047,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
+ }
+ } else {
+ dev_err(adev->dev, "asic resume failed (%d).\n", r);
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ if (adev->rings[i] && adev->rings[i]->sched.thread) {
+ kthread_unpark(adev->rings[i]->sched.thread);
+@@ -3064,7 +3065,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
+ if (r) {
+ /* bad news, how to tell it to userspace ? */
+ dev_info(adev->dev, "GPU reset failed\n");
+- amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
++ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ }
+ else {
+ dev_info(adev->dev, "GPU reset successed!\n");
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
+index 45ac918..746b813 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
+@@ -25,30 +25,21 @@
+ #include "amdgpu_vf_error.h"
+ #include "mxgpu_ai.h"
+
+-#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
+-
+-/* struct error_entry - amdgpu VF error information. */
+-struct amdgpu_vf_error_buffer {
+- int read_count;
+- int write_count;
+- uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+- uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+- uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+-};
+-
+-struct amdgpu_vf_error_buffer admgpu_vf_errors;
+-
+-
+-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
++void amdgpu_vf_error_put(struct amdgpu_device *adev,
++ uint16_t sub_error_code,
++ uint16_t error_flags,
++ uint64_t error_data)
+ {
+ int index;
+ uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
+
+- index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+- admgpu_vf_errors.code [index] = error_code;
+- admgpu_vf_errors.flags [index] = error_flags;
+- admgpu_vf_errors.data [index] = error_data;
+- admgpu_vf_errors.write_count ++;
++ mutex_lock(&adev->virt.vf_errors.lock);
++ index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
++ adev->virt.vf_errors.code [index] = error_code;
++ adev->virt.vf_errors.flags [index] = error_flags;
++ adev->virt.vf_errors.data [index] = error_data;
++ adev->virt.vf_errors.write_count ++;
++ mutex_unlock(&adev->virt.vf_errors.lock);
+ }
+
+
+@@ -58,7 +49,8 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
+ u32 data1, data2, data3;
+ int index;
+
+- if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
++ if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
++ (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
+ return;
+ }
+ /*
+@@ -68,18 +60,22 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
+ return;
+ }
+ */
++
++ mutex_lock(&adev->virt.vf_errors.lock);
+ /* The errors are overlay of array, correct read_count as full. */
+- if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+- admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
++ if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
++ adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+ }
+
+- while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) {
+- index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+- data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]);
+- data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF;
+- data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
++ while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
++ index =adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
++ data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
++ adev->virt.vf_errors.flags[index]);
++ data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
++ data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+
+ adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
+- admgpu_vf_errors.read_count ++;
++ adev->virt.vf_errors.read_count ++;
+ }
++ mutex_unlock(&adev->virt.vf_errors.lock);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
+index 2a3278e..6436bd0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
+@@ -56,7 +56,10 @@ enum AMDGIM_ERROR_CATEGORY {
+ AMDGIM_ERROR_CATEGORY_MAX
+ };
+
+-void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data);
++void amdgpu_vf_error_put(struct amdgpu_device *adev,
++ uint16_t sub_error_code,
++ uint16_t error_flags,
++ uint64_t error_data);
+ void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
+
+ #endif /* __VF_ERROR_H__ */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+index afcfb8b..e5fd0ff 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+@@ -36,6 +36,18 @@ struct amdgpu_mm_table {
+ uint64_t gpu_addr;
+ };
+
++#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
++
++/* struct amdgpu_vf_error_buffer - amdgpu VF error information. */
++struct amdgpu_vf_error_buffer {
++ struct mutex lock;
++ int read_count;
++ int write_count;
++ uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
++ uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
++ uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
++};
++
+ /**
+ * struct amdgpu_virt_ops - amdgpu device virt operations
+ */
+@@ -59,6 +71,7 @@ struct amdgpu_virt {
+ struct work_struct flr_work;
+ struct amdgpu_mm_table mm_table;
+ const struct amdgpu_virt_ops *ops;
++ struct amdgpu_vf_error_buffer vf_errors;
+ };
+
+ #define AMDGPU_CSA_SIZE (8 * 1024)
+--
+2.7.4
+