aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch206
1 files changed, 206 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch
new file mode 100644
index 00000000..2425af38
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1183-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch
@@ -0,0 +1,206 @@
+From 253a625ea536b69eaef961ce1c461f1521ed565b Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Wed, 7 Sep 2016 18:06:52 -0400
+Subject: [PATCH 1183/4131] drm/amdkfd: Fix IB freeing without DIQ
+ synchronization
+
+When DIQ IBs are submitted without synchronization, it's not safe
+to release the IB memory. Avoid the need to explicitly free the IB
+by allocating it inline in the ring buffer, packaged inside a NOP
+packet.
+
+Change-Id: Ife4d527fbcca369bdb45d5a09b1ae72da3231045
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 25 ++++++++-----------
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 36 ++++++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 10 ++++++++
+ 3 files changed, 55 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+index 74109d0..9de73ce 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+@@ -373,8 +373,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ /* we do not control the vmid in DIQ mode, just a place holder */
+ unsigned int vmid = 0;
+
+- struct kfd_mem_obj *mem_obj;
+ uint32_t *packet_buff_uint = NULL;
++ uint64_t packet_buff_gpu_addr = 0;
+
+ struct pm4__set_config_reg *packets_vec = NULL;
+
+@@ -398,13 +398,13 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ break;
+ }
+
+- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ ib_size/sizeof(uint32_t),
++ &packet_buff_uint, &packet_buff_gpu_addr);
+
+ if (status != 0)
+ break;
+
+- packet_buff_uint = mem_obj->cpu_ptr;
+-
+ memset(packet_buff_uint, 0, ib_size);
+
+ packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+@@ -499,7 +499,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ adw_info->process->pasid,
+- mem_obj->gpu_addr,
++ packet_buff_gpu_addr,
+ packet_buff_uint,
+ ib_size, true);
+
+@@ -511,8 +511,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ }
+
+ } while (false);
+- if (packet_buff_uint != NULL)
+- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+ return status;
+
+@@ -632,8 +630,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+ int status = 0;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+- struct kfd_mem_obj *mem_obj;
+ uint32_t *packet_buff_uint = NULL;
++ uint64_t packet_buff_gpu_addr = 0;
+ struct pm4__set_config_reg *packets_vec = NULL;
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+@@ -674,13 +672,13 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ ib_size / sizeof(uint32_t),
++ &packet_buff_uint, &packet_buff_gpu_addr);
+
+ if (status != 0)
+ break;
+
+- packet_buff_uint = mem_obj->cpu_ptr;
+-
+ memset(packet_buff_uint, 0, ib_size);
+
+ packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
+@@ -715,7 +713,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ wac_info->process->pasid,
+- mem_obj->gpu_addr,
++ packet_buff_gpu_addr,
+ packet_buff_uint,
+ ib_size, false);
+
+@@ -724,9 +722,6 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+
+ } while (false);
+
+- if (packet_buff_uint != NULL)
+- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+-
+ return status;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+index 9eaa040..162a83f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+@@ -219,7 +219,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
+ * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
+ */
+ rptr = *kq->rptr_kernel;
+- wptr = *kq->wptr_kernel;
++ wptr = kq->pending_wptr;
+ queue_address = (unsigned int *)kq->pq_kernel_addr;
+ queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+
+@@ -258,6 +258,39 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
+ return 0;
+ }
+
++/* Reserve an inline IB of size_in_dwords dwords, wrapped in a NOP packet, on
++ * kq's ring; on success *buffer_ptr / *gpu_addr hold its CPU / GPU address.
++ * Returns 0, -EINVAL if the size cannot be encoded in the NOP header, or the
++ * error returned by acquire_packet_buffer().
++ */
++static int acquire_inline_ib(struct kernel_queue *kq, size_t size_in_dwords,
++		unsigned int **buffer_ptr, uint64_t *gpu_addr)
++{
++	int ret;
++	unsigned int *buf;
++	union PM4_MES_TYPE_3_HEADER nop;
++
++	/* nop.count is size_in_dwords - 1: reject 0 (would wrap) and oversize */
++	if (size_in_dwords == 0 || size_in_dwords >= (1 << 14))
++		return -EINVAL;
++
++	/* One extra dword on the ring holds the NOP packet header. */
++	ret = acquire_packet_buffer(kq, size_in_dwords + 1, &buf);
++	if (ret)
++		return ret;
++
++	/* Build a NOP packet that contains the IB as "payload". */
++	nop.u32all = 0;
++	nop.opcode = IT_NOP;
++	nop.count = size_in_dwords - 1;
++	nop.type = PM4_TYPE_3;
++	*buf = nop.u32all;
++	*buffer_ptr = buf + 1;
++	*gpu_addr = kq->pq_gpu_addr + ((unsigned long)*buffer_ptr -
++			(unsigned long)kq->pq_kernel_addr);
++	return 0;
++}
++
+ static void submit_packet(struct kernel_queue *kq)
+ {
+ #ifdef DEBUG
+@@ -300,6 +333,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+ kq->ops.initialize = initialize;
+ kq->ops.uninitialize = uninitialize;
+ kq->ops.acquire_packet_buffer = acquire_packet_buffer;
++ kq->ops.acquire_inline_ib = acquire_inline_ib;
+ kq->ops.submit_packet = submit_packet;
+ kq->ops.rollback_packet = rollback_packet;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+index 5940531..a217f42 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+@@ -42,6 +42,12 @@
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
++ * @acquire_inline_ib: Returns a pointer to the location in the kernel
++ * queue ring buffer where the calling function can write an inline IB. It is
++ * guaranteed that there is enough space for that IB. It also updates the
++ * pending write pointer to that location so subsequent calls to
++ * acquire_packet_buffer will get a correct write pointer
++ *
+ * @submit_packet: Update the write pointer and doorbell of a kernel queue.
+ *
+ * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
+@@ -59,6 +65,10 @@ struct kernel_queue_ops {
+ int (*acquire_packet_buffer)(struct kernel_queue *kq,
+ size_t packet_size_in_dwords,
+ unsigned int **buffer_ptr);
++ int (*acquire_inline_ib)(struct kernel_queue *kq,
++ size_t packet_size_in_dwords,
++ unsigned int **buffer_ptr,
++ uint64_t *gpu_addr);
+
+ void (*submit_packet)(struct kernel_queue *kq);
+ void (*rollback_packet)(struct kernel_queue *kq);
+--
+2.7.4
+