Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch | 417
1 file changed, 417 insertions(+), 0 deletions(-)
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch
new file mode 100644
index 00000000..721de83b
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch
@@ -0,0 +1,417 @@
+From 858f55ddeac45eaca40516c74fd835777960e429 Mon Sep 17 00:00:00 2001
+From: Yong Zhao <Yong.Zhao@amd.com>
+Date: Fri, 8 Nov 2019 00:30:49 -0500
+Subject: [PATCH 4606/4736] drm/amdkfd: Eliminate unnecessary kernel queue
+ function pointers v2
+
+Up to this point, these functions are identical for all ASICs, so there
+is no need to call them through function pointers. Removing the function
+pointers greatly improves code readability. If those function pointers
+are ever needed again, they can be added back then.
+
+v2: Adapt for the amd-kfd-staging branch, which exclusively carries
+acquire_inline_ib().
+
+Change-Id: I9515fdece70110067cda66e2d24d6768b4846c2f
+Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 14 +++---
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 35 +++++++--------
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 44 ++++++-------------
+ .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 34 +++++++-------
+ 4 files changed, 54 insertions(+), 73 deletions(-)
+
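For readers new to this idiom: the patch replaces an ops table that only
ever held one set of implementations with direct calls. A minimal sketch
of the before/after shape, using hypothetical names rather than the
kernel's own:

#include <stddef.h>

/* Before: every instance carried the same function pointers. */
struct widget;

struct widget_ops {
	int  (*acquire)(struct widget *w, size_t n_dwords);
	void (*submit)(struct widget *w);
};

struct widget {
	struct widget_ops ops;		/* callers: w->ops.acquire(w, n) */
};

/* After: the table is gone; the former static functions are exported
 * under a common prefix, so callers use widget_acquire(w, n) directly
 * and call sites stay greppable.
 */
int  widget_acquire(struct widget *w, size_t n_dwords);
void widget_submit(struct widget *w);
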
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+index 142ac7954032..3e5904f8876a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+@@ -74,11 +74,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+ * The receive packet buff will be sitting on the Indirect Buffer
+ * and in the PQ we put the IB packet + sync packet(s).
+ */
+- status = kq->ops.acquire_packet_buffer(kq,
++ status = kq_acquire_packet_buffer(kq,
+ pq_packets_size_in_bytes / sizeof(uint32_t),
+ &ib_packet_buff);
+ if (status) {
+- pr_err("acquire_packet_buffer failed\n");
++ pr_err("kq_acquire_packet_buffer failed\n");
+ return status;
+ }
+
+@@ -101,7 +101,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+ ib_packet->bitfields5.pasid = pasid;
+
+ if (!sync) {
+- kq->ops.submit_packet(kq);
++ kq_submit_packet(kq);
+ return status;
+ }
+
+@@ -122,7 +122,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+
+ if (status) {
+ pr_err("Failed to allocate GART memory\n");
+- kq->ops.rollback_packet(kq);
++ kq_rollback_packet(kq);
+ return status;
+ }
+
+@@ -158,7 +158,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+
+ rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+- kq->ops.submit_packet(kq);
++ kq_submit_packet(kq);
+
+ /* Wait till CP writes sync code: */
+ status = amdkfd_fence_wait_timeout(
+@@ -372,7 +372,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ return -EINVAL;
+ }
+
+- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ status = kq_acquire_inline_ib(dbgdev->kq,
+ ib_size/sizeof(uint32_t),
+ &packet_buff_uint, &packet_buff_gpu_addr);
+ if (status) {
+@@ -652,7 +652,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ status = kq_acquire_inline_ib(dbgdev->kq,
+ ib_size / sizeof(uint32_t),
+ &packet_buff_uint, &packet_buff_gpu_addr);
+ if (status) {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+index ca7e8d299c8b..236023ce1125 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+@@ -34,7 +34,10 @@
+
+ #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
+
+-static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
++/* Initialize a kernel queue, including allocations of GART memory
++ * needed for the queue.
++ */
++static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+ {
+ struct queue_properties prop;
+@@ -88,7 +91,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+ kq->pq_gpu_addr = kq->pq->gpu_addr;
+
+ /* For CIK family asics, kq->eop_mem is not needed */
+- if (dev->device_info->asic_family > CHIP_HAWAII) {
++ if (dev->device_info->asic_family > CHIP_MULLINS) {
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval != 0)
+ goto err_eop_allocate_vidmem;
+@@ -192,7 +195,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+
+ }
+
+-static void uninitialize(struct kernel_queue *kq)
++/* Uninitialize a kernel queue and free all its memory usages. */
++static void kq_uninitialize(struct kernel_queue *kq)
+ {
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+ kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
+@@ -221,7 +225,7 @@ static void uninitialize(struct kernel_queue *kq)
+ uninit_queue(kq->queue);
+ }
+
+-static int acquire_packet_buffer(struct kernel_queue *kq,
++int kq_acquire_packet_buffer(struct kernel_queue *kq,
+ size_t packet_size_in_dwords, unsigned int **buffer_ptr)
+ {
+ size_t available_size;
+@@ -282,7 +286,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
+ return -ENOMEM;
+ }
+
+-static int acquire_inline_ib(struct kernel_queue *kq,
++int kq_acquire_inline_ib(struct kernel_queue *kq,
+ size_t size_in_dwords,
+ unsigned int **buffer_ptr,
+ uint64_t *gpu_addr)
+@@ -297,7 +301,7 @@ static int acquire_inline_ib(struct kernel_queue *kq,
+ /* Allocate size_in_dwords on the ring, plus an extra dword
+ * for a NOP packet header
+ */
+- ret = acquire_packet_buffer(kq, size_in_dwords + 1, &buf);
++ ret = kq_acquire_packet_buffer(kq, size_in_dwords + 1, &buf);
+ if (ret)
+ return ret;
+
+@@ -315,7 +319,7 @@ static int acquire_inline_ib(struct kernel_queue *kq,
+ return 0;
+ }
+
+-static void submit_packet(struct kernel_queue *kq)
++void kq_submit_packet(struct kernel_queue *kq)
+ {
+ #ifdef DEBUG
+ int i;
+@@ -338,7 +342,7 @@ static void submit_packet(struct kernel_queue *kq)
+ }
+ }
+
+-static void rollback_packet(struct kernel_queue *kq)
++void kq_rollback_packet(struct kernel_queue *kq)
+ {
+ if (kq->dev->device_info->doorbell_size == 8) {
+ kq->pending_wptr64 = *kq->wptr64_kernel;
+@@ -358,14 +362,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+ if (!kq)
+ return NULL;
+
+- kq->ops.initialize = initialize;
+- kq->ops.uninitialize = uninitialize;
+- kq->ops.acquire_packet_buffer = acquire_packet_buffer;
+- kq->ops.acquire_inline_ib = acquire_inline_ib;
+- kq->ops.submit_packet = submit_packet;
+- kq->ops.rollback_packet = rollback_packet;
+-
+- if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
++ if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+ return kq;
+
+ pr_err("Failed to init kernel queue\n");
+@@ -376,7 +373,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+
+ void kernel_queue_uninit(struct kernel_queue *kq)
+ {
+- kq->ops.uninitialize(kq);
++ kq_uninitialize(kq);
+ kfree(kq);
+ }
+
+@@ -396,7 +393,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
+ return;
+ }
+
+- retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
++ retval = kq_acquire_packet_buffer(kq, 5, &buffer);
+ if (unlikely(retval != 0)) {
+ pr_err(" Failed to acquire packet buffer\n");
+ pr_err("Kernel queue test failed\n");
+@@ -404,7 +401,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
+ }
+ for (i = 0; i < 5; i++)
+ buffer[i] = kq->nop_packet;
+- kq->ops.submit_packet(kq);
++ kq_submit_packet(kq);
+
+ pr_err("Ending kernel queue test\n");
+ }
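
The test_kq() helper above shows the calling convention the renamed
functions keep: reserve space on the ring, write the packet DWORDs, then
publish. A hedged sketch of such a caller, where example_send_nops() is a
hypothetical name, not part of the patch:

static int example_send_nops(struct kernel_queue *kq)
{
	unsigned int *buffer;
	int i, retval;

	/* Reserve 5 DWORDs on the ring; on success the pending write
	 * pointer already covers the reservation.
	 */
	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
	if (retval)
		return retval;	/* nothing reserved, nothing to roll back */

	for (i = 0; i < 5; i++)
		buffer[i] = kq->nop_packet;	/* fill the reserved space */

	kq_submit_packet(kq);	/* publish the wptr and ring the doorbell */
	return 0;
}
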
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+index 852de7466cc4..3e39dcb542df 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+@@ -29,54 +29,38 @@
+ #include "kfd_priv.h"
+
+ /**
+- * struct kernel_queue_ops
+- *
+- * @initialize: Initialize a kernel queue, including allocations of GART memory
+- * needed for the queue.
+- *
+- * @uninitialize: Uninitialize a kernel queue and free all its memory usages.
+- *
+- * @acquire_packet_buffer: Returns a pointer to the location in the kernel
++ * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write its packet. It is
+ * Guaranteed that there is enough space for that packet. It also updates the
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
+- * @acquire_inline_ib: Returns a pointer to the location in the kernel
++ * kq_acquire_inline_ib: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write an inline IB. It is
+ * Guaranteed that there is enough space for that IB. It also updates the
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
+- * @submit_packet: Update the write pointer and doorbell of a kernel queue.
+- *
+- * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
+- * queue are equal, which means the CP has read all the submitted packets.
++ * kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
+ *
+- * @rollback_packet: This routine is called if we failed to build an acquired
++ * kq_rollback_packet: This routine is called if we failed to build an acquired
+ * packet for some reason. It just overwrites the pending wptr with the current
+ * one
+ *
+ */
+-struct kernel_queue_ops {
+- bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
+- enum kfd_queue_type type, unsigned int queue_size);
+- void (*uninitialize)(struct kernel_queue *kq);
+- int (*acquire_packet_buffer)(struct kernel_queue *kq,
+- size_t packet_size_in_dwords,
+- unsigned int **buffer_ptr);
+- int (*acquire_inline_ib)(struct kernel_queue *kq,
+- size_t packet_size_in_dwords,
+- unsigned int **buffer_ptr,
+- uint64_t *gpu_addr);
+
+- void (*submit_packet)(struct kernel_queue *kq);
+- void (*rollback_packet)(struct kernel_queue *kq);
+-};
++int kq_acquire_packet_buffer(struct kernel_queue *kq,
++ size_t packet_size_in_dwords,
++ unsigned int **buffer_ptr);
++int kq_acquire_inline_ib(struct kernel_queue *kq,
++ size_t size_in_dwords,
++ unsigned int **buffer_ptr,
++ uint64_t *gpu_addr);
++void kq_submit_packet(struct kernel_queue *kq);
++void kq_rollback_packet(struct kernel_queue *kq);
+
+-struct kernel_queue {
+- struct kernel_queue_ops ops;
+
++struct kernel_queue {
+ /* data */
+ struct kfd_dev *dev;
+ struct mqd_manager *mqd_mgr;
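
With the prototypes above now public, inline IB submission no longer goes
through the ops indirection either. A sketch of such a caller, assuming a
hypothetical build_ib_commands() helper:

static int example_submit_inline_ib(struct kernel_queue *kq,
				    unsigned int ib_dwords)
{
	unsigned int *ib;
	uint64_t ib_gpu_addr;
	int ret;

	/* Reserves ib_dwords + 1 DWORDs; the extra DWORD is a NOP header
	 * so the CP skips the inline payload when draining the ring.
	 */
	ret = kq_acquire_inline_ib(kq, ib_dwords, &ib, &ib_gpu_addr);
	if (ret)
		return ret;

	build_ib_commands(ib, ib_dwords);	/* hypothetical builder */

	/* A complete caller would also acquire a packet buffer for an
	 * INDIRECT_BUFFER packet pointing at ib_gpu_addr, as
	 * dbgdev_diq_submit_ib() does, before submitting.
	 */
	kq_submit_packet(kq);
	return 0;
}
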
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index cbf83ed96dad..6ef4dc60852d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -280,7 +280,7 @@ int pm_send_set_resources(struct packet_manager *pm,
+
+ size = pm->pmf->set_resources_size;
+ mutex_lock(&pm->lock);
+- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+ if (!buffer) {
+@@ -291,9 +291,9 @@ int pm_send_set_resources(struct packet_manager *pm,
+
+ retval = pm->pmf->set_resources(pm, buffer, res);
+ if (!retval)
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+ else
+- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
++ kq_rollback_packet(pm->priv_queue);
+
+ out:
+ mutex_unlock(&pm->lock);
+@@ -318,7 +318,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+ packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
+ mutex_lock(&pm->lock);
+
+- retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ retval = kq_acquire_packet_buffer(pm->priv_queue,
+ packet_size_dwords, &rl_buffer);
+ if (retval)
+ goto fail_acquire_packet_buffer;
+@@ -328,14 +328,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+ if (retval)
+ goto fail_create_runlist;
+
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+
+ mutex_unlock(&pm->lock);
+
+ return retval;
+
+ fail_create_runlist:
+- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
++ kq_rollback_packet(pm->priv_queue);
+ fail_acquire_packet_buffer:
+ mutex_unlock(&pm->lock);
+ fail_create_runlist_ib:
+@@ -354,7 +354,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+
+ size = pm->pmf->query_status_size;
+ mutex_lock(&pm->lock);
+- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+@@ -364,9 +364,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+
+ retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+ if (!retval)
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+ else
+- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
++ kq_rollback_packet(pm->priv_queue);
+
+ out:
+ mutex_unlock(&pm->lock);
+@@ -383,7 +383,7 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+ mutex_lock(&pm->lock);
+
+ if (size) {
+- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+
+@@ -395,9 +395,9 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+
+ retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
+ if (!retval)
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+ else
+- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
++ kq_rollback_packet(pm->priv_queue);
+ }
+
+ out:
+@@ -415,7 +415,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+
+ size = pm->pmf->unmap_queues_size;
+ mutex_lock(&pm->lock);
+- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+@@ -426,9 +426,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+ retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
+ reset, sdma_engine);
+ if (!retval)
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+ else
+- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
++ kq_rollback_packet(pm->priv_queue);
+
+ out:
+ mutex_unlock(&pm->lock);
+@@ -473,7 +473,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
+
+ size = pm->pmf->query_status_size;
+ mutex_lock(&pm->lock);
+- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
++ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+@@ -481,7 +481,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
+ goto out;
+ }
+ memset(buffer, 0x55, size);
+- pm->priv_queue->ops.submit_packet(pm->priv_queue);
++ kq_submit_packet(pm->priv_queue);
+
+ pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3],
+--
+2.17.1
+
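After the conversion, every pm_send_* and pm_update_* site in
kfd_packet_manager.c reduces to the same direct-call shape. A condensed
sketch of that shared pattern, with build_packet() standing in for the
various pm->pmf callbacks:

	int retval = 0;
	uint32_t *buffer;

	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		retval = -ENOMEM;	/* ring had no room */
		goto out;
	}

	retval = build_packet(pm, buffer);	/* e.g. pm->pmf->set_resources() */
	if (!retval)
		kq_submit_packet(pm->priv_queue);	/* publish the packet */
	else
		kq_rollback_packet(pm->priv_queue);	/* drop the reservation */
out:
	mutex_unlock(&pm->lock);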