diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch new file mode 100644 index 00000000..721de83b --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4606-drm-amdkfd-Eliminate-unnecessary-kernel-queue-functi.patch @@ -0,0 +1,417 @@ +From 858f55ddeac45eaca40516c74fd835777960e429 Mon Sep 17 00:00:00 2001 +From: Yong Zhao <Yong.Zhao@amd.com> +Date: Fri, 8 Nov 2019 00:30:49 -0500 +Subject: [PATCH 4606/4736] drm/amdkfd: Eliminate unnecessary kernel queue + function pointers v2 + +Up to this point, those functions are all the same for all ASICs, so +no need to call them by function pointers. Removing the function +pointers will greatly increase the code readability. If there is ever +a need for those function pointers, we can add them back then. + +v2: Adapt for amd-kfd-staging branch, which has acquire_inline_ib() +exclusively. + +Change-Id: I9515fdece70110067cda66e2d24d6768b4846c2f +Signed-off-by: Yong Zhao <Yong.Zhao@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 14 +++--- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 35 +++++++-------- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 44 ++++++------------- + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 34 +++++++------- + 4 files changed, 54 insertions(+), 73 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +index 142ac7954032..3e5904f8876a 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +@@ -74,11 +74,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + * The receive packet buff will be sitting on the Indirect Buffer + * and in the PQ we put the IB packet + sync packet(s). 
+ */ +- status = kq->ops.acquire_packet_buffer(kq, ++ status = kq_acquire_packet_buffer(kq, + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); + if (status) { +- pr_err("acquire_packet_buffer failed\n"); ++ pr_err("kq_acquire_packet_buffer failed\n"); + return status; + } + +@@ -101,7 +101,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + ib_packet->bitfields5.pasid = pasid; + + if (!sync) { +- kq->ops.submit_packet(kq); ++ kq_submit_packet(kq); + return status; + } + +@@ -122,7 +122,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + + if (status) { + pr_err("Failed to allocate GART memory\n"); +- kq->ops.rollback_packet(kq); ++ kq_rollback_packet(kq); + return status; + } + +@@ -158,7 +158,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + + rm_packet->data_lo = QUEUESTATE__ACTIVE; + +- kq->ops.submit_packet(kq); ++ kq_submit_packet(kq); + + /* Wait till CP writes sync code: */ + status = amdkfd_fence_wait_timeout( +@@ -372,7 +372,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + return -EINVAL; + } + +- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ++ status = kq_acquire_inline_ib(dbgdev->kq, + ib_size/sizeof(uint32_t), + &packet_buff_uint, &packet_buff_gpu_addr); + if (status) { +@@ -652,7 +652,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + +- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ++ status = kq_acquire_inline_ib(dbgdev->kq, + ib_size / sizeof(uint32_t), + &packet_buff_uint, &packet_buff_gpu_addr); + if (status) { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index ca7e8d299c8b..236023ce1125 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -34,7 +34,10 @@ + + #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16) + +-static bool initialize(struct 
kernel_queue *kq, struct kfd_dev *dev, ++/* Initialize a kernel queue, including allocations of GART memory ++ * needed for the queue. ++ */ ++static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) + { + struct queue_properties prop; +@@ -88,7 +91,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + kq->pq_gpu_addr = kq->pq->gpu_addr; + + /* For CIK family asics, kq->eop_mem is not needed */ +- if (dev->device_info->asic_family > CHIP_HAWAII) { ++ if (dev->device_info->asic_family > CHIP_MULLINS) { + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval != 0) + goto err_eop_allocate_vidmem; +@@ -192,7 +195,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + + } + +-static void uninitialize(struct kernel_queue *kq) ++/* Uninitialize a kernel queue and free all its memory usages. */ ++static void kq_uninitialize(struct kernel_queue *kq) + { + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) + kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, +@@ -221,7 +225,7 @@ static void uninitialize(struct kernel_queue *kq) + uninit_queue(kq->queue); + } + +-static int acquire_packet_buffer(struct kernel_queue *kq, ++int kq_acquire_packet_buffer(struct kernel_queue *kq, + size_t packet_size_in_dwords, unsigned int **buffer_ptr) + { + size_t available_size; +@@ -282,7 +286,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, + return -ENOMEM; + } + +-static int acquire_inline_ib(struct kernel_queue *kq, ++int kq_acquire_inline_ib(struct kernel_queue *kq, + size_t size_in_dwords, + unsigned int **buffer_ptr, + uint64_t *gpu_addr) +@@ -297,7 +301,7 @@ static int acquire_inline_ib(struct kernel_queue *kq, + /* Allocate size_in_dwords on the ring, plus an extra dword + * for a NOP packet header + */ +- ret = acquire_packet_buffer(kq, size_in_dwords + 1, &buf); ++ ret = kq_acquire_packet_buffer(kq, size_in_dwords + 1, &buf); + if (ret) + return ret; 
+ +@@ -315,7 +319,7 @@ static int acquire_inline_ib(struct kernel_queue *kq, + return 0; + } + +-static void submit_packet(struct kernel_queue *kq) ++void kq_submit_packet(struct kernel_queue *kq) + { + #ifdef DEBUG + int i; +@@ -338,7 +342,7 @@ static void submit_packet(struct kernel_queue *kq) + } + } + +-static void rollback_packet(struct kernel_queue *kq) ++void kq_rollback_packet(struct kernel_queue *kq) + { + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; +@@ -358,14 +362,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + if (!kq) + return NULL; + +- kq->ops.initialize = initialize; +- kq->ops.uninitialize = uninitialize; +- kq->ops.acquire_packet_buffer = acquire_packet_buffer; +- kq->ops.acquire_inline_ib = acquire_inline_ib; +- kq->ops.submit_packet = submit_packet; +- kq->ops.rollback_packet = rollback_packet; +- +- if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) ++ if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) + return kq; + + pr_err("Failed to init kernel queue\n"); +@@ -376,7 +373,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + + void kernel_queue_uninit(struct kernel_queue *kq) + { +- kq->ops.uninitialize(kq); ++ kq_uninitialize(kq); + kfree(kq); + } + +@@ -396,7 +393,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) + return; + } + +- retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); ++ retval = kq_acquire_packet_buffer(kq, 5, &buffer); + if (unlikely(retval != 0)) { + pr_err(" Failed to acquire packet buffer\n"); + pr_err("Kernel queue test failed\n"); +@@ -404,7 +401,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) + } + for (i = 0; i < 5; i++) + buffer[i] = kq->nop_packet; +- kq->ops.submit_packet(kq); ++ kq_submit_packet(kq); + + pr_err("Ending kernel queue test\n"); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +index 
852de7466cc4..3e39dcb542df 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +@@ -29,54 +29,38 @@ + #include "kfd_priv.h" + + /** +- * struct kernel_queue_ops +- * +- * @initialize: Initialize a kernel queue, including allocations of GART memory +- * needed for the queue. +- * +- * @uninitialize: Uninitialize a kernel queue and free all its memory usages. +- * +- * @acquire_packet_buffer: Returns a pointer to the location in the kernel ++ * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel + * queue ring buffer where the calling function can write its packet. It is + * Guaranteed that there is enough space for that packet. It also updates the + * pending write pointer to that location so subsequent calls to + * acquire_packet_buffer will get a correct write pointer + * +- * @acquire_inline_ib: Returns a pointer to the location in the kernel ++ * kq_acquire_inline_ib: Returns a pointer to the location in the kernel + * queue ring buffer where the calling function can write an inline IB. It is + * Guaranteed that there is enough space for that IB. It also updates the + * pending write pointer to that location so subsequent calls to + * acquire_packet_buffer will get a correct write pointer + * +- * @submit_packet: Update the write pointer and doorbell of a kernel queue. +- * +- * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel +- * queue are equal, which means the CP has read all the submitted packets. ++ * kq_submit_packet: Update the write pointer and doorbell of a kernel queue. + * +- * @rollback_packet: This routine is called if we failed to build an acquired ++ * kq_rollback_packet: This routine is called if we failed to build an acquired + * packet for some reason. 
It just overwrites the pending wptr with the current + * one + * + */ +-struct kernel_queue_ops { +- bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, +- enum kfd_queue_type type, unsigned int queue_size); +- void (*uninitialize)(struct kernel_queue *kq); +- int (*acquire_packet_buffer)(struct kernel_queue *kq, +- size_t packet_size_in_dwords, +- unsigned int **buffer_ptr); +- int (*acquire_inline_ib)(struct kernel_queue *kq, +- size_t packet_size_in_dwords, +- unsigned int **buffer_ptr, +- uint64_t *gpu_addr); + +- void (*submit_packet)(struct kernel_queue *kq); +- void (*rollback_packet)(struct kernel_queue *kq); +-}; ++int kq_acquire_packet_buffer(struct kernel_queue *kq, ++ size_t packet_size_in_dwords, ++ unsigned int **buffer_ptr); ++int kq_acquire_inline_ib(struct kernel_queue *kq, ++ size_t size_in_dwords, ++ unsigned int **buffer_ptr, ++ uint64_t *gpu_addr); ++void kq_submit_packet(struct kernel_queue *kq); ++void kq_rollback_packet(struct kernel_queue *kq); + +-struct kernel_queue { +- struct kernel_queue_ops ops; + ++struct kernel_queue { + /* data */ + struct kfd_dev *dev; + struct mqd_manager *mqd_mgr; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index cbf83ed96dad..6ef4dc60852d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -280,7 +280,7 @@ int pm_send_set_resources(struct packet_manager *pm, + + size = pm->pmf->set_resources_size; + mutex_lock(&pm->lock); +- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), + (unsigned int **)&buffer); + if (!buffer) { +@@ -291,9 +291,9 @@ int pm_send_set_resources(struct packet_manager *pm, + + retval = pm->pmf->set_resources(pm, buffer, res); + if (!retval) +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + else +- 
pm->priv_queue->ops.rollback_packet(pm->priv_queue); ++ kq_rollback_packet(pm->priv_queue); + + out: + mutex_unlock(&pm->lock); +@@ -318,7 +318,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); + mutex_lock(&pm->lock); + +- retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ retval = kq_acquire_packet_buffer(pm->priv_queue, + packet_size_dwords, &rl_buffer); + if (retval) + goto fail_acquire_packet_buffer; +@@ -328,14 +328,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + if (retval) + goto fail_create_runlist; + +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + + mutex_unlock(&pm->lock); + + return retval; + + fail_create_runlist: +- pm->priv_queue->ops.rollback_packet(pm->priv_queue); ++ kq_rollback_packet(pm->priv_queue); + fail_acquire_packet_buffer: + mutex_unlock(&pm->lock); + fail_create_runlist_ib: +@@ -354,7 +354,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + + size = pm->pmf->query_status_size; + mutex_lock(&pm->lock); +- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); +@@ -364,9 +364,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + + retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); + if (!retval) +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + else +- pm->priv_queue->ops.rollback_packet(pm->priv_queue); ++ kq_rollback_packet(pm->priv_queue); + + out: + mutex_unlock(&pm->lock); +@@ -383,7 +383,7 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) + mutex_lock(&pm->lock); + + if (size) { +- 
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), + (unsigned int **)&buffer); + +@@ -395,9 +395,9 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) + + retval = pm->pmf->set_grace_period(pm, buffer, grace_period); + if (!retval) +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + else +- pm->priv_queue->ops.rollback_packet(pm->priv_queue); ++ kq_rollback_packet(pm->priv_queue); + } + + out: +@@ -415,7 +415,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + + size = pm->pmf->unmap_queues_size; + mutex_lock(&pm->lock); +- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); +@@ -426,9 +426,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, + reset, sdma_engine); + if (!retval) +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + else +- pm->priv_queue->ops.rollback_packet(pm->priv_queue); ++ kq_rollback_packet(pm->priv_queue); + + out: + mutex_unlock(&pm->lock); +@@ -473,7 +473,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) + + size = pm->pmf->query_status_size; + mutex_lock(&pm->lock); +- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, ++ kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); +@@ -481,7 +481,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) + goto out; + } + memset(buffer, 0x55, size); +- pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ kq_submit_packet(pm->priv_queue); + + pr_info("Submitting %x %x %x %x %x %x 
%x to HIQ to hang the HWS.", + buffer[0], buffer[1], buffer[2], buffer[3], +-- +2.17.1 + |