diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch | 496 |
1 files changed, 0 insertions, 496 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch deleted file mode 100644 index 934b7aae..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch +++ /dev/null @@ -1,496 +0,0 @@ -From fd2a595582dd099adc3ac881ce931643ef9935d5 Mon Sep 17 00:00:00 2001 -From: Jay Cornwall <Jay.Cornwall@amd.com> -Date: Mon, 24 Oct 2016 17:12:50 -0500 -Subject: [PATCH 1551/4131] drm/amdgpu: Load write pointer manually during KFD - MQD setup - -CP_HQD_PQ_WPTR_POLL* fails intermittently on Tonga due to an RTL bug -in CP clock gating logic. - -Load the write pointer inside the driver after the doorbell logic -has been enabled. Apply the same shift/mask used by CP microcode to -populate CP_HQD_PQ_WPTR manually. Unify this code path with MQD setup -for SDMA queues. - -Change-Id: I3f4b8a3b778d00310c85c17f5af740cb3aa2844b -Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23 +++++++++ - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++ - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 60 +++++----------------- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 59 +++++---------------- - .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 21 +++----- - drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +- - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 +- - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 19 ++++--- - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 17 ++++-- - drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 7 +-- - 10 files changed, 94 insertions(+), 121 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -index 03432c7..f425878 100755 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -@@ -26,6 +26,7 @@ - #include "amdgpu.h" - #include "amdgpu_gfx.h" - #include <linux/module.h> -+#include <linux/mmu_context.h> - - #define AMDKFD_SKIP_UNCOMPILED_CODE 1 - -@@ -535,3 +536,25 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - dma_buf_put(dma_buf); - return r; - } -+ -+bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr, -+ uint32_t *wptr_val) -+{ -+ bool wptr_valid = false; -+ -+ if (mm && wptr) { -+ if (mm == current->mm) { -+ /* Running in the correct user process context */ -+ wptr_valid = !get_user(*wptr_val, wptr); -+ } else if (current->mm == NULL) { -+ /* A kernel thread can temporarily use a user -+ * process context for AIO -+ */ -+ use_mm(mm); -+ wptr_valid = !get_user(*wptr_val, wptr); -+ unuse_mm(mm); -+ } -+ } -+ -+ return wptr_valid; -+} -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -index 4e67b1e..dcf2c5a 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -@@ -153,6 +153,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - size_t buffer_size, uint32_t *metadata_size, - uint32_t *flags); - -+bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr, -+ uint32_t *wptr_val); -+ - /* GPUVM API */ - int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -index 07a843f..7ab108e 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -@@ -23,7 +23,6 @@ - #include <linux/fdtable.h> - #include <linux/uaccess.h> - #include <linux/firmware.h> --#include <linux/mmu_context.h> - #include <drm/drmP.h> - #include "amdgpu.h" - #include "amdgpu_amdkfd.h" -@@ -105,7 +104,9 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, u - static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); - static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr); -+ uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t wptr_shift, uint32_t wptr_mask, -+ struct mm_struct *mm); - static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -@@ -274,16 +275,6 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, - lock_srbm(kgd, mec, pipe, queue_id, 0); - } - --static uint32_t get_queue_mask(uint32_t pipe_id, uint32_t queue_id) --{ -- /* assumes that pipe0 is used by graphics and that the correct -- * MEC is selected by acquire_queue already -- */ -- unsigned bit = ((pipe_id+1) * CIK_QUEUES_PER_PIPE_MEC + queue_id) & 31; -- -- return ((uint32_t)1) << bit; --} -- - static void release_queue(struct kgd_dev *kgd) - { - unlock_srbm(kgd); -@@ -380,12 +371,15 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) - } - - static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr) -+ uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t wptr_shift, uint32_t wptr_mask, -+ struct mm_struct *mm) -+ - { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct cik_mqd *m; - uint32_t *mqd_hqd; -- uint32_t reg; -+ uint32_t reg, wptr_val; - - m = get_mqd(mqd); - -@@ -397,23 +391,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - for (reg = mmCP_HQD_VMID; reg <= mmCP_MQD_CONTROL; reg++) - WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); - -- if (wptr) { -- /* Don't read wptr with get_user because the user -- * context may not be accessible (if this function -- * runs in a work queue). Instead trigger a one-shot -- * polling read from memory in the CP. This assumes -- * that wptr is GPU-accessible in the queue's VMID via -- * ATC or SVM. WPTR==RPTR before starting the poll so -- * the CP starts fetching new commands from the right -- * place. -- */ -- WREG32(mmCP_HQD_PQ_WPTR, m->cp_hqd_pq_rptr); -- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, (uint32_t)(uint64_t)wptr); -- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, -- (uint32_t)((uint64_t)wptr >> 32)); -- WREG32(mmCP_PQ_WPTR_POLL_CNTL1, -- get_queue_mask(pipe_id, queue_id)); -- } -+ /* Copy userspace write pointer value to register. -+ * Doorbell logic is active and will monitor subsequent changes. -+ */ -+ if (read_user_wptr(mm, wptr, &wptr_val)) -+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); - - /* Write CP_HQD_ACTIVE last. */ - for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) -@@ -499,20 +481,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); - -- if (mm) { -- if (mm == current->mm) { -- /* Running in the correct user process context */ -- wptr_valid = !get_user(data, wptr); -- } else if (current->mm == NULL) { -- /* A kernel thread can temporarily use a user -- * process context for AIO -- */ -- use_mm(mm); -- wptr_valid = !get_user(data, wptr); -- unuse_mm(mm); -- } -- -- if (wptr_valid) -+ if (read_user_wptr(mm, wptr, &data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); - else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, -@@ -783,7 +752,6 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, - { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t data; -- bool wptr_valid = false; - - mutex_lock(&adev->grbm_idx_mutex); - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -index aed08fe..ba316e8 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -@@ -24,7 +24,6 @@ - #include <linux/fdtable.h> - #include <linux/uaccess.h> - #include <linux/firmware.h> --#include <linux/mmu_context.h> - #include <drm/drmP.h> - #include "amdgpu.h" - #include "amdgpu_amdkfd.h" -@@ -82,7 +81,9 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); - static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr); -+ uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t wptr_shift, uint32_t wptr_mask, -+ struct mm_struct *mm); - static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -@@ -265,16 +266,6 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, - lock_srbm(kgd, mec, pipe, queue_id, 0); - } - --static uint32_t get_queue_mask(uint32_t pipe_id, uint32_t queue_id) --{ -- /* assumes that pipe0 is used by graphics and that the correct -- * MEC is selected by acquire_queue already -- */ -- unsigned bit = ((pipe_id+1) * VI_QUEUES_PER_PIPE_MEC + queue_id) & 31; -- -- return ((uint32_t)1) << bit; --} -- - static void release_queue(struct kgd_dev *kgd) - { - unlock_srbm(kgd); -@@ -372,12 +363,14 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) - } - - static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr) -+ uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t wptr_shift, uint32_t wptr_mask, -+ struct mm_struct *mm) - { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct vi_mqd *m; - uint32_t *mqd_hqd; -- uint32_t reg; -+ uint32_t reg, wptr_val; - - m = get_mqd(mqd); - -@@ -403,23 +396,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) - WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); - -- if (wptr) { -- /* Don't read wptr with get_user because the user -- * context may not be accessible (if this function -- * runs in a work queue). Instead trigger a one-shot -- * polling read from memory in the CP. This assumes -- * that wptr is GPU-accessible in the queue's VMID via -- * ATC or SVM. WPTR==RPTR before starting the poll so -- * the CP starts fetching new commands from the right -- * place. -- */ -- WREG32(mmCP_HQD_PQ_WPTR, m->cp_hqd_pq_rptr); -- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, (uint32_t)(uint64_t)wptr); -- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, -- (uint32_t)((uint64_t)wptr >> 32)); -- WREG32(mmCP_PQ_WPTR_POLL_CNTL1, -- get_queue_mask(pipe_id, queue_id)); -- } -+ /* Copy userspace write pointer value to register. -+ * Doorbell logic is active and will monitor subsequent changes. -+ */ -+ if (read_user_wptr(mm, wptr, &wptr_val)) -+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); - - /* Write CP_HQD_ACTIVE last. */ - for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) -@@ -474,7 +455,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t sdma_base_addr; - uint32_t temp, timeout = 2000; - uint32_t data; -- bool wptr_valid = false; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); -@@ -505,20 +485,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdmax_rlcx_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - -- if (mm) { -- if (mm == current->mm) { -- /* Running in the correct user process context */ -- wptr_valid = !get_user(data, wptr); -- } else if (current->mm == NULL) { -- /* A kernel thread can temporarily use a user -- * process context for AIO -- */ -- use_mm(mm); -- wptr_valid = !get_user(data, wptr); -- unuse_mm(mm); -- } -- } -- if (wptr_valid) -+ if (read_user_wptr(mm, wptr, &data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); - else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -index 0cf4a62..ffeea82 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -@@ -328,9 +328,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, - if (!q->properties.is_active) - return 0; - -- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, -- q->queue, (uint32_t __user *) q->properties.write_ptr, -- q->process->mm); -+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, -+ q->process->mm); - if (retval != 0) { - deallocate_hqd(dqm, q); - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); -@@ -447,10 +446,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) - (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { - if (q->properties.is_active) -- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, -- q->queue, -- (uint32_t __user *)q->properties.write_ptr, -- q->process->mm); -+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, -+ &q->properties, q->process->mm); - else if (prev_active) - retval = mqd->destroy_mqd(mqd, q->mqd, - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, -@@ -590,11 +587,9 @@ int process_restore_queues(struct device_queue_manager *dqm, - if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && - (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) -- retval = mqd->load_mqd( -- mqd, q->mqd, q->pipe, q->queue, -- (uint32_t __user *) -- q->properties.write_ptr, -- q->process->mm); -+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, -+ q->queue, &q->properties, -+ q->process->mm); - dqm->queue_count++; - } - } -@@ -826,7 +821,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, - return retval; - } - -- retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL, NULL); -+ retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); - if (retval != 0) { - deallocate_sdma_queue(dqm, q->sdma_id); - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c -index f19f2b3..126d848 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c -@@ -144,7 +144,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, - kq->queue->pipe = KFD_CIK_HIQ_PIPE; - kq->queue->queue = KFD_CIK_HIQ_QUEUE; - kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, -- kq->queue->queue, NULL, NULL); -+ kq->queue->queue, &kq->queue->properties, -+ NULL); - } else { - /* allocate fence for DIQ */ - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h -index 4be3267..8972bcf 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h -@@ -67,7 +67,8 @@ struct mqd_manager { - - int (*load_mqd)(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, -- uint32_t __user *wptr, struct mm_struct *mms); -+ struct queue_properties *p, -+ struct mm_struct *mms); - - int (*update_mqd)(struct mqd_manager *mm, void *mqd, - struct queue_properties *q); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c -index 7528265..11e85d3 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c -@@ -207,18 +207,25 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - } - - static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t queue_id, struct queue_properties *p, - struct mm_struct *mms) - { -- return mm->dev->kfd2kgd->hqd_load -- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); -+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ -+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); -+ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); -+ -+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, -+ (uint32_t __user *)p->write_ptr, -+ wptr_shift, wptr_mask, mms); - } - - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, -- uint32_t pipe_id, uint32_t queue_id, -- uint32_t __user *wptr, struct mm_struct *mms) -+ uint32_t pipe_id, uint32_t queue_id, -+ struct queue_properties *p, struct mm_struct *mms) - { -- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, wptr, mms); -+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, -+ (uint32_t __user *)p->write_ptr, -+ mms); - } - - static int __update_mqd(struct mqd_manager *mm, void *mqd, -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c -index b0ea0d2..0050821 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c -@@ -173,10 +173,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, - - static int load_mqd(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, -- uint32_t __user *wptr, struct mm_struct *mms) -+ struct queue_properties *p, struct mm_struct *mms) - { -- return mm->dev->kfd2kgd->hqd_load -- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); -+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ -+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); -+ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); -+ -+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, -+ (uint32_t __user *)p->write_ptr, -+ wptr_shift, wptr_mask, mms); - } - - static int __update_mqd(struct mqd_manager *mm, void *mqd, -@@ -371,9 +376,11 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, -- uint32_t __user *wptr, struct mm_struct *mms) -+ struct queue_properties *p, struct mm_struct *mms) - { -- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, wptr, mms); -+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, -+ (uint32_t __user *)p->write_ptr, -+ mms); - } - - static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, -diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -index 1971537..910dca1 100644 ---- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -@@ -183,8 +183,7 @@ struct tile_config { - * sceduling mode. - * - * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. -- * used only for no HWS mode. If mm is passed in, its mmap_sem must be -- * read-locked. -+ * used only for no HWS mode. - * - * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. - * Array is allocated with kmalloc, needs to be freed with kfree by caller. -@@ -268,7 +267,9 @@ struct kfd2kgd_calls { - - - int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -- uint32_t queue_id, uint32_t __user *wptr); -+ uint32_t queue_id, uint32_t __user *wptr, -+ uint32_t wptr_shift, uint32_t wptr_mask, -+ struct mm_struct *mm); - - int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); --- -2.7.4 - |