diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch new file mode 100644 index 00000000..934b7aae --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1551-drm-amdgpu-Load-write-pointer-manually-during-KFD-MQ.patch @@ -0,0 +1,496 @@ +From fd2a595582dd099adc3ac881ce931643ef9935d5 Mon Sep 17 00:00:00 2001 +From: Jay Cornwall <Jay.Cornwall@amd.com> +Date: Mon, 24 Oct 2016 17:12:50 -0500 +Subject: [PATCH 1551/4131] drm/amdgpu: Load write pointer manually during KFD + MQD setup + +CP_HQD_PQ_WPTR_POLL* fails intermittently on Tonga due to an RTL bug +in CP clock gating logic. + +Load the write pointer inside the driver after the doorbell logic +has been enabled. Apply the same shift/mask used by CP microcode to +populate CP_HQD_PQ_WPTR manually. Unify this code path with MQD setup +for SDMA queues. + +Change-Id: I3f4b8a3b778d00310c85c17f5af740cb3aa2844b +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23 +++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 60 +++++----------------- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 59 +++++---------------- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 21 +++----- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 19 ++++--- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 17 ++++-- + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 7 +-- + 10 files changed, 94 insertions(+), 121 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index 03432c7..f425878 100755 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -26,6 +26,7 @@ + #include "amdgpu.h" + #include "amdgpu_gfx.h" + #include <linux/module.h> ++#include <linux/mmu_context.h> + + #define AMDKFD_SKIP_UNCOMPILED_CODE 1 + +@@ -535,3 +536,25 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + dma_buf_put(dma_buf); + return r; + } ++ ++bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr, ++ uint32_t *wptr_val) ++{ ++ bool wptr_valid = false; ++ ++ if (mm && wptr) { ++ if (mm == current->mm) { ++ /* Running in the correct user process context */ ++ wptr_valid = !get_user(*wptr_val, wptr); ++ } else if (current->mm == NULL) { ++ /* A kernel thread can temporarily use a user ++ * process context for AIO ++ */ ++ use_mm(mm); ++ wptr_valid = !get_user(*wptr_val, wptr); ++ unuse_mm(mm); ++ } ++ } ++ ++ return wptr_valid; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 4e67b1e..dcf2c5a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -153,6 +153,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags); + ++bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr, ++ uint32_t *wptr_val); ++ + /* GPUVM API */ + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index 07a843f..7ab108e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -23,7 +23,6 @@ + #include <linux/fdtable.h> + #include <linux/uaccess.h> + #include <linux/firmware.h> +-#include <linux/mmu_context.h> + #include <drm/drmP.h> + #include "amdgpu.h" + #include "amdgpu_amdkfd.h" +@@ -105,7 +104,9 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, u + static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr); ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm); + static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +@@ -274,16 +275,6 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + lock_srbm(kgd, mec, pipe, queue_id, 0); + } + +-static uint32_t get_queue_mask(uint32_t pipe_id, uint32_t queue_id) +-{ +- /* assumes that pipe0 is used by graphics and that the correct +- * MEC is selected by acquire_queue already +- */ +- unsigned bit = ((pipe_id+1) * CIK_QUEUES_PER_PIPE_MEC + queue_id) & 31; +- +- return ((uint32_t)1) << bit; +-} +- + static void release_queue(struct kgd_dev *kgd) + { + unlock_srbm(kgd); +@@ -380,12 +371,15 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) + } + + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr) ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm) ++ + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_mqd *m; + uint32_t *mqd_hqd; +- uint32_t reg; ++ uint32_t reg, wptr_val; + + m = get_mqd(mqd); + +@@ -397,23 +391,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + for (reg = mmCP_HQD_VMID; reg <= mmCP_MQD_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + +- if (wptr) { +- /* Don't read wptr with get_user because the user +- * context may not be accessible (if this function +- * runs in a work queue). Instead trigger a one-shot +- * polling read from memory in the CP. This assumes +- * that wptr is GPU-accessible in the queue's VMID via +- * ATC or SVM. WPTR==RPTR before starting the poll so +- * the CP starts fetching new commands from the right +- * place. +- */ +- WREG32(mmCP_HQD_PQ_WPTR, m->cp_hqd_pq_rptr); +- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, (uint32_t)(uint64_t)wptr); +- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, +- (uint32_t)((uint64_t)wptr >> 32)); +- WREG32(mmCP_PQ_WPTR_POLL_CNTL1, +- get_queue_mask(pipe_id, queue_id)); +- } ++ /* Copy userspace write pointer value to register. ++ * Doorbell logic is active and will monitor subsequent changes. ++ */ ++ if (read_user_wptr(mm, wptr, &wptr_val)) ++ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + /* Write CP_HQD_ACTIVE last. */ + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) +@@ -499,20 +481,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + +- if (mm) { +- if (mm == current->mm) { +- /* Running in the correct user process context */ +- wptr_valid = !get_user(data, wptr); +- } else if (current->mm == NULL) { +- /* A kernel thread can temporarily use a user +- * process context for AIO +- */ +- use_mm(mm); +- wptr_valid = !get_user(data, wptr); +- unuse_mm(mm); +- } +- +- if (wptr_valid) ++ if (read_user_wptr(mm, wptr, &data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, +@@ -783,7 +752,6 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data; +- bool wptr_valid = false; + + mutex_lock(&adev->grbm_idx_mutex); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index aed08fe..ba316e8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -24,7 +24,6 @@ + #include <linux/fdtable.h> + #include <linux/uaccess.h> + #include <linux/firmware.h> +-#include <linux/mmu_context.h> + #include <drm/drmP.h> + #include "amdgpu.h" + #include "amdgpu_amdkfd.h" +@@ -82,7 +81,9 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr); ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm); + static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +@@ -265,16 +266,6 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + lock_srbm(kgd, mec, pipe, queue_id, 0); + } + +-static uint32_t get_queue_mask(uint32_t pipe_id, uint32_t queue_id) +-{ +- /* assumes that pipe0 is used by graphics and that the correct +- * MEC is selected by acquire_queue already +- */ +- unsigned bit = ((pipe_id+1) * VI_QUEUES_PER_PIPE_MEC + queue_id) & 31; +- +- return ((uint32_t)1) << bit; +-} +- + static void release_queue(struct kgd_dev *kgd) + { + unlock_srbm(kgd); +@@ -372,12 +363,14 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) + } + + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr) ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_mqd *m; + uint32_t *mqd_hqd; +- uint32_t reg; ++ uint32_t reg, wptr_val; + + m = get_mqd(mqd); + +@@ -403,23 +396,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + +- if (wptr) { +- /* Don't read wptr with get_user because the user +- * context may not be accessible (if this function +- * runs in a work queue). Instead trigger a one-shot +- * polling read from memory in the CP. This assumes +- * that wptr is GPU-accessible in the queue's VMID via +- * ATC or SVM. WPTR==RPTR before starting the poll so +- * the CP starts fetching new commands from the right +- * place. +- */ +- WREG32(mmCP_HQD_PQ_WPTR, m->cp_hqd_pq_rptr); +- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, (uint32_t)(uint64_t)wptr); +- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, +- (uint32_t)((uint64_t)wptr >> 32)); +- WREG32(mmCP_PQ_WPTR_POLL_CNTL1, +- get_queue_mask(pipe_id, queue_id)); +- } ++ /* Copy userspace write pointer value to register. ++ * Doorbell logic is active and will monitor subsequent changes. ++ */ ++ if (read_user_wptr(mm, wptr, &wptr_val)) ++ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + /* Write CP_HQD_ACTIVE last. */ + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) +@@ -474,7 +455,6 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t sdma_base_addr; + uint32_t temp, timeout = 2000; + uint32_t data; +- bool wptr_valid = false; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); +@@ -505,20 +485,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdmax_rlcx_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + +- if (mm) { +- if (mm == current->mm) { +- /* Running in the correct user process context */ +- wptr_valid = !get_user(data, wptr); +- } else if (current->mm == NULL) { +- /* A kernel thread can temporarily use a user +- * process context for AIO +- */ +- use_mm(mm); +- wptr_valid = !get_user(data, wptr); +- unuse_mm(mm); +- } +- } +- if (wptr_valid) ++ if (read_user_wptr(mm, wptr, &data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 0cf4a62..ffeea82 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -328,9 +328,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + if (!q->properties.is_active) + return 0; + +- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, +- q->queue, (uint32_t __user *) q->properties.write_ptr, +- q->process->mm); ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, ++ q->process->mm); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +@@ -447,10 +446,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + if (q->properties.is_active) +- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, +- q->queue, +- (uint32_t __user *)q->properties.write_ptr, +- q->process->mm); ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, ++ &q->properties, q->process->mm); + else if (prev_active) + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, +@@ -590,11 +587,9 @@ int process_restore_queues(struct device_queue_manager *dqm, + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) +- retval = mqd->load_mqd( +- mqd, q->mqd, q->pipe, q->queue, +- (uint32_t __user *) +- q->properties.write_ptr, +- q->process->mm); ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, ++ q->queue, &q->properties, ++ q->process->mm); + dqm->queue_count++; + } + } +@@ -826,7 +821,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + return retval; + } + +- retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL, NULL); ++ retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index f19f2b3..126d848 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -144,7 +144,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + kq->queue->pipe = KFD_CIK_HIQ_PIPE; + kq->queue->queue = KFD_CIK_HIQ_QUEUE; + kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, +- kq->queue->queue, NULL, NULL); ++ kq->queue->queue, &kq->queue->properties, ++ NULL); + } else { + /* allocate fence for DIQ */ + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +index 4be3267..8972bcf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +@@ -67,7 +67,8 @@ struct mqd_manager { + + int (*load_mqd)(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, +- uint32_t __user *wptr, struct mm_struct *mms); ++ struct queue_properties *p, ++ struct mm_struct *mms); + + int (*update_mqd)(struct mqd_manager *mm, void *mqd, + struct queue_properties *q); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 7528265..11e85d3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -207,18 +207,25 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + } + + static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t queue_id, struct queue_properties *p, + struct mm_struct *mms) + { +- return mm->dev->kfd2kgd->hqd_load +- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); ++ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ ++ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); ++ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); ++ ++ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, ++ (uint32_t __user *)p->write_ptr, ++ wptr_shift, wptr_mask, mms); + } + + static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, +- uint32_t pipe_id, uint32_t queue_id, +- uint32_t __user *wptr, struct mm_struct *mms) ++ uint32_t pipe_id, uint32_t queue_id, ++ struct queue_properties *p, struct mm_struct *mms) + { +- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, wptr, mms); ++ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, ++ (uint32_t __user *)p->write_ptr, ++ mms); + } + + static int __update_mqd(struct mqd_manager *mm, void *mqd, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index b0ea0d2..0050821 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -173,10 +173,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + + static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, +- uint32_t __user *wptr, struct mm_struct *mms) ++ struct queue_properties *p, struct mm_struct *mms) + { +- return mm->dev->kfd2kgd->hqd_load +- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); ++ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ ++ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); ++ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); ++ ++ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, ++ (uint32_t __user *)p->write_ptr, ++ wptr_shift, wptr_mask, mms); + } + + static int __update_mqd(struct mqd_manager *mm, void *mqd, +@@ -371,9 +376,11 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + + static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, +- uint32_t __user *wptr, struct mm_struct *mms) ++ struct queue_properties *p, struct mm_struct *mms) + { +- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, wptr, mms); ++ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, ++ (uint32_t __user *)p->write_ptr, ++ mms); + } + + static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, +diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +index 1971537..910dca1 100644 +--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +@@ -183,8 +183,7 @@ struct tile_config { + * sceduling mode. + * + * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. +- * used only for no HWS mode. If mm is passed in, its mmap_sem must be +- * read-locked. ++ * used only for no HWS mode. + * + * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. + * Array is allocated with kmalloc, needs to be freed with kfree by caller. +@@ -268,7 +267,9 @@ struct kfd2kgd_calls { + + + int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr); ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm); + + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +-- +2.7.4 + |