about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch | 200
1 files changed, 200 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch
new file mode 100644
index 00000000..faa9348c
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch
@@ -0,0 +1,200 @@
+From f038f14a4d4a6d2c719de9600ba9371f19a51412 Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <Jay.Cornwall@amd.com>
+Date: Thu, 6 Oct 2016 19:48:22 -0500
+Subject: [PATCH 1532/4131] drm/amdgpu: Synchronize KFD HQD load protocol with
+ CP scheduler
+
+The non-CP scheduling path should write to the entire HQD without
+assuming that some registers do not need to be initialized. These
+assumptions are otherwise challenging to verify.
+
+On the Gfx8 path this fixes AQL support, which was broken by a missing
+write to CP_HQD_IQ_RPTR. Additionally, implement a workaround for an
+erratum concerning the EOP queue on Tonga ASICs to fix hangs when
+submitting commands to KFD queues.
+
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+
+ Conflicts:
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+
+Change-Id: Ia3dae4001fde5d8d093ad460ebbd31899a7329c8
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 54 ++++--------------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 68 +++++++++--------------
+ 2 files changed, 37 insertions(+), 85 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+index 62270ca..3b812a8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+@@ -382,26 +382,18 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ {
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_mqd *m;
++ uint32_t *mqd_hqd;
++ uint32_t reg;
+
+ m = get_mqd(mqd);
+
+-
+ acquire_queue(kgd, pipe_id, queue_id);
+
+- WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+- WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+- WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+-
+- WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+- WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+- WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+- WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
++ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
++ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->cp_hqd_pq_rptr_report_addr_hi);
+- WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
+-
+- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
++ for (reg = mmCP_HQD_VMID; reg <= mmCP_MQD_CONTROL; reg++)
++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+@@ -419,34 +411,12 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ (uint32_t)((uint64_t)wptr >> 32));
+ WREG32(mmCP_PQ_WPTR_POLL_CNTL1,
+ get_queue_mask(pipe_id, queue_id));
+- } else
+- WREG32(mmCP_HQD_PQ_WPTR, 0);
+-
+-
+- WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+- WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
+- WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
+-
+- WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
+-
+- WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+- WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
+- WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
+-
+- WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
+- WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
+- WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
+- WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
+-
+- WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+-
+- WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+- WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+-
+- WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
+-
+- WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+- release_queue(kgd);
++ }
++
++ /* Write CP_HQD_ACTIVE last. */
++ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++)
++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
++ release_queue(kgd);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+index 4db9637..aed08fe 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+@@ -376,24 +376,32 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ {
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct vi_mqd *m;
++ uint32_t *mqd_hqd;
++ uint32_t reg;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+- WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+- WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+- WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
++ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
++ mqd_hqd = &m->cp_mqd_base_addr_lo;
++
++ for (reg = mmCP_HQD_VMID; reg <= mmCP_HQD_EOP_CONTROL; reg++)
++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+- WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+- WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+- WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+- m->cp_hqd_pq_rptr_report_addr_hi);
+- WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+- WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
++ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
++ * This is safe since EOP RPTR==WPTR for any inactive HQD
++ * on ASICs that do not support context-save.
++ * EOP writes/reads can start anywhere in the ring.
++ */
++ if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
++ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
++ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
++ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
++ }
++
++ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+@@ -411,37 +419,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ (uint32_t)((uint64_t)wptr >> 32));
+ WREG32(mmCP_PQ_WPTR_POLL_CNTL1,
+ get_queue_mask(pipe_id, queue_id));
+- } else
+- WREG32(mmCP_HQD_PQ_WPTR, 0);
+-
+- WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+- WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+- WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+- WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+-
+- WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
+- WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
+- WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
+- WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+- WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+- WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
+-
+- WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
+- WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
+- WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
+- WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
+- WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
+- WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
+- WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
+-
+- WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+-
+- WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
+- WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
+- WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+- WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
+-
+- WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
++ }
++
++ /* Write CP_HQD_ACTIVE last. */
++ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++)
++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ release_queue(kgd);
+
+--
+2.7.4
+