diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch new file mode 100644 index 00000000..faa9348c --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch @@ -0,0 +1,200 @@ +From f038f14a4d4a6d2c719de9600ba9371f19a51412 Mon Sep 17 00:00:00 2001 +From: Jay Cornwall <Jay.Cornwall@amd.com> +Date: Thu, 6 Oct 2016 19:48:22 -0500 +Subject: [PATCH 1532/4131] drm/amdgpu: Synchronize KFD HQD load protocol with + CP scheduler + +The non-CP scheduling path should write to the entire HQD without +assuming that some registers do not need to be initialized. These +assumptions are otherwise challenging to verify. + +On the Gfx8 path this fixes AQL support due to a missing write to +CP_HQD_IQ_RPTR. Additionally, implement a workaround for an errata +concerning the EOP queue on Tonga ASICs to fix hangs when submitting +commands to KFD queues. + +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> + + Conflicts: + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c + +Change-Id: Ia3dae4001fde5d8d093ad460ebbd31899a7329c8 +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 54 ++++-------------- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 68 +++++++++-------------- + 2 files changed, 37 insertions(+), 85 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index 62270ca..3b812a8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -382,26 +382,18 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_mqd *m; ++ uint32_t *mqd_hqd; ++ uint32_t reg; + + m = get_mqd(mqd); + +- + acquire_queue(kgd, pipe_id, queue_id); + +- WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); +- WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); +- WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); +- +- WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); +- WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); +- WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); +- WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); ++ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ ++ mqd_hqd = &m->cp_mqd_base_addr_lo; + +- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); +- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->cp_hqd_pq_rptr_report_addr_hi); +- WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); +- +- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); ++ for (reg = mmCP_HQD_VMID; reg <= mmCP_MQD_CONTROL; reg++) ++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + if (wptr) { + /* Don't read wptr with get_user because the user +@@ -419,34 +411,12 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + (uint32_t)((uint64_t)wptr >> 32)); + WREG32(mmCP_PQ_WPTR_POLL_CNTL1, + get_queue_mask(pipe_id, queue_id)); +- } else +- WREG32(mmCP_HQD_PQ_WPTR, 0); +- +- +- WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); +- WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); +- WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); +- +- WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); +- +- WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); +- WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); +- WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); +- +- WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); +- WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); +- WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); +- WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); +- +- WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); +- +- WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); +- WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); +- +- WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); +- +- WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); +- release_queue(kgd); ++ } ++ ++ /* Write CP_HQD_ACTIVE last. */ ++ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) ++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); ++ release_queue(kgd); + + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index 4db9637..aed08fe 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -376,24 +376,32 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_mqd *m; ++ uint32_t *mqd_hqd; ++ uint32_t reg; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + +- WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); +- WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); +- WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); ++ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ ++ mqd_hqd = &m->cp_mqd_base_addr_lo; ++ ++ for (reg = mmCP_HQD_VMID; reg <= mmCP_HQD_EOP_CONTROL; reg++) ++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + +- WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); +- WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); +- WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); +- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); +- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, +- m->cp_hqd_pq_rptr_report_addr_hi); +- WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); +- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); +- WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); ++ /* Tonga errata: EOP RPTR/WPTR should be left unmodified. ++ * This is safe since EOP RPTR==WPTR for any inactive HQD ++ * on ASICs that do not support context-save. ++ * EOP writes/reads can start anywhere in the ring. ++ */ ++ if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { ++ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); ++ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); ++ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); ++ } ++ ++ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) ++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + if (wptr) { + /* Don't read wptr with get_user because the user +@@ -411,37 +419,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + (uint32_t)((uint64_t)wptr >> 32)); + WREG32(mmCP_PQ_WPTR_POLL_CNTL1, + get_queue_mask(pipe_id, queue_id)); +- } else +- WREG32(mmCP_HQD_PQ_WPTR, 0); +- +- WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); +- WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); +- WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); +- WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); +- +- WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); +- WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); +- WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); +- WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); +- WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); +- WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); +- +- WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); +- WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); +- WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); +- WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); +- WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); +- WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); +- WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); +- +- WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); +- +- WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); +- WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); +- WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); +- WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); +- +- WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); ++ } ++ ++ /* Write CP_HQD_ACTIVE last. */ ++ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++) ++ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + release_queue(kgd); + +-- +2.7.4 + |