diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch new file mode 100644 index 00000000..3cda2fb0 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch @@ -0,0 +1,318 @@ +From 52204a252c3a02e779a4107e21aeb5389de5e14f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Wed, 26 Sep 2018 19:56:41 +0200 +Subject: [PATCH 0667/2940] drm/amdgpu: fix sdma v4 startup under SRIOV +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Under SRIOV we were enabling the ring buffer before it was initialized. + +Signed-off-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Huang Rui <ray.huang@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++------------- + 1 file changed, 116 insertions(+), 118 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +index 7a8c9172d30a..fde27d8bfeb2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +@@ -675,13 +675,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) + * sdma_v4_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer ++ * @i: instance to resume + * + * Set up the gfx DMA ring buffers and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) ++static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) + { +- struct amdgpu_ring *ring; ++ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; + u32 rb_bufsz; + u32 wb_offset; +@@ -689,129 +690,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) + u32 doorbell_offset; + u32 temp; + u64 wptr_gpu_addr; +- int i, r; + +- for (i = 0; i < adev->sdma.num_instances; i++) { +- ring = &adev->sdma.instance[i].ring; +- wb_offset = (ring->rptr_offs * 4); ++ wb_offset = (ring->rptr_offs * 4); + +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + +- /* Set ring buffer size in dwords */ +- rb_bufsz = order_base_2(ring->ring_size / 4); +- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); ++ /* Set ring buffer size in dwords */ ++ rb_bufsz = order_base_2(ring->ring_size / 4); ++ rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); + #ifdef __BIG_ENDIAN +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, +- RPTR_WRITEBACK_SWAP_ENABLE, 1); ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, ++ RPTR_WRITEBACK_SWAP_ENABLE, 1); + #endif +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + +- /* Initialize the ring buffer's read and write pointers */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); ++ /* Initialize the ring buffer's read and write pointers */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + +- /* set the wb address whether it's enabled or not */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), +- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), +- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); ++ /* set the wb address whether it's enabled or not */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), ++ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), ++ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + +- ring->wptr = 0; ++ ring->wptr = 0; + +- /* before programing wptr to a less value, need set minor_ptr_update first */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); ++ /* before programing wptr to a less value, need set minor_ptr_update first */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + +- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); +- } ++ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); ++ } + +- doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); +- doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); ++ doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); ++ doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + +- if (ring->use_doorbell) { +- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); +- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, +- OFFSET, ring->doorbell_index); +- } else { +- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); +- } +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); +- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, +- ring->doorbell_index); +- +- if (amdgpu_sriov_vf(adev)) +- sdma_v4_0_ring_set_wptr(ring); +- +- /* set minor_ptr_update to 0 after wptr programed */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); +- +- /* set utc l1 enable flag always to 1 */ +- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); +- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); +- +- if (!amdgpu_sriov_vf(adev)) { +- /* unhalt engine */ +- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); +- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); +- } ++ if (ring->use_doorbell) { ++ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); ++ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, ++ OFFSET, ring->doorbell_index); ++ } else { ++ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); ++ } ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); ++ adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ++ ring->doorbell_index); + +- /* setup the wptr shadow polling */ +- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), +- lower_32_bits(wptr_gpu_addr)); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), +- upper_32_bits(wptr_gpu_addr)); +- wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); +- if (amdgpu_sriov_vf(adev)) +- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); +- else +- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); ++ if (amdgpu_sriov_vf(adev)) ++ sdma_v4_0_ring_set_wptr(ring); + +- /* enable DMA RB */ +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); ++ /* set minor_ptr_update to 0 after wptr programed */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + +- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); +- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +-#ifdef __BIG_ENDIAN +- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +-#endif +- /* enable DMA IBs */ +- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ++ /* set utc l1 enable flag always to 1 */ ++ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); ++ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + +- ring->ready = true; ++ if (!amdgpu_sriov_vf(adev)) { ++ /* unhalt engine */ ++ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); ++ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); ++ } + +- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ +- sdma_v4_0_ctx_switch_enable(adev, true); +- sdma_v4_0_enable(adev, true); +- } ++ /* setup the wptr shadow polling */ ++ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), ++ lower_32_bits(wptr_gpu_addr)); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), ++ upper_32_bits(wptr_gpu_addr)); ++ wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); ++ if (amdgpu_sriov_vf(adev)) ++ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); ++ else ++ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + +- r = amdgpu_ring_test_ring(ring); +- if (r) { +- ring->ready = false; +- return r; +- } ++ /* enable DMA RB */ ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + +- if (adev->mman.buffer_funcs_ring == ring) +- amdgpu_ttm_set_buffer_funcs_status(adev, true); +- +- } ++ ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); ++#ifdef __BIG_ENDIAN ++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); ++#endif ++ /* enable DMA IBs */ ++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + +- return 0; ++ ring->ready = true; + } + + static void +@@ -943,33 +923,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) + */ + static int sdma_v4_0_start(struct amdgpu_device *adev) + { +- int r = 0; ++ struct amdgpu_ring *ring; ++ int i, r; + + if (amdgpu_sriov_vf(adev)) { + sdma_v4_0_ctx_switch_enable(adev, false); + sdma_v4_0_enable(adev, false); ++ } else { + +- /* set RB registers */ +- r = sdma_v4_0_gfx_resume(adev); +- return r; ++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { ++ r = sdma_v4_0_load_microcode(adev); ++ if (r) ++ return r; ++ } ++ ++ /* unhalt the MEs */ ++ sdma_v4_0_enable(adev, true); ++ /* enable sdma ring preemption */ ++ sdma_v4_0_ctx_switch_enable(adev, true); + } + +- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { +- r = sdma_v4_0_load_microcode(adev); ++ /* start the gfx rings and rlc compute queues */ ++ for (i = 0; i < adev->sdma.num_instances; i++) ++ sdma_v4_0_gfx_resume(adev, i); ++ ++ if (amdgpu_sriov_vf(adev)) { ++ sdma_v4_0_ctx_switch_enable(adev, true); ++ sdma_v4_0_enable(adev, true); ++ } else { ++ r = sdma_v4_0_rlc_resume(adev); + if (r) + return r; + } + +- /* unhalt the MEs */ +- sdma_v4_0_enable(adev, true); +- /* enable sdma ring preemption */ +- sdma_v4_0_ctx_switch_enable(adev, true); ++ for (i = 0; i < adev->sdma.num_instances; i++) { ++ ring = &adev->sdma.instance[i].ring; + +- /* start the gfx rings and rlc compute queues */ +- r = sdma_v4_0_gfx_resume(adev); +- if (r) +- return r; +- r = sdma_v4_0_rlc_resume(adev); ++ r = amdgpu_ring_test_ring(ring); ++ if (r) { ++ ring->ready = false; ++ return r; ++ } ++ ++ if (adev->mman.buffer_funcs_ring == ring) ++ amdgpu_ttm_set_buffer_funcs_status(adev, true); ++ } + + return r; + } +-- +2.17.1 + |