about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch 318
1 files changed, 318 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch
new file mode 100644
index 00000000..3cda2fb0
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0667-drm-amdgpu-fix-sdma-v4-startup-under-SRIOV.patch
@@ -0,0 +1,318 @@
+From 52204a252c3a02e779a4107e21aeb5389de5e14f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Wed, 26 Sep 2018 19:56:41 +0200
+Subject: [PATCH 0667/2940] drm/amdgpu: fix sdma v4 startup under SRIOV
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Under SRIOV we were enabling the ring buffer before it was initialized.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++-------------
+ 1 file changed, 116 insertions(+), 118 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+index 7a8c9172d30a..fde27d8bfeb2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+@@ -675,13 +675,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
+ * sdma_v4_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
++ * @i: instance to resume
+ *
+ * Set up the gfx DMA ring buffers and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
++static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+ {
+- struct amdgpu_ring *ring;
++ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 rb_bufsz;
+ u32 wb_offset;
+@@ -689,129 +690,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+- int i, r;
+
+- for (i = 0; i < adev->sdma.num_instances; i++) {
+- ring = &adev->sdma.instance[i].ring;
+- wb_offset = (ring->rptr_offs * 4);
++ wb_offset = (ring->rptr_offs * 4);
+
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+- /* Set ring buffer size in dwords */
+- rb_bufsz = order_base_2(ring->ring_size / 4);
+- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
++ /* Set ring buffer size in dwords */
++ rb_bufsz = order_base_2(ring->ring_size / 4);
++ rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ #ifdef __BIG_ENDIAN
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+- RPTR_WRITEBACK_SWAP_ENABLE, 1);
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
++ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ #endif
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+- /* Initialize the ring buffer's read and write pointers */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
++ /* Initialize the ring buffer's read and write pointers */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+
+- /* set the wb address whether it's enabled or not */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
++ /* set the wb address whether it's enabled or not */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
++ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
++ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+- ring->wptr = 0;
++ ring->wptr = 0;
+
+- /* before programing wptr to a less value, need set minor_ptr_update first */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
++ /* before programing wptr to a less value, need set minor_ptr_update first */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+- }
++ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
++ }
+
+- doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+- doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
++ doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
++ doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+
+- if (ring->use_doorbell) {
+- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+- OFFSET, ring->doorbell_index);
+- } else {
+- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+- }
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+- ring->doorbell_index);
+-
+- if (amdgpu_sriov_vf(adev))
+- sdma_v4_0_ring_set_wptr(ring);
+-
+- /* set minor_ptr_update to 0 after wptr programed */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+-
+- /* set utc l1 enable flag always to 1 */
+- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+-
+- if (!amdgpu_sriov_vf(adev)) {
+- /* unhalt engine */
+- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+- }
++ if (ring->use_doorbell) {
++ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
++ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
++ OFFSET, ring->doorbell_index);
++ } else {
++ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
++ }
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
++ adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
++ ring->doorbell_index);
+
+- /* setup the wptr shadow polling */
+- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+- lower_32_bits(wptr_gpu_addr));
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+- upper_32_bits(wptr_gpu_addr));
+- wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+- if (amdgpu_sriov_vf(adev))
+- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
+- else
+- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
++ if (amdgpu_sriov_vf(adev))
++ sdma_v4_0_ring_set_wptr(ring);
+
+- /* enable DMA RB */
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
++ /* set minor_ptr_update to 0 after wptr programed */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+-#ifdef __BIG_ENDIAN
+- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+-#endif
+- /* enable DMA IBs */
+- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
++ /* set utc l1 enable flag always to 1 */
++ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
++ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+- ring->ready = true;
++ if (!amdgpu_sriov_vf(adev)) {
++ /* unhalt engine */
++ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
++ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
++ }
+
+- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+- sdma_v4_0_ctx_switch_enable(adev, true);
+- sdma_v4_0_enable(adev, true);
+- }
++ /* setup the wptr shadow polling */
++ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
++ lower_32_bits(wptr_gpu_addr));
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
++ upper_32_bits(wptr_gpu_addr));
++ wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
++ if (amdgpu_sriov_vf(adev))
++ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
++ else
++ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+
+- r = amdgpu_ring_test_ring(ring);
+- if (r) {
+- ring->ready = false;
+- return r;
+- }
++ /* enable DMA RB */
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+- if (adev->mman.buffer_funcs_ring == ring)
+- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+-
+- }
++ ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
++#ifdef __BIG_ENDIAN
++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
++#endif
++ /* enable DMA IBs */
++ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+- return 0;
++ ring->ready = true;
+ }
+
+ static void
+@@ -943,33 +923,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
+ */
+ static int sdma_v4_0_start(struct amdgpu_device *adev)
+ {
+- int r = 0;
++ struct amdgpu_ring *ring;
++ int i, r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_0_ctx_switch_enable(adev, false);
+ sdma_v4_0_enable(adev, false);
++ } else {
+
+- /* set RB registers */
+- r = sdma_v4_0_gfx_resume(adev);
+- return r;
++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
++ r = sdma_v4_0_load_microcode(adev);
++ if (r)
++ return r;
++ }
++
++ /* unhalt the MEs */
++ sdma_v4_0_enable(adev, true);
++ /* enable sdma ring preemption */
++ sdma_v4_0_ctx_switch_enable(adev, true);
+ }
+
+- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+- r = sdma_v4_0_load_microcode(adev);
++ /* start the gfx rings and rlc compute queues */
++ for (i = 0; i < adev->sdma.num_instances; i++)
++ sdma_v4_0_gfx_resume(adev, i);
++
++ if (amdgpu_sriov_vf(adev)) {
++ sdma_v4_0_ctx_switch_enable(adev, true);
++ sdma_v4_0_enable(adev, true);
++ } else {
++ r = sdma_v4_0_rlc_resume(adev);
+ if (r)
+ return r;
+ }
+
+- /* unhalt the MEs */
+- sdma_v4_0_enable(adev, true);
+- /* enable sdma ring preemption */
+- sdma_v4_0_ctx_switch_enable(adev, true);
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ ring = &adev->sdma.instance[i].ring;
+
+- /* start the gfx rings and rlc compute queues */
+- r = sdma_v4_0_gfx_resume(adev);
+- if (r)
+- return r;
+- r = sdma_v4_0_rlc_resume(adev);
++ r = amdgpu_ring_test_ring(ring);
++ if (r) {
++ ring->ready = false;
++ return r;
++ }
++
++ if (adev->mman.buffer_funcs_ring == ring)
++ amdgpu_ttm_set_buffer_funcs_status(adev, true);
++ }
+
+ return r;
+ }
+--
+2.17.1
+