diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3078-drm-amdgpu-add-high-priority-compute-support-for-gfx.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3078-drm-amdgpu-add-high-priority-compute-support-for-gfx.patch | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3078-drm-amdgpu-add-high-priority-compute-support-for-gfx.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3078-drm-amdgpu-add-high-priority-compute-support-for-gfx.patch new file mode 100644 index 00000000..85a8ef13 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3078-drm-amdgpu-add-high-priority-compute-support-for-gfx.patch @@ -0,0 +1,140 @@ +From a6813aad83510940e906c46d91def9dba00dff04 Mon Sep 17 00:00:00 2001 +From: Andres Rodriguez <andresx7@gmail.com> +Date: Tue, 2 Jan 2018 15:49:40 -0500 +Subject: [PATCH 3078/4131] drm/amdgpu: add high priority compute support for + gfx9 + +We follow the same approach as gfx8. The only changes are register +access macros. + +Tested on vega10. The execution latency results fall within the expected +ranges from the polaris10 data. + +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Andres Rodriguez <andresx7@gmail.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 100 ++++++++++++++++++++++++++++++++++ + 1 file changed, 100 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index 4f50ae4..a149b35 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -3740,6 +3740,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) + return wptr; + } + ++static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, ++ bool acquire) ++{ /* Set the SPI_WCL_PIPE_PERCENT entry of the pipe this ring runs on: the full VALUE mask when acquire is true, 0x1 otherwise. */ ++ struct amdgpu_device *adev = ring->adev; ++ int pipe_num, tmp, reg; ++ int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; ++ ++ pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; ++ ++ /* first me only has 2 entries, GFX and HP3D */ ++ if (ring->me > 0) ++ pipe_num -= 2; ++ ++ reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; /* per-pipe entry, addressed as an offset from the GFX register */ ++ tmp = RREG32(reg); ++ tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); /* update the VALUE field of the value just read, then write it back */ ++ WREG32(reg, tmp); ++} ++ ++static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, ++ struct amdgpu_ring *ring, ++ bool acquire) ++{ /* Under pipe_reserve_mutex: set or clear the bit for the pipe of ring in pipe_reserve_bitmap, then reprogram the pipe percent of every gfx and compute ring. */ ++ int i, pipe; ++ bool reserve; ++ struct amdgpu_ring *iring; ++ ++ mutex_lock(&adev->gfx.pipe_reserve_mutex); ++ pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); /* NOTE(review): queue index 0 is passed for every ring, so the bit appears to identify the pipe rather than a queue - confirm against amdgpu_gfx_queue_to_bit */ ++ if (acquire) ++ set_bit(pipe, adev->gfx.pipe_reserve_bitmap); ++ else ++ clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); ++ ++ if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { /* bitmap is empty: nothing is reserved any more */ ++ /* Clear all reservations - everyone reacquires all resources */ ++ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) ++ gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], ++ true); ++ ++ for (i = 0; i < adev->gfx.num_compute_rings; ++i) ++ gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], ++ true); ++ } else { ++ /* Lower all pipes without a current reservation */ ++ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { ++ iring = &adev->gfx.gfx_ring[i]; ++ pipe = amdgpu_gfx_queue_to_bit(adev, ++ iring->me, ++ iring->pipe, ++ 0); ++ reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); /* reserved pipes get acquire == true, all others are lowered to 0x1 */ ++ gfx_v9_0_ring_set_pipe_percent(iring, reserve); ++ } ++ ++ for (i = 0; i < adev->gfx.num_compute_rings; ++i) { ++ iring = &adev->gfx.compute_ring[i]; ++ pipe = amdgpu_gfx_queue_to_bit(adev, ++ iring->me, ++ iring->pipe, ++ 0); ++ reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); /* same rule for the compute rings */ ++ gfx_v9_0_ring_set_pipe_percent(iring, reserve); ++ } ++ } ++ ++ mutex_unlock(&adev->gfx.pipe_reserve_mutex); ++} ++ ++static void gfx_v9_0_hqd_set_priority(struct amdgpu_device 
*adev, ++ struct amdgpu_ring *ring, ++ bool acquire) ++{ /* Write the CP_HQD pipe and queue priority registers (0x2 and 0xf on acquire, 0x0 otherwise) for the me/pipe/queue of ring, selected via soc15_grbm_select while srbm_mutex is held. */ ++ uint32_t pipe_priority = acquire ? 0x2 : 0x0; ++ uint32_t queue_priority = acquire ? 0xf : 0x0; ++ ++ mutex_lock(&adev->srbm_mutex); ++ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); ++ ++ WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); ++ WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); ++ ++ soc15_grbm_select(adev, 0, 0, 0, 0); /* switch the selection back to 0/0/0/0 before releasing srbm_mutex */ ++ mutex_unlock(&adev->srbm_mutex); ++} ++ ++static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, ++ enum drm_sched_priority priority) ++{ /* set_priority callback of the compute ring funcs: DRM_SCHED_PRIORITY_HIGH_HW acquires, every other priority releases. */ ++ struct amdgpu_device *adev = ring->adev; ++ bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; ++ ++ if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) ++ return; /* nothing to do for rings that are not compute rings */ ++ ++ gfx_v9_0_hqd_set_priority(adev, ring, acquire); ++ gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); ++} ++ + static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) + { + struct amdgpu_device *adev = ring->adev; +@@ -4266,6 +4365,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, ++ .set_priority = gfx_v9_0_ring_set_priority_compute, + }; + + static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { +-- +2.7.4 + |