diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch | 243 |
1 file changed, 243 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch new file mode 100644 index 00000000..5bad54ab --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch @@ -0,0 +1,243 @@ +From 4ff99a140c1c6893f2dc9c5ac9f1e172c23c1155 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com> +Date: Mon, 21 Jan 2019 17:22:55 -0500 +Subject: [PATCH 1191/2940] drm/amdgpu: add a workaround for GDS ordered append + hangs with compute queues +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +I'm not increasing the DRM version because GDS isn't totally without bugs yet. + +v2: update emit_ib_size + +Signed-off-by: Marek Olšák <marek.olsak@amd.com> +Acked-by: Christian König <christian.koenig@amd.com> +Acked-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 ++ + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 19 +++++++++++- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 21 +++++++++++-- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +++++++++++++++++++++++-- + include/uapi/drm/amdgpu_drm.h | 5 ++++ + 6 files changed, 84 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index c3eb1875e54e..deadeb765cf5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -72,9 +72,10 @@ + * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. 
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES ++ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID + */ + #define KMS_DRIVER_MAJOR 3 +-#define KMS_DRIVER_MINOR 28 ++#define KMS_DRIVER_MINOR 29 + #define KMS_DRIVER_PATCHLEVEL 0 + + #define AMDGPU_VERSION "18.50.1.418" +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +index ecbcefe49a98..f89f5734d985 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +@@ -37,6 +37,8 @@ struct amdgpu_gds { + struct amdgpu_gds_asic_info mem; + struct amdgpu_gds_asic_info gws; + struct amdgpu_gds_asic_info oa; ++ uint32_t gds_compute_max_wave_id; ++ + /* At present, GDS, GWS and OA resources for gfx (graphics) + * is always pre-allocated and available for graphics operation. + * Such resource is shared between all gfx clients. +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 7984292f9282..a59e0fdf5a97 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + ++ /* Currently, there is a high possibility to get wave ID mismatch ++ * between ME and GDS, leading to a hw deadlock, because ME generates ++ * different wave IDs than the GDS expects. This situation happens ++ * randomly when at least 5 compute pipes use GDS ordered append. ++ * The wave IDs generated by ME are also wrong after suspend/resume. ++ * Those are probably bugs somewhere else in the kernel driver. ++ * ++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and ++ * GDS to 0 for this ring (me/pipe). 
++ */ ++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); ++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); ++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); ++ } ++ + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + amdgpu_ring_write(ring, + #ifdef __BIG_ENDIAN +@@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { + 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ + CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ + 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ +- .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ ++ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ + .emit_ib = gfx_v7_0_ring_emit_ib_compute, + .emit_fence = gfx_v7_0_ring_emit_fence_compute, + .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, +@@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) + adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws.total_size = 64; + adev->gds.oa.total_size = 16; ++ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); + + if (adev->gds.mem.total_size == 64 * 1024) { + adev->gds.mem.gfx_partition_size = 4096; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 4f730f074611..e6f66bf7e1f0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -6083,6 +6083,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + ++ /* Currently, there is a high possibility to get wave ID mismatch ++ * between ME and GDS, leading to a hw deadlock, because ME generates ++ * different wave IDs than the GDS expects. 
This situation happens ++ * randomly when at least 5 compute pipes use GDS ordered append. ++ * The wave IDs generated by ME are also wrong after suspend/resume. ++ * Those are probably bugs somewhere else in the kernel driver. ++ * ++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and ++ * GDS to 0 for this ring (me/pipe). ++ */ ++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); ++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); ++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); ++ } ++ + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + amdgpu_ring_write(ring, + #ifdef __BIG_ENDIAN +@@ -6889,7 +6905,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { + 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ + VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ + 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ +- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ ++ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib = gfx_v8_0_ring_emit_ib_compute, + .emit_fence = gfx_v8_0_ring_emit_fence_compute, + .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, +@@ -6919,7 +6935,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { + 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ + 17 + /* gfx_v8_0_ring_emit_vm_flush */ + 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ +- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ ++ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_fence = gfx_v8_0_ring_emit_fence_kiq, + .test_ring = gfx_v8_0_ring_test_ring, + .insert_nop = amdgpu_ring_insert_nop, +@@ -6995,6 +7011,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) + adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws.total_size = 64; + adev->gds.oa.total_size = 
16; ++ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); + + if (adev->gds.mem.total_size == 64 * 1024) { + adev->gds.mem.gfx_partition_size = 4096; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index a91687b4ac8e..1478e784cff0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -4014,6 +4014,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + ++ /* Currently, there is a high possibility to get wave ID mismatch ++ * between ME and GDS, leading to a hw deadlock, because ME generates ++ * different wave IDs than the GDS expects. This situation happens ++ * randomly when at least 5 compute pipes use GDS ordered append. ++ * The wave IDs generated by ME are also wrong after suspend/resume. ++ * Those are probably bugs somewhere else in the kernel driver. ++ * ++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and ++ * GDS to 0 for this ring (me/pipe). 
++ */ ++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); ++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); ++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); ++ } ++ + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +@@ -4733,7 +4749,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ +- .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ ++ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, +@@ -4768,7 +4784,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ +- .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ ++ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_fence = gfx_v9_0_ring_emit_fence_kiq, + .test_ring = gfx_v9_0_ring_test_ring, + .insert_nop = amdgpu_ring_insert_nop, +@@ -4850,6 +4866,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) + break; + } + ++ switch (adev->asic_type) { ++ case CHIP_VEGA10: ++ case CHIP_VEGA20: ++ adev->gds.gds_compute_max_wave_id = 0x7ff; ++ break; ++ case CHIP_VEGA12: ++ adev->gds.gds_compute_max_wave_id = 0x27f; ++ break; ++ case CHIP_RAVEN: ++ if (adev->rev_id >= 0x8) ++ adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ ++ else ++ adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ ++ break; ++ default: ++ /* this really depends on the chip */ ++ adev->gds.gds_compute_max_wave_id = 0x7ff; ++ break; ++ } ++ 
+ adev->gds.gws.total_size = 64; + adev->gds.oa.total_size = 16; + +diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h +index 29ef0f434e3f..b54b02502867 100644 +--- a/include/uapi/drm/amdgpu_drm.h ++++ b/include/uapi/drm/amdgpu_drm.h +@@ -625,6 +625,11 @@ union drm_amdgpu_cs { + * caches (L2/vL1/sL1/I$). */ + #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + ++/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. ++ * This will reset wave ID counters for the IB. ++ */ ++#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) ++ + struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ +-- +2.17.1 + |