Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch | 243
1 file changed, 243 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch
new file mode 100644
index 00000000..5bad54ab
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1191-drm-amdgpu-add-a-workaround-for-GDS-ordered-append-h.patch
@@ -0,0 +1,243 @@
+From 4ff99a140c1c6893f2dc9c5ac9f1e172c23c1155 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
+Date: Mon, 21 Jan 2019 17:22:55 -0500
+Subject: [PATCH 1191/2940] drm/amdgpu: add a workaround for GDS ordered append
+ hangs with compute queues
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+I'm not increasing the DRM version because GDS isn't totally without bugs yet.
+
+v2: update emit_ib_size
+
+Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 ++
+ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 19 +++++++++++-
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 21 +++++++++++--
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +++++++++++++++++++++++--
+ include/uapi/drm/amdgpu_drm.h | 5 ++++
+ 6 files changed, 84 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index c3eb1875e54e..deadeb765cf5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -72,9 +72,10 @@
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
++ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
+ */
+ #define KMS_DRIVER_MAJOR 3
+-#define KMS_DRIVER_MINOR 28
++#define KMS_DRIVER_MINOR 29
+ #define KMS_DRIVER_PATCHLEVEL 0
+
+ #define AMDGPU_VERSION "18.50.1.418"
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+index ecbcefe49a98..f89f5734d985 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+@@ -37,6 +37,8 @@ struct amdgpu_gds {
+ struct amdgpu_gds_asic_info mem;
+ struct amdgpu_gds_asic_info gws;
+ struct amdgpu_gds_asic_info oa;
++ uint32_t gds_compute_max_wave_id;
++
+ /* At present, GDS, GWS and OA resources for gfx (graphics)
+ * is always pre-allocated and available for graphics operation.
+ * Such resource is shared between all gfx clients.
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index 7984292f9282..a59e0fdf5a97 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
++ /* Currently, there is a high possibility to get wave ID mismatch
++ * between ME and GDS, leading to a hw deadlock, because ME generates
++ * different wave IDs than the GDS expects. This situation happens
++ * randomly when at least 5 compute pipes use GDS ordered append.
++ * The wave IDs generated by ME are also wrong after suspend/resume.
++ * Those are probably bugs somewhere else in the kernel driver.
++ *
++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
++ * GDS to 0 for this ring (me/pipe).
++ */
++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
++ }
++
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ amdgpu_ring_write(ring,
+ #ifdef __BIG_ENDIAN
+@@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
+ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
+ 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+- .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
++ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v7_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v7_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
+@@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
+ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws.total_size = 64;
+ adev->gds.oa.total_size = 16;
++ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
+
+ if (adev->gds.mem.total_size == 64 * 1024) {
+ adev->gds.mem.gfx_partition_size = 4096;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 4f730f074611..e6f66bf7e1f0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -6083,6 +6083,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
++ /* Currently, there is a high possibility to get wave ID mismatch
++ * between ME and GDS, leading to a hw deadlock, because ME generates
++ * different wave IDs than the GDS expects. This situation happens
++ * randomly when at least 5 compute pipes use GDS ordered append.
++ * The wave IDs generated by ME are also wrong after suspend/resume.
++ * Those are probably bugs somewhere else in the kernel driver.
++ *
++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
++ * GDS to 0 for this ring (me/pipe).
++ */
++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
++ }
++
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ amdgpu_ring_write(ring,
+ #ifdef __BIG_ENDIAN
+@@ -6889,7 +6905,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
+ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
++ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v8_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v8_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
+@@ -6919,7 +6935,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ 17 + /* gfx_v8_0_ring_emit_vm_flush */
+ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
++ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
+ .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v8_0_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+@@ -6995,6 +7011,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
+ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws.total_size = 64;
+ adev->gds.oa.total_size = 16;
++ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
+
+ if (adev->gds.mem.total_size == 64 * 1024) {
+ adev->gds.mem.gfx_partition_size = 4096;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+index a91687b4ac8e..1478e784cff0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -4014,6 +4014,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
++ /* Currently, there is a high possibility to get wave ID mismatch
++ * between ME and GDS, leading to a hw deadlock, because ME generates
++ * different wave IDs than the GDS expects. This situation happens
++ * randomly when at least 5 compute pipes use GDS ordered append.
++ * The wave IDs generated by ME are also wrong after suspend/resume.
++ * Those are probably bugs somewhere else in the kernel driver.
++ *
++ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
++ * GDS to 0 for this ring (me/pipe).
++ */
++ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
++ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
++ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
++ }
++
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+@@ -4733,7 +4749,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+- .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
++ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+@@ -4768,7 +4784,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+- .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
++ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+@@ -4850,6 +4866,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
+ break;
+ }
+
++ switch (adev->asic_type) {
++ case CHIP_VEGA10:
++ case CHIP_VEGA20:
++ adev->gds.gds_compute_max_wave_id = 0x7ff;
++ break;
++ case CHIP_VEGA12:
++ adev->gds.gds_compute_max_wave_id = 0x27f;
++ break;
++ case CHIP_RAVEN:
++ if (adev->rev_id >= 0x8)
++ adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
++ else
++ adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
++ break;
++ default:
++ /* this really depends on the chip */
++ adev->gds.gds_compute_max_wave_id = 0x7ff;
++ break;
++ }
++
+ adev->gds.gws.total_size = 64;
+ adev->gds.oa.total_size = 16;
+
+diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
+index 29ef0f434e3f..b54b02502867 100644
+--- a/include/uapi/drm/amdgpu_drm.h
++++ b/include/uapi/drm/amdgpu_drm.h
+@@ -625,6 +625,11 @@ union drm_amdgpu_cs {
+ * caches (L2/vL1/sL1/I$). */
+ #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
++/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
++ * This will reset wave ID counters for the IB.
++ */
++#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
++
+ struct drm_amdgpu_cs_chunk_ib {
+ __u32 _pad;
+ /** AMDGPU_IB_FLAG_* */
+--
+2.17.1
+
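Editor's note (not part of the patch): the new AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
flag is consumed by userspace through the existing drm_amdgpu_cs_chunk_ib UAPI.
Below is a minimal, hypothetical C sketch of how a user-mode driver might opt in
to the workaround when submitting a compute IB that uses GDS ordered append. The
struct and flag come from include/uapi/drm/amdgpu_drm.h as modified above; the
helper name and the surrounding CS-submission plumbing are invented for
illustration only.

/*
 * Hypothetical userspace sketch: request the GDS wave-ID reset for a
 * compute IB.  Everything except the UAPI struct and flag is assumed.
 */
#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>

static void fill_compute_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
                                  uint64_t ib_va, uint32_t len_dw,
                                  int uses_gds_ordered_append)
{
        memset(ib, 0, sizeof(*ib));
        ib->va_start = ib_va;          /* GPU virtual address of the IB */
        ib->ib_bytes = len_dw * 4;     /* IB length in bytes */
        ib->ip_type  = AMDGPU_HW_IP_COMPUTE;

        /*
         * Ask the kernel to write GDS_COMPUTE_MAX_WAVE_ID before the
         * INDIRECT_BUFFER packet, resetting the ME/GDS wave-ID counters
         * for this ring so they cannot drift out of sync.
         */
        if (uses_gds_ordered_append)
                ib->flags |= AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
}

On the kernel side, the conditional reset emits a three-dword
PACKET3_SET_CONFIG_REG sequence (packet header, register offset, value) ahead
of the existing four-dword INDIRECT_BUFFER emission, which is why each compute
ring's .emit_ib_size worst case is raised from 4 to 7 in the hunks above.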