diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4233-drm-amdgpu-optionally-do-a-writeback-but-don-t-inval.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/4233-drm-amdgpu-optionally-do-a-writeback-but-don-t-inval.patch | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4233-drm-amdgpu-optionally-do-a-writeback-but-don-t-inval.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4233-drm-amdgpu-optionally-do-a-writeback-but-don-t-inval.patch new file mode 100644 index 00000000..34b53207 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4233-drm-amdgpu-optionally-do-a-writeback-but-don-t-inval.patch @@ -0,0 +1,176 @@ +From e0c57dec07e7d2f1733561c0bb8a692bd65ade48 Mon Sep 17 00:00:00 2001 +From: Marek Olsak <marek.olsak@amd.com> +Date: Tue, 3 Apr 2018 13:05:03 -0400 +Subject: [PATCH 4233/5725] drm/amdgpu: optionally do a writeback but don't + invalidate TC for IB fences +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There is a new IB flag that enables this new behavior. +Full invalidation is unnecessary for RELEASE_MEM and doesn't make sense +when draw calls from two adjacent gfx IBs run in parallel. This will be +the new default for Mesa. + +v2: bump the version + +Signed-off-by: Marek Olšák <marek.olsak@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 5 +++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++++++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +++- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++++++---- + drivers/gpu/drm/amd/amdgpu/soc15d.h | 1 + + 7 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 3f68ca9..51e2928 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -75,9 +75,10 @@ + * - 3.23.0 - Add query for VRAM lost counter + * - 3.24.0 - Add high priority compute support for gfx9 + * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). ++ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. + */ + #define KMS_DRIVER_MAJOR 3 +-#define KMS_DRIVER_MINOR 25 ++#define KMS_DRIVER_MINOR 26 + #define KMS_DRIVER_PATCHLEVEL 0 + + #define AMDGPU_VERSION "18.20.2.15" +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index 97449e0..4a3cef9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) + * Emits a fence command on the requested ring (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) ++int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, ++ unsigned flags) + { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_fence *fence; +@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) + adev->fence_context + ring->idx, + seq); + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, +- seq, AMDGPU_FENCE_FLAG_INT); ++ seq, flags | AMDGPU_FENCE_FLAG_INT); + + ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + /* This function can't be called concurrently anyway, otherwise +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +index 29c8015..3f7afcf 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_vm *vm; + uint64_t fence_ctx; + uint32_t status = 0, alloc_size; ++ unsigned fence_flags = 0; + + unsigned i; + int r = 0; +@@ -234,7 +235,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + ) + amdgpu_ring_emit_hdp_invalidate(ring); + +- r = amdgpu_fence_emit(ring, f); ++ if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) ++ fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; ++ ++ r = amdgpu_fence_emit(ring, f, fence_flags); + if (r) { + dev_err(adev->dev, "failed to emit fence (%d)\n", r); + if (job && job->vmid) +@@ -249,7 +253,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + /* wrap the last IB with fence */ + if (job && job->uf_addr) { + amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, +- AMDGPU_FENCE_FLAG_64BIT); ++ fence_flags | AMDGPU_FENCE_FLAG_64BIT); + } + + if (patch_offset != ~0 && ring->funcs->patch_cond_exec) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +index 6ed21bd..79ca5b7 100755 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +@@ -41,6 +41,7 @@ + + #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) + #define AMDGPU_FENCE_FLAG_INT (1 << 1) ++#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) + + enum amdgpu_ring_type { + AMDGPU_RING_TYPE_GFX, +@@ -89,7 +90,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, + unsigned irq_type); + void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); + void amdgpu_fence_driver_resume(struct amdgpu_device *adev); +-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); ++int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, ++ unsigned flags); + int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); + void amdgpu_fence_process(struct amdgpu_ring *ring); + int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index d693066..56a8614 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -633,7 +633,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + + if (vm_flush_needed || pasid_mapping_needed) { +- r = amdgpu_fence_emit(ring, &fence); ++ r = amdgpu_fence_emit(ring, &fence, 0); + if (r) + return r; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index d04a78b..cf1d206 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -3857,13 +3857,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + { + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; ++ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); +- amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | +- EOP_TC_ACTION_EN | +- EOP_TC_WB_ACTION_EN | +- EOP_TC_MD_ACTION_EN | ++ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | ++ EOP_TC_NC_ACTION_EN) : ++ (EOP_TCL1_ACTION_EN | ++ EOP_TC_ACTION_EN | ++ EOP_TC_WB_ACTION_EN | ++ EOP_TC_MD_ACTION_EN)) | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h +index f22f7a8..8dc2910 100755 +--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h ++++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h +@@ -159,6 +159,7 @@ + #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ + #define EOP_TCL1_ACTION_EN (1 << 16) + #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ ++#define EOP_TC_NC_ACTION_EN (1 << 19) + #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ + + #define DATA_SEL(x) ((x) << 29) +-- +2.7.4 + |