diff options
Diffstat (limited to 'common/recipes-kernel/linux')
64 files changed, 9095 insertions, 41 deletions
diff --git a/common/recipes-kernel/linux/linux-amd/0001-drm-radeon-add-vm_set_page-tracepoint.patch b/common/recipes-kernel/linux/linux-amd/0001-drm-radeon-add-vm_set_page-tracepoint.patch new file mode 100644 index 00000000..574ea763 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0001-drm-radeon-add-vm_set_page-tracepoint.patch @@ -0,0 +1,118 @@ +From e468a2618e85630c1443bdd8402eb05443c05b9a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 29 Oct 2013 20:14:48 +0100 +Subject: [PATCH 01/60] drm/radeon: add vm_set_page tracepoint +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik_sdma.c | 3 +++ + drivers/gpu/drm/radeon/ni_dma.c | 3 +++ + drivers/gpu/drm/radeon/radeon_trace.h | 24 ++++++++++++++++++++++++ + drivers/gpu/drm/radeon/si_dma.c | 3 +++ + 4 files changed, 33 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index dc055d4..f0020fe 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -25,6 +25,7 @@ + #include <drm/drmP.h> + #include "radeon.h" + #include "radeon_asic.h" ++#include "radeon_trace.h" + #include "cikd.h" + + /* sdma */ +@@ -650,6 +651,8 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + uint64_t value; + unsigned ndw; + ++ trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; +diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c +index d0e4ab1..599e87b 100644 +--- a/drivers/gpu/drm/radeon/ni_dma.c ++++ b/drivers/gpu/drm/radeon/ni_dma.c +@@ -24,6 +24,7 @@ + #include <drm/drmP.h> + #include "radeon.h" + #include "radeon_asic.h" ++#include "radeon_trace.h" + #include "nid.h" + + u32 
cayman_gpu_check_soft_reset(struct radeon_device *rdev); +@@ -254,6 +255,8 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + uint64_t value; + unsigned ndw; + ++ trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ + if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) { + while (count) { + ndw = count * 2; +diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h +index f7e3678..811bca6 100644 +--- a/drivers/gpu/drm/radeon/radeon_trace.h ++++ b/drivers/gpu/drm/radeon/radeon_trace.h +@@ -47,6 +47,30 @@ TRACE_EVENT(radeon_cs, + __entry->fences) + ); + ++TRACE_EVENT(radeon_vm_set_page, ++ TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, ++ uint32_t incr, uint32_t flags), ++ TP_ARGS(pe, addr, count, incr, flags), ++ TP_STRUCT__entry( ++ __field(u64, pe) ++ __field(u64, addr) ++ __field(u32, count) ++ __field(u32, incr) ++ __field(u32, flags) ++ ), ++ ++ TP_fast_assign( ++ __entry->pe = pe; ++ __entry->addr = addr; ++ __entry->count = count; ++ __entry->incr = incr; ++ __entry->flags = flags; ++ ), ++ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u", ++ __entry->pe, __entry->addr, __entry->incr, ++ __entry->flags, __entry->count) ++); ++ + DECLARE_EVENT_CLASS(radeon_fence_request, + + TP_PROTO(struct drm_device *dev, u32 seqno), +diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c +index 49909d2..17205fd 100644 +--- a/drivers/gpu/drm/radeon/si_dma.c ++++ b/drivers/gpu/drm/radeon/si_dma.c +@@ -24,6 +24,7 @@ + #include <drm/drmP.h> + #include "radeon.h" + #include "radeon_asic.h" ++#include "radeon_trace.h" + #include "sid.h" + + u32 si_gpu_check_soft_reset(struct radeon_device *rdev); +@@ -79,6 +80,8 @@ void si_dma_vm_set_page(struct radeon_device *rdev, + uint64_t value; + unsigned ndw; + ++ trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; +-- +1.9.1 + diff --git 
a/common/recipes-kernel/linux/linux-amd/0002-drm-radeon-cleanup-flushing-on-CIK-v3.patch b/common/recipes-kernel/linux/linux-amd/0002-drm-radeon-cleanup-flushing-on-CIK-v3.patch new file mode 100644 index 00000000..f37b1d6b --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0002-drm-radeon-cleanup-flushing-on-CIK-v3.patch @@ -0,0 +1,96 @@ +From d7a49c33db3be125a7e7682d706139a9c5fe4427 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com> +Date: Sat, 19 Oct 2013 22:06:43 +0200 +Subject: [PATCH 02/60] drm/radeon: cleanup flushing on CIK (v3) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +v2: fix compute handling. +v3: use HDP_MEM_COHERENCY_FLUSH_CNTL again + +Signed-off-by: Marek Olšák <marek.olsak@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 43 +++++++++++++++---------------------------- + 1 file changed, 15 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 0fc5fd6..d7582cd 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -3012,6 +3012,18 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) + return r; + } + ++static void cik_cp_hdp_flush(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ /* Request an HDP flush */ ++ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); ++ radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | ++ WRITE_DATA_DST_SEL(0))); ++ radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); ++ radeon_ring_write(ring, 0); ++ radeon_ring_write(ring, 0); ++} ++ + /** + * cik_fence_gfx_ring_emit - emit a fence on the gfx ring + * +@@ -3038,15 +3050,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, 0); + /* HDP flush */ +- /* We 
should be using the new WAIT_REG_MEM special op packet here +- * but it causes the CP to hang +- */ +- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(0))); +- radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); +- radeon_ring_write(ring, 0); +- radeon_ring_write(ring, 0); ++ cik_cp_hdp_flush(rdev, ring); + } + + /** +@@ -3076,15 +3080,7 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, 0); + /* HDP flush */ +- /* We should be using the new WAIT_REG_MEM special op packet here +- * but it causes the CP to hang +- */ +- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(0))); +- radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); +- radeon_ring_write(ring, 0); +- radeon_ring_write(ring, 0); ++ cik_cp_hdp_flush(rdev, ring); + } + + void cik_semaphore_ring_emit(struct radeon_device *rdev, +@@ -4814,16 +4810,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) + radeon_ring_write(ring, VMID(0)); + + /* HDP flush */ +- /* We should be using the WAIT_REG_MEM packet here like in +- * cik_fence_ring_emit(), but it causes the CP to hang in this +- * context... 
+- */ +- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(0))); +- radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); +- radeon_ring_write(ring, 0); +- radeon_ring_write(ring, 0); ++ cik_cp_hdp_flush(rdev, ring); + + /* bits 0-15 are the VM contexts0-15 */ + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0003-drm-radeon-cleanup-DMA-HDP-flush-on-CIK-v2.patch b/common/recipes-kernel/linux/linux-amd/0003-drm-radeon-cleanup-DMA-HDP-flush-on-CIK-v2.patch new file mode 100644 index 00000000..130d54b0 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0003-drm-radeon-cleanup-DMA-HDP-flush-on-CIK-v2.patch @@ -0,0 +1,100 @@ +From e668247ce4383d72e79fab167e294ca0e036f6cf Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Tue, 5 Nov 2013 18:12:13 -0500 +Subject: [PATCH 03/60] drm/radeon: cleanup DMA HDP flush on CIK (v2) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +v2: use HDP_MEM_COHERENCY_FLUSH_CNTL again + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik_sdma.c | 38 ++++++++++---------------------------- + 1 file changed, 10 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index f0020fe..37028b9 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -51,6 +51,14 @@ u32 cik_gpu_check_soft_reset(struct radeon_device *rdev); + * buffers. 
+ */ + ++static void cik_sdma_hdp_flush(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); ++ radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); ++ radeon_ring_write(ring, 0x0); ++} ++ + /** + * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine + * +@@ -102,14 +110,6 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + { + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; +- u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | +- SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ +- u32 ref_and_mask; +- +- if (fence->ring == R600_RING_TYPE_DMA_INDEX) +- ref_and_mask = SDMA0; +- else +- ref_and_mask = SDMA1; + + /* write the fence */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); +@@ -119,12 +119,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + /* generate an interrupt */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); + /* flush HDP */ +- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); +- radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); +- radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); +- radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ +- radeon_ring_write(ring, ref_and_mask); /* MASK */ +- radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ ++ cik_sdma_hdp_flush(rdev, ring); + } + + /** +@@ -720,18 +715,10 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) + { + struct radeon_ring *ring = &rdev->ring[ridx]; +- u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | +- SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ +- u32 ref_and_mask; + + if (vm == NULL) + return; + +- if (ridx == R600_RING_TYPE_DMA_INDEX) +- ref_and_mask = SDMA0; +- else +- ref_and_mask = SDMA1; +- + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 
0, 0xf000)); + if (vm->id < 8) { + radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); +@@ -766,12 +753,7 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm + radeon_ring_write(ring, VMID(0)); + + /* flush HDP */ +- radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); +- radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); +- radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); +- radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ +- radeon_ring_write(ring, ref_and_mask); /* MASK */ +- radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ ++ cik_sdma_hdp_flush(rdev, ring); + + /* flush TLB */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0004-drm-radeon-allow-semaphore-emission-to-fail.patch b/common/recipes-kernel/linux/linux-amd/0004-drm-radeon-allow-semaphore-emission-to-fail.patch new file mode 100644 index 00000000..bb2a4502 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0004-drm-radeon-allow-semaphore-emission-to-fail.patch @@ -0,0 +1,754 @@ +From 73ffb41074452917a90af3bc46da9b15aa8c6fdf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 8 Jul 2013 05:11:52 -0600 +Subject: [PATCH 04/60] drm/radeon: allow semaphore emission to fail +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some rings can only use semaphore in certain states, take that into account +and fall back to waiting for a fence when a ring currently can't emit a +semaphore. 
+ +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 4 +- + drivers/gpu/drm/radeon/cik_sdma.c | 13 ++-- + drivers/gpu/drm/radeon/evergreen_dma.c | 9 +-- + drivers/gpu/drm/radeon/r100.c | 3 +- + drivers/gpu/drm/radeon/r600.c | 13 ++-- + drivers/gpu/drm/radeon/r600_dma.c | 13 ++-- + drivers/gpu/drm/radeon/radeon.h | 14 ++-- + drivers/gpu/drm/radeon/radeon_asic.h | 18 ++--- + drivers/gpu/drm/radeon/radeon_cs.c | 9 ++- + drivers/gpu/drm/radeon/radeon_fence.c | 26 +++++++ + drivers/gpu/drm/radeon/radeon_gart.c | 2 +- + drivers/gpu/drm/radeon/radeon_ring.c | 46 +++-------- + drivers/gpu/drm/radeon/radeon_semaphore.c | 123 ++++++++++++++++++++++-------- + drivers/gpu/drm/radeon/rv770_dma.c | 9 +-- + drivers/gpu/drm/radeon/si_dma.c | 9 +-- + drivers/gpu/drm/radeon/uvd_v1_0.c | 4 +- + drivers/gpu/drm/radeon/uvd_v3_1.c | 4 +- + 17 files changed, 182 insertions(+), 137 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index d7582cd..54a62cf 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -3083,7 +3083,7 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, + cik_cp_hdp_flush(rdev, ring); + } + +-void cik_semaphore_ring_emit(struct radeon_device *rdev, ++bool cik_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -3094,6 +3094,8 @@ void cik_semaphore_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); ++ ++ return true; + } + + /* +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index 37028b9..e73c49e 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -133,7 +133,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + * Add 
a DMA semaphore packet to the ring wait on or signal + * other rings (CIK). + */ +-void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, ++bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -144,6 +144,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); + radeon_ring_write(ring, addr & 0xfffffff8); + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); ++ ++ return true; + } + + /** +@@ -439,13 +441,8 @@ int cik_copy_dma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; +diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c +index 6a0656d..a37b544 100644 +--- a/drivers/gpu/drm/radeon/evergreen_dma.c ++++ b/drivers/gpu/drm/radeon/evergreen_dma.c +@@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; +diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c +index f98dcbe..16997d3 100644 +--- a/drivers/gpu/drm/radeon/r100.c ++++ b/drivers/gpu/drm/radeon/r100.c +@@ -869,13 +869,14 @@ void 
r100_fence_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, RADEON_SW_INT_FIRE); + } + +-void r100_semaphore_ring_emit(struct radeon_device *rdev, ++bool r100_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) + { + /* Unused on older asics, since we don't have semaphores or multiple rings */ + BUG(); ++ return false; + } + + int r100_copy_blit(struct radeon_device *rdev, +diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c +index 88eb936..9d101a4 100644 +--- a/drivers/gpu/drm/radeon/r600.c ++++ b/drivers/gpu/drm/radeon/r600.c +@@ -2598,7 +2598,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, + } + } + +-void r600_semaphore_ring_emit(struct radeon_device *rdev, ++bool r600_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -2612,6 +2612,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); ++ ++ return true; + } + + /** +@@ -2654,13 +2656,8 @@ int r600_copy_cpdma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); +diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c +index aad3c36..616d37a 100644 +--- a/drivers/gpu/drm/radeon/r600_dma.c ++++ b/drivers/gpu/drm/radeon/r600_dma.c +@@ -302,7 
+302,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev, + * Add a DMA semaphore packet to the ring wait on or signal + * other rings (r6xx-SI). + */ +-void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, ++bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -313,6 +313,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); ++ ++ return true; + } + + /** +@@ -453,13 +455,8 @@ int r600_copy_dma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 5c903a8..7601071 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i + void radeon_fence_process(struct radeon_device *rdev, int ring); + bool radeon_fence_signaled(struct radeon_fence *fence); + int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); ++int radeon_fence_wait_locked(struct radeon_fence *fence); + int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); + int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); + int radeon_fence_wait_any(struct radeon_device *rdev, +@@ -548,17 +549,20 @@ struct radeon_semaphore { + struct radeon_sa_bo *sa_bo; + signed waiters; + 
uint64_t gpu_addr; ++ struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + }; + + int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore); +-void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, ++bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore); +-void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, ++bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, + struct radeon_semaphore *semaphore); ++void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, ++ struct radeon_fence *fence); + int radeon_semaphore_sync_rings(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, +- int signaler, int waiter); ++ int waiting_ring); + void radeon_semaphore_free(struct radeon_device *rdev, + struct radeon_semaphore **semaphore, + struct radeon_fence *fence); +@@ -771,7 +775,6 @@ struct radeon_ib { + struct radeon_fence *fence; + struct radeon_vm *vm; + bool is_const_ib; +- struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + struct radeon_semaphore *semaphore; + }; + +@@ -921,7 +924,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, + struct radeon_ib *ib, struct radeon_vm *vm, + unsigned size); + void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); +-void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence); + int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib); + int radeon_ib_pool_init(struct radeon_device *rdev); +@@ -1635,7 +1637,7 @@ struct radeon_asic_ring { + /* command emmit functions */ + void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); +- void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, ++ bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + struct radeon_semaphore 
*semaphore, bool emit_wait); + void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 70c29d5..8588670 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev); + int r100_irq_process(struct radeon_device *rdev); + void r100_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void r100_semaphore_ring_emit(struct radeon_device *rdev, ++bool r100_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *cp, + struct radeon_semaphore *semaphore, + bool emit_wait); +@@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p); + int r600_dma_cs_parse(struct radeon_cs_parser *p); + void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void r600_semaphore_ring_emit(struct radeon_device *rdev, ++bool r600_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *cp, + struct radeon_semaphore *semaphore, + bool emit_wait); + void r600_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, ++bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +@@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev, + */ + void cayman_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void cayman_uvd_semaphore_emit(struct radeon_device *rdev, +- struct radeon_ring *ring, +- struct radeon_semaphore *semaphore, +- bool emit_wait); + void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); + int cayman_init(struct radeon_device *rdev); + void cayman_fini(struct radeon_device *rdev); +@@ -696,7 +692,7 @@ void cik_pciep_wreg(struct radeon_device 
*rdev, uint32_t reg, uint32_t v); + int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); + void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, ++bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +@@ -712,7 +708,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); + void cik_fence_compute_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +-void cik_semaphore_ring_emit(struct radeon_device *rdev, ++bool cik_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *cp, + struct radeon_semaphore *semaphore, + bool emit_wait); +@@ -802,7 +798,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev); + + int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); + int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); +-void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, ++bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +@@ -814,7 +810,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); + + /* uvd v3.1 */ +-void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, ++bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c +index ed9a997..697004f4 100644 +--- a/drivers/gpu/drm/radeon/radeon_cs.c ++++ b/drivers/gpu/drm/radeon/radeon_cs.c +@@ -165,7 +165,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) + if (!p->relocs[i].robj) + continue; + +- radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); ++ 
radeon_semaphore_sync_to(p->ib.semaphore, ++ p->relocs[i].robj->tbo.sync_obj); + } + } + +@@ -508,9 +509,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, + goto out; + } + radeon_cs_sync_rings(parser); +- radeon_ib_sync_to(&parser->ib, vm->fence); +- radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( +- rdev, vm, parser->ring)); ++ radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); ++ radeon_semaphore_sync_to(parser->ib.semaphore, ++ radeon_vm_grab_id(rdev, vm, parser->ring)); + + if ((rdev->family >= CHIP_TAHITI) && + (parser->chunk_const_ib_idx != -1)) { +diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c +index ddb8f8e..8aea16e 100644 +--- a/drivers/gpu/drm/radeon/radeon_fence.c ++++ b/drivers/gpu/drm/radeon/radeon_fence.c +@@ -404,6 +404,32 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) + return 0; + } + ++/** ++ * radeon_fence_wait_locked - wait for a fence to signal ++ * ++ * @fence: radeon fence object ++ * ++ * Wait for the requested fence to signal (all asics). ++ * Returns 0 if the fence has passed, error for all other cases. 
++ */ ++int radeon_fence_wait_locked(struct radeon_fence *fence) ++{ ++ int r; ++ ++ if (fence == NULL) { ++ WARN(1, "Querying an invalid fence : %p !\n", fence); ++ return -EINVAL; ++ } ++ ++ r = radeon_fence_wait_seq(fence->rdev, fence->seq, ++ fence->ring, false, false); ++ if (r) { ++ return r; ++ } ++ fence->seq = RADEON_FENCE_SIGNALED_SEQ; ++ return 0; ++} ++ + static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) + { + unsigned i; +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index 3b1de72..f8d7b16 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -1169,7 +1169,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, + addr, bo_va->flags); + +- radeon_ib_sync_to(&ib, vm->fence); ++ radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); +diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c +index 18254e1..9214403 100644 +--- a/drivers/gpu/drm/radeon/radeon_ring.c ++++ b/drivers/gpu/drm/radeon/radeon_ring.c +@@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, + struct radeon_ib *ib, struct radeon_vm *vm, + unsigned size) + { +- int i, r; ++ int r; + + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); + if (r) { +@@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, + ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); + } + ib->is_const_ib = false; +- for (i = 0; i < RADEON_NUM_RINGS; ++i) +- ib->sync_to[i] = NULL; + + return 0; + } +@@ -109,25 +107,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) + } + + /** +- * radeon_ib_sync_to - sync to fence before executing the IB +- * +- * @ib: IB object to add fence to +- * @fence: fence to sync to +- * +- * Sync to the 
fence before executing the IB +- */ +-void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence) +-{ +- struct radeon_fence *other; +- +- if (!fence) +- return; +- +- other = ib->sync_to[fence->ring]; +- ib->sync_to[fence->ring] = radeon_fence_later(fence, other); +-} +- +-/** + * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring + * + * @rdev: radeon_device pointer +@@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib) + { + struct radeon_ring *ring = &rdev->ring[ib->ring]; +- bool need_sync = false; +- int i, r = 0; ++ int r = 0; + + if (!ib->length_dw || !ring->ready) { + /* TODO: Nothings in the ib we should report. */ +@@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); + return r; + } +- for (i = 0; i < RADEON_NUM_RINGS; ++i) { +- struct radeon_fence *fence = ib->sync_to[i]; +- if (radeon_fence_need_sync(fence, ib->ring)) { +- need_sync = true; +- radeon_semaphore_sync_rings(rdev, ib->semaphore, +- fence->ring, ib->ring); +- radeon_fence_note_sync(fence, ib->ring); +- } +- } +- /* immediately free semaphore when we don't need to sync */ +- if (!need_sync) { +- radeon_semaphore_free(rdev, &ib->semaphore, NULL); ++ ++ /* sync with other rings */ ++ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); ++ if (r) { ++ dev_err(rdev->dev, "failed to sync rings (%d)\n", r); ++ radeon_ring_unlock_undo(rdev, ring); ++ return r; + } ++ + /* if we can't remember our last VM flush then flush now! 
*/ + /* XXX figure out why we have to flush for every IB */ + if (ib->vm /*&& !ib->vm->last_flush*/) { +diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c +index 8dcc20f..34ea889 100644 +--- a/drivers/gpu/drm/radeon/radeon_semaphore.c ++++ b/drivers/gpu/drm/radeon/radeon_semaphore.c +@@ -34,7 +34,7 @@ + int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore) + { +- int r; ++ int i, r; + + *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); + if (*semaphore == NULL) { +@@ -50,54 +50,117 @@ int radeon_semaphore_create(struct radeon_device *rdev, + (*semaphore)->waiters = 0; + (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); + *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; ++ ++ for (i = 0; i < RADEON_NUM_RINGS; ++i) ++ (*semaphore)->sync_to[i] = NULL; ++ + return 0; + } + +-void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, ++bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx, + struct radeon_semaphore *semaphore) + { +- --semaphore->waiters; +- radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); ++ struct radeon_ring *ring = &rdev->ring[ridx]; ++ ++ if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) { ++ --semaphore->waiters; ++ ++ /* for debugging lockup only, used by sysfs debug files */ ++ ring->last_semaphore_signal_addr = semaphore->gpu_addr; ++ return true; ++ } ++ return false; + } + +-void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, ++bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, + struct radeon_semaphore *semaphore) + { +- ++semaphore->waiters; +- radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); ++ struct radeon_ring *ring = &rdev->ring[ridx]; ++ ++ if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) { ++ ++semaphore->waiters; ++ ++ /* for debugging lockup 
only, used by sysfs debug files */ ++ ring->last_semaphore_wait_addr = semaphore->gpu_addr; ++ return true; ++ } ++ return false; + } + +-/* caller must hold ring lock */ ++/** ++ * radeon_semaphore_sync_to - use the semaphore to sync to a fence ++ * ++ * @semaphore: semaphore object to add fence to ++ * @fence: fence to sync to ++ * ++ * Sync to the fence using this semaphore object ++ */ ++void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, ++ struct radeon_fence *fence) ++{ ++ struct radeon_fence *other; ++ ++ if (!fence) ++ return; ++ ++ other = semaphore->sync_to[fence->ring]; ++ semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); ++} ++ ++/** ++ * radeon_semaphore_sync_rings - sync ring to all registered fences ++ * ++ * @rdev: radeon_device pointer ++ * @semaphore: semaphore object to use for sync ++ * @ring: ring that needs sync ++ * ++ * Ensure that all registered fences are signaled before letting ++ * the ring continue. The caller must hold the ring lock. 
++ */ + int radeon_semaphore_sync_rings(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, +- int signaler, int waiter) ++ int ring) + { +- int r; ++ int i, r; + +- /* no need to signal and wait on the same ring */ +- if (signaler == waiter) { +- return 0; +- } ++ for (i = 0; i < RADEON_NUM_RINGS; ++i) { ++ struct radeon_fence *fence = semaphore->sync_to[i]; + +- /* prevent GPU deadlocks */ +- if (!rdev->ring[signaler].ready) { +- dev_err(rdev->dev, "Trying to sync to a disabled ring!"); +- return -EINVAL; +- } ++ /* check if we really need to sync */ ++ if (!radeon_fence_need_sync(fence, ring)) ++ continue; + +- r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); +- if (r) { +- return r; +- } +- radeon_semaphore_emit_signal(rdev, signaler, semaphore); +- radeon_ring_commit(rdev, &rdev->ring[signaler]); ++ /* prevent GPU deadlocks */ ++ if (!rdev->ring[i].ready) { ++ dev_err(rdev->dev, "Syncing to a disabled ring!"); ++ return -EINVAL; ++ } + +- /* we assume caller has already allocated space on waiters ring */ +- radeon_semaphore_emit_wait(rdev, waiter, semaphore); ++ /* allocate enough space for sync command */ ++ r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); ++ if (r) { ++ return r; ++ } + +- /* for debugging lockup only, used by sysfs debug files */ +- rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr; +- rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr; ++ /* emit the signal semaphore */ ++ if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { ++ /* signaling wasn't successful wait manually */ ++ radeon_ring_undo(&rdev->ring[i]); ++ radeon_fence_wait_locked(fence); ++ continue; ++ } ++ ++ /* we assume caller has already allocated space on waiters ring */ ++ if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { ++ /* waiting wasn't successful wait manually */ ++ radeon_ring_undo(&rdev->ring[i]); ++ radeon_fence_wait_locked(fence); ++ continue; ++ } ++ ++ radeon_ring_commit(rdev, &rdev->ring[i]); 
++ radeon_fence_note_sync(fence, ring); ++ } + + return 0; + } +diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c +index f9b02e3..aca8cbe 100644 +--- a/drivers/gpu/drm/radeon/rv770_dma.c ++++ b/drivers/gpu/drm/radeon/rv770_dma.c +@@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; +diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c +index 17205fd..97af34c 100644 +--- a/drivers/gpu/drm/radeon/si_dma.c ++++ b/drivers/gpu/drm/radeon/si_dma.c +@@ -202,13 +202,8 @@ int si_copy_dma(struct radeon_device *rdev, + return r; + } + +- if (radeon_fence_need_sync(*fence, ring->idx)) { +- radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, +- ring->idx); +- radeon_fence_note_sync(*fence, ring->idx); +- } else { +- radeon_semaphore_free(rdev, &sem, NULL); +- } ++ radeon_semaphore_sync_to(sem, *fence); ++ radeon_semaphore_sync_rings(rdev, sem, ring->idx); + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; +diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c +index f680f5f..c310a0a 100644 +--- a/drivers/gpu/drm/radeon/uvd_v1_0.c ++++ b/drivers/gpu/drm/radeon/uvd_v1_0.c +@@ -360,7 +360,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) + * + * Emit a semaphore command (either wait or signal) to the UVD ring. 
+ */ +-void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, ++bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -375,6 +375,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, emit_wait ? 1 : 0); ++ ++ return true; + } + + /** +diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c +index 5b6fa1f..d722db2 100644 +--- a/drivers/gpu/drm/radeon/uvd_v3_1.c ++++ b/drivers/gpu/drm/radeon/uvd_v3_1.c +@@ -37,7 +37,7 @@ + * + * Emit a semaphore command (either wait or signal) to the UVD ring. + */ +-void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, ++bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +@@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, 0x80 | (emit_wait ? 
1 : 0)); ++ ++ return true; + } +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0005-drm-radeon-improve-ring-debugfs-a-bit.patch b/common/recipes-kernel/linux/linux-amd/0005-drm-radeon-improve-ring-debugfs-a-bit.patch new file mode 100644 index 00000000..97f8f70e --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0005-drm-radeon-improve-ring-debugfs-a-bit.patch @@ -0,0 +1,93 @@ +From ea2e222f240d14964ab271d669e8d2e0176cf483 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 12 Nov 2013 10:55:12 -0700 +Subject: [PATCH 05/60] drm/radeon: improve ring debugfs a bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_ring.c | 56 ++++++++++++++++++++++++------------ + 1 file changed, 38 insertions(+), 18 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c +index 9214403..f1cec22 100644 +--- a/drivers/gpu/drm/radeon/radeon_ring.c ++++ b/drivers/gpu/drm/radeon/radeon_ring.c +@@ -790,34 +790,54 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) + struct radeon_device *rdev = dev->dev_private; + int ridx = *(int*)node->info_ent->data; + struct radeon_ring *ring = &rdev->ring[ridx]; ++ ++ uint32_t rptr, wptr, rptr_next; + unsigned count, i, j; +- u32 tmp; + + radeon_ring_free_size(rdev, ring); + count = (ring->ring_size / 4) - ring->ring_free_dw; +- tmp = radeon_ring_get_wptr(rdev, ring); +- seq_printf(m, "wptr(0x%04x): 0x%08x [%5d]\n", ring->wptr_reg, tmp, tmp); +- tmp = radeon_ring_get_rptr(rdev, ring); +- seq_printf(m, "rptr(0x%04x): 0x%08x [%5d]\n", ring->rptr_reg, tmp, tmp); ++ ++ wptr = radeon_ring_get_wptr(rdev, ring); ++ seq_printf(m, "wptr(0x%04x): 0x%08x [%5d]\n", ++ ring->wptr_reg, wptr, wptr); ++ ++ rptr = radeon_ring_get_rptr(rdev, ring); ++ seq_printf(m, "rptr(0x%04x): 0x%08x 
[%5d]\n", ++ ring->rptr_reg, rptr, rptr); ++ + if (ring->rptr_save_reg) { +- seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg, +- RREG32(ring->rptr_save_reg)); +- } +- seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr); +- seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", ring->rptr, ring->rptr); +- seq_printf(m, "last semaphore signal addr : 0x%016llx\n", ring->last_semaphore_signal_addr); +- seq_printf(m, "last semaphore wait addr : 0x%016llx\n", ring->last_semaphore_wait_addr); ++ rptr_next = RREG32(ring->rptr_save_reg); ++ seq_printf(m, "rptr next(0x%04x): 0x%08x [%5d]\n", ++ ring->rptr_save_reg, rptr_next, rptr_next); ++ } else ++ rptr_next = ~0; ++ ++ seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ++ ring->wptr, ring->wptr); ++ seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", ++ ring->rptr, ring->rptr); ++ seq_printf(m, "last semaphore signal addr : 0x%016llx\n", ++ ring->last_semaphore_signal_addr); ++ seq_printf(m, "last semaphore wait addr : 0x%016llx\n", ++ ring->last_semaphore_wait_addr); + seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); + seq_printf(m, "%u dwords in ring\n", count); ++ ++ if (!ring->ready) ++ return 0; ++ + /* print 8 dw before current rptr as often it's the last executed + * packet that is the root issue + */ +- i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; +- if (ring->ready) { +- for (j = 0; j <= (count + 32); j++) { +- seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]); +- i = (i + 1) & ring->ptr_mask; +- } ++ i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; ++ for (j = 0; j <= (count + 32); j++) { ++ seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); ++ if (rptr == i) ++ seq_puts(m, " *"); ++ if (rptr_next == i) ++ seq_puts(m, " #"); ++ seq_puts(m, "\n"); ++ i = (i + 1) & ring->ptr_mask; + } + return 0; + } +-- +1.9.1 + diff --git 
a/common/recipes-kernel/linux/linux-amd/0006-drm-radeon-report-the-real-offset-in-radeon_sa_bo_du.patch b/common/recipes-kernel/linux/linux-amd/0006-drm-radeon-report-the-real-offset-in-radeon_sa_bo_du.patch new file mode 100644 index 00000000..4c8a4038 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0006-drm-radeon-report-the-real-offset-in-radeon_sa_bo_du.patch @@ -0,0 +1,39 @@ +From 38c9a6d9c79a23d32b2312372833fc097d5d9fc3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 10 Dec 2013 12:46:23 -0700 +Subject: [PATCH 06/60] drm/radeon: report the real offset in + radeon_sa_bo_dump_debug_info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_sa.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c +index f0bac68..c062580 100644 +--- a/drivers/gpu/drm/radeon/radeon_sa.c ++++ b/drivers/gpu/drm/radeon/radeon_sa.c +@@ -402,13 +402,15 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, + + spin_lock(&sa_manager->wq.lock); + list_for_each_entry(i, &sa_manager->olist, olist) { ++ uint64_t soffset = i->soffset + sa_manager->gpu_addr; ++ uint64_t eoffset = i->eoffset + sa_manager->gpu_addr; + if (&i->olist == sa_manager->hole) { + seq_printf(m, ">"); + } else { + seq_printf(m, " "); + } +- seq_printf(m, "[0x%08x 0x%08x] size %8d", +- i->soffset, i->eoffset, i->eoffset - i->soffset); ++ seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", ++ soffset, eoffset, eoffset - soffset); + if (i->fence) { + seq_printf(m, " protected by 0x%016llx on ring %d", + i->fence->seq, i->fence->ring); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0007-drm-radeon-update-fence-values-in-before-reporting-t.patch 
b/common/recipes-kernel/linux/linux-amd/0007-drm-radeon-update-fence-values-in-before-reporting-t.patch new file mode 100644 index 00000000..71040d66 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0007-drm-radeon-update-fence-values-in-before-reporting-t.patch @@ -0,0 +1,30 @@ +From 4c98e15897eb43f33089c1726ea4a0c61822749d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 10 Dec 2013 12:48:45 -0700 +Subject: [PATCH 07/60] drm/radeon: update fence values in before reporting + them +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_fence.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c +index 8aea16e..51760b7 100644 +--- a/drivers/gpu/drm/radeon/radeon_fence.c ++++ b/drivers/gpu/drm/radeon/radeon_fence.c +@@ -945,6 +945,8 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) + if (!rdev->fence_drv[i].initialized) + continue; + ++ radeon_fence_process(rdev, i); ++ + seq_printf(m, "--- ring %d ---\n", i); + seq_printf(m, "Last signaled fence 0x%016llx\n", + (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0008-drm-radeon-cleanup-radeon_ttm-debugfs-handling.patch b/common/recipes-kernel/linux/linux-amd/0008-drm-radeon-cleanup-radeon_ttm-debugfs-handling.patch new file mode 100644 index 00000000..32fc3a06 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0008-drm-radeon-cleanup-radeon_ttm-debugfs-handling.patch @@ -0,0 +1,109 @@ +From f2938d5cce087b849c4353d17c636141e00fb624 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 9 Dec 2013 06:50:21 -0700 +Subject: [PATCH 08/60] drm/radeon: cleanup radeon_ttm debugfs handling 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Otherwise we not necessary export the right information. + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_ttm.c | 59 +++++++++++++++---------------------- + 1 file changed, 23 insertions(+), 36 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index 84323c9..3e1ea9c 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -835,16 +835,15 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) + return 0; + } + +- +-#define RADEON_DEBUGFS_MEM_TYPES 2 +- + #if defined(CONFIG_DEBUG_FS) ++ + static int radeon_mm_dump_table(struct seq_file *m, void *data) + { + struct drm_info_node *node = (struct drm_info_node *)m->private; +- struct drm_mm *mm = (struct drm_mm *)node->info_ent->data; ++ unsigned ttm_pl = *(int *)node->info_ent->data; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; ++ struct drm_mm *mm = (struct drm_mm *)rdev->mman.bdev.man[ttm_pl].priv; + int ret; + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + +@@ -853,46 +852,34 @@ static int radeon_mm_dump_table(struct seq_file *m, void *data) + spin_unlock(&glob->lru_lock); + return ret; + } ++ ++static int ttm_pl_vram = TTM_PL_VRAM; ++static int ttm_pl_tt = TTM_PL_TT; ++ ++static struct drm_info_list radeon_ttm_debugfs_list[] = { ++ {"radeon_vram_mm", radeon_mm_dump_table, 0, &ttm_pl_vram}, ++ {"radeon_gtt_mm", radeon_mm_dump_table, 0, &ttm_pl_tt}, ++ {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, ++#ifdef CONFIG_SWIOTLB ++ {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} ++#endif ++}; ++ + #endif + + static int radeon_ttm_debugfs_init(struct radeon_device *rdev) + { + #if defined(CONFIG_DEBUG_FS) +- static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES+2]; +- static char 
radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES+2][32]; +- unsigned i; ++ unsigned count = ARRAY_SIZE(radeon_ttm_debugfs_list); + +- for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) { +- if (i == 0) +- sprintf(radeon_mem_types_names[i], "radeon_vram_mm"); +- else +- sprintf(radeon_mem_types_names[i], "radeon_gtt_mm"); +- radeon_mem_types_list[i].name = radeon_mem_types_names[i]; +- radeon_mem_types_list[i].show = &radeon_mm_dump_table; +- radeon_mem_types_list[i].driver_features = 0; +- if (i == 0) +- radeon_mem_types_list[i].data = rdev->mman.bdev.man[TTM_PL_VRAM].priv; +- else +- radeon_mem_types_list[i].data = rdev->mman.bdev.man[TTM_PL_TT].priv; +- +- } +- /* Add ttm page pool to debugfs */ +- sprintf(radeon_mem_types_names[i], "ttm_page_pool"); +- radeon_mem_types_list[i].name = radeon_mem_types_names[i]; +- radeon_mem_types_list[i].show = &ttm_page_alloc_debugfs; +- radeon_mem_types_list[i].driver_features = 0; +- radeon_mem_types_list[i++].data = NULL; + #ifdef CONFIG_SWIOTLB +- if (swiotlb_nr_tbl()) { +- sprintf(radeon_mem_types_names[i], "ttm_dma_page_pool"); +- radeon_mem_types_list[i].name = radeon_mem_types_names[i]; +- radeon_mem_types_list[i].show = &ttm_dma_page_alloc_debugfs; +- radeon_mem_types_list[i].driver_features = 0; +- radeon_mem_types_list[i++].data = NULL; +- } ++ if (!swiotlb_nr_tbl()) ++ --count; + #endif +- return radeon_debugfs_add_files(rdev, radeon_mem_types_list, i); + +-#endif ++ return radeon_debugfs_add_files(rdev, radeon_ttm_debugfs_list, count); ++#else ++ + return 0; ++#endif + } +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0009-drm-radeon-add-VRAM-debugfs-access-v3.patch b/common/recipes-kernel/linux/linux-amd/0009-drm-radeon-add-VRAM-debugfs-access-v3.patch new file mode 100644 index 00000000..f936cef8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0009-drm-radeon-add-VRAM-debugfs-access-v3.patch @@ -0,0 +1,168 @@ +From c82817c14ce8f586745fd91dfb76bbba29d2b905 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 10 Dec 2013 07:45:24 -0700 +Subject: [PATCH 09/60] drm/radeon: add VRAM debugfs access v3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Not very fast, but makes it possible to access even the +normally inaccessible parts of VRAM from userspace. + +v2: use MM_INDEX_HI for >2GB mem access, add default_llseek +v3: set inode size in the open callback + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/evergreen_reg.h | 1 + + drivers/gpu/drm/radeon/radeon.h | 4 ++ + drivers/gpu/drm/radeon/radeon_ttm.c | 77 +++++++++++++++++++++++++++++++++- + 3 files changed, 81 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h +index 8a4e641..a0f63ff 100644 +--- a/drivers/gpu/drm/radeon/evergreen_reg.h ++++ b/drivers/gpu/drm/radeon/evergreen_reg.h +@@ -33,6 +33,7 @@ + #define EVERGREEN_PIF_PHY0_DATA 0xc + #define EVERGREEN_PIF_PHY1_INDEX 0x10 + #define EVERGREEN_PIF_PHY1_DATA 0x14 ++#define EVERGREEN_MM_INDEX_HI 0x18 + + #define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0x310 + #define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0x324 +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 7601071..429a3c8 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -413,6 +413,10 @@ struct radeon_mman { + struct ttm_bo_device bdev; + bool mem_global_referenced; + bool initialized; ++ ++#if defined(CONFIG_DEBUG_FS) ++ struct dentry *vram; ++#endif + }; + + /* bo virtual address in a specific vm */ +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index 3e1ea9c..eac95bb 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -39,12 +39,14 @@ + #include <linux/seq_file.h> + #include <linux/slab.h> + #include <linux/swiotlb.h> 
++#include <linux/debugfs.h> + #include "radeon_reg.h" + #include "radeon.h" + + #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) + + static int radeon_ttm_debugfs_init(struct radeon_device *rdev); ++static void radeon_ttm_debugfs_fini(struct radeon_device *rdev); + + static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev) + { +@@ -756,6 +758,7 @@ void radeon_ttm_fini(struct radeon_device *rdev) + + if (!rdev->mman.initialized) + return; ++ radeon_ttm_debugfs_fini(rdev); + if (rdev->stollen_vga_memory) { + r = radeon_bo_reserve(rdev->stollen_vga_memory, false); + if (r == 0) { +@@ -865,12 +868,75 @@ static struct drm_info_list radeon_ttm_debugfs_list[] = { + #endif + }; + ++static int radeon_ttm_vram_open(struct inode *inode, struct file *filep) ++{ ++ struct radeon_device *rdev = inode->i_private; ++ i_size_write(inode, rdev->mc.mc_vram_size); ++ filep->private_data = inode->i_private; ++ return 0; ++} ++ ++static ssize_t radeon_ttm_vram_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct radeon_device *rdev = f->private_data; ++ ssize_t result = 0; ++ int r; ++ ++ if (size & 0x3 || *pos & 0x3) ++ return -EINVAL; ++ ++ while (size) { ++ unsigned long flags; ++ uint32_t value; ++ ++ if (*pos >= rdev->mc.mc_vram_size) ++ return result; ++ ++ spin_lock_irqsave(&rdev->mmio_idx_lock, flags); ++ WREG32(RADEON_MM_INDEX, ((uint32_t)*pos) | 0x80000000); ++ if (rdev->family >= CHIP_CEDAR) ++ WREG32(EVERGREEN_MM_INDEX_HI, *pos >> 31); ++ value = RREG32(RADEON_MM_DATA); ++ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); ++ ++ r = put_user(value, (uint32_t *)buf); ++ if (r) ++ return r; ++ ++ result += 4; ++ buf += 4; ++ *pos += 4; ++ size -= 4; ++ } ++ ++ return result; ++} ++ ++static const struct file_operations radeon_ttm_vram_fops = { ++ .owner = THIS_MODULE, ++ .open = radeon_ttm_vram_open, ++ .read = radeon_ttm_vram_read, ++ .llseek = default_llseek ++}; ++ + #endif + + static int 
radeon_ttm_debugfs_init(struct radeon_device *rdev) + { + #if defined(CONFIG_DEBUG_FS) +- unsigned count = ARRAY_SIZE(radeon_ttm_debugfs_list); ++ unsigned count; ++ ++ struct drm_minor *minor = rdev->ddev->primary; ++ struct dentry *ent, *root = minor->debugfs_root; ++ ++ ent = debugfs_create_file("radeon_vram", S_IFREG | S_IRUGO, root, ++ rdev, &radeon_ttm_vram_fops); ++ if (IS_ERR(ent)) ++ return PTR_ERR(ent); ++ rdev->mman.vram = ent; ++ ++ count = ARRAY_SIZE(radeon_ttm_debugfs_list); + + #ifdef CONFIG_SWIOTLB + if (!swiotlb_nr_tbl()) +@@ -883,3 +949,12 @@ static int radeon_ttm_debugfs_init(struct radeon_device *rdev) + return 0; + #endif + } ++ ++static void radeon_ttm_debugfs_fini(struct radeon_device *rdev) ++{ ++#if defined(CONFIG_DEBUG_FS) ++ ++ debugfs_remove(rdev->mman.vram); ++ rdev->mman.vram = NULL; ++#endif ++} +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0010-drm-radeon-add-GART-debugfs-access-v3.patch b/common/recipes-kernel/linux/linux-amd/0010-drm-radeon-add-GART-debugfs-access-v3.patch new file mode 100644 index 00000000..4f81a968 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0010-drm-radeon-add-GART-debugfs-access-v3.patch @@ -0,0 +1,119 @@ +From ad896e4b1af2c05a068357e8be7a8be25671df8e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Wed, 11 Dec 2013 06:13:22 -0700 +Subject: [PATCH 10/60] drm/radeon: add GART debugfs access v3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +v2: add default_llseek +v3: set inode size in the open callback + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 1 + + drivers/gpu/drm/radeon/radeon_ttm.c | 63 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 64 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 429a3c8..f1ce3064 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ 
b/drivers/gpu/drm/radeon/radeon.h +@@ -416,6 +416,7 @@ struct radeon_mman { + + #if defined(CONFIG_DEBUG_FS) + struct dentry *vram; ++ struct dentry *gtt; + #endif + }; + +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index eac95bb..e632042 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -920,6 +920,60 @@ static const struct file_operations radeon_ttm_vram_fops = { + .llseek = default_llseek + }; + ++static int radeon_ttm_gtt_open(struct inode *inode, struct file *filep) ++{ ++ struct radeon_device *rdev = inode->i_private; ++ i_size_write(inode, rdev->mc.gtt_size); ++ filep->private_data = inode->i_private; ++ return 0; ++} ++ ++static ssize_t radeon_ttm_gtt_read(struct file *f, char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ struct radeon_device *rdev = f->private_data; ++ ssize_t result = 0; ++ int r; ++ ++ while (size) { ++ loff_t p = *pos / PAGE_SIZE; ++ unsigned off = *pos & ~PAGE_MASK; ++ ssize_t cur_size = min(size, PAGE_SIZE - off); ++ struct page *page; ++ void *ptr; ++ ++ if (p >= rdev->gart.num_cpu_pages) ++ return result; ++ ++ page = rdev->gart.pages[p]; ++ if (page) { ++ ptr = kmap(page); ++ ptr += off; ++ ++ r = copy_to_user(buf, ptr, cur_size); ++ kunmap(rdev->gart.pages[p]); ++ } else ++ r = clear_user(buf, cur_size); ++ ++ if (r) ++ return -EFAULT; ++ ++ result += cur_size; ++ buf += cur_size; ++ *pos += cur_size; ++ size -= cur_size; ++ } ++ ++ return result; ++} ++ ++static const struct file_operations radeon_ttm_gtt_fops = { ++ .owner = THIS_MODULE, ++ .open = radeon_ttm_gtt_open, ++ .read = radeon_ttm_gtt_read, ++ .llseek = default_llseek ++}; ++ + #endif + + static int radeon_ttm_debugfs_init(struct radeon_device *rdev) +@@ -936,6 +990,12 @@ static int radeon_ttm_debugfs_init(struct radeon_device *rdev) + return PTR_ERR(ent); + rdev->mman.vram = ent; + ++ ent = debugfs_create_file("radeon_gtt", S_IFREG | S_IRUGO, root, ++ rdev, 
&radeon_ttm_gtt_fops); ++ if (IS_ERR(ent)) ++ return PTR_ERR(ent); ++ rdev->mman.gtt = ent; ++ + count = ARRAY_SIZE(radeon_ttm_debugfs_list); + + #ifdef CONFIG_SWIOTLB +@@ -956,5 +1016,8 @@ static void radeon_ttm_debugfs_fini(struct radeon_device *rdev) + + debugfs_remove(rdev->mman.vram); + rdev->mman.vram = NULL; ++ ++ debugfs_remove(rdev->mman.gtt); ++ rdev->mman.gtt = NULL; + #endif + } +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0011-drm-radeon-fix-VMID-use-tracking.patch b/common/recipes-kernel/linux/linux-amd/0011-drm-radeon-fix-VMID-use-tracking.patch new file mode 100644 index 00000000..48343177 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0011-drm-radeon-fix-VMID-use-tracking.patch @@ -0,0 +1,71 @@ +From ee906e626e202a3fd97bbffd9377eead953eac2c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Sat, 14 Dec 2013 10:02:57 -0700 +Subject: [PATCH 11/60] drm/radeon: fix VMID use tracking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Otherwise we allocate a new VMID on nearly every submit. 
+ +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 2 ++ + drivers/gpu/drm/radeon/radeon_gart.c | 8 +++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index f1ce3064..133e9ad 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -863,6 +863,8 @@ struct radeon_vm { + struct radeon_fence *fence; + /* last flush or NULL if we still need to flush */ + struct radeon_fence *last_flush; ++ /* last use of vmid */ ++ struct radeon_fence *last_id_use; + }; + + struct radeon_vm_manager { +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index f8d7b16..cdab083 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -688,7 +688,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + unsigned i; + + /* check if the id is still valid */ +- if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id]) ++ if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ +@@ -743,6 +743,9 @@ void radeon_vm_fence(struct radeon_device *rdev, + + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); ++ ++ radeon_fence_unref(&vm->last_id_use); ++ vm->last_id_use = radeon_fence_ref(fence); + } + + /** +@@ -1246,6 +1249,8 @@ void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) + { + vm->id = 0; + vm->fence = NULL; ++ vm->last_flush = NULL; ++ vm->last_id_use = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->list); + INIT_LIST_HEAD(&vm->va); +@@ -1284,5 +1289,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) + } + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); ++ radeon_fence_unref(&vm->last_id_use); + mutex_unlock(&vm->mutex); + } +-- +1.9.1 + diff --git 
a/common/recipes-kernel/linux/linux-amd/0012-drm-radeon-add-missing-trace-point.patch b/common/recipes-kernel/linux/linux-amd/0012-drm-radeon-add-missing-trace-point.patch new file mode 100644 index 00000000..291a8cfa --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0012-drm-radeon-add-missing-trace-point.patch @@ -0,0 +1,28 @@ +From bae97ce5e4a6cff7454459011c374dc153d8815a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Sat, 14 Dec 2013 04:11:08 -0700 +Subject: [PATCH 12/60] drm/radeon: add missing trace point +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_gart.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index cdab083..d1077f6 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -701,6 +701,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + if (fence == NULL) { + /* found a free one */ + vm->id = i; ++ trace_radeon_vm_grab_id(vm->id, ring); + return NULL; + } + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0013-drm-radeon-add-semaphore-trace-point.patch b/common/recipes-kernel/linux/linux-amd/0013-drm-radeon-add-semaphore-trace-point.patch new file mode 100644 index 00000000..310881f8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0013-drm-radeon-add-semaphore-trace-point.patch @@ -0,0 +1,95 @@ +From 80508aff288ba2fa4d9cc35204e83df3ab57d573 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Thu, 7 Nov 2013 05:32:33 -0700 +Subject: [PATCH 13/60] drm/radeon: add semaphore trace point +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König 
<christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_semaphore.c | 6 +++++- + drivers/gpu/drm/radeon/radeon_trace.h | 36 +++++++++++++++++++++++++++++++ + 2 files changed, 41 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c +index 34ea889..2b42aa1 100644 +--- a/drivers/gpu/drm/radeon/radeon_semaphore.c ++++ b/drivers/gpu/drm/radeon/radeon_semaphore.c +@@ -29,7 +29,7 @@ + */ + #include <drm/drmP.h> + #include "radeon.h" +- ++#include "radeon_trace.h" + + int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore) +@@ -62,6 +62,8 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx, + { + struct radeon_ring *ring = &rdev->ring[ridx]; + ++ trace_radeon_semaphore_signale(ridx, semaphore); ++ + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) { + --semaphore->waiters; + +@@ -77,6 +79,8 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, + { + struct radeon_ring *ring = &rdev->ring[ridx]; + ++ trace_radeon_semaphore_wait(ridx, semaphore); ++ + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) { + ++semaphore->waiters; + +diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h +index 811bca6..9f0e181 100644 +--- a/drivers/gpu/drm/radeon/radeon_trace.h ++++ b/drivers/gpu/drm/radeon/radeon_trace.h +@@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, + TP_ARGS(dev, seqno) + ); + ++DECLARE_EVENT_CLASS(radeon_semaphore_request, ++ ++ TP_PROTO(int ring, struct radeon_semaphore *sem), ++ ++ TP_ARGS(ring, sem), ++ ++ TP_STRUCT__entry( ++ __field(int, ring) ++ __field(signed, waiters) ++ __field(uint64_t, gpu_addr) ++ ), ++ ++ TP_fast_assign( ++ __entry->ring = ring; ++ __entry->waiters = sem->waiters; ++ __entry->gpu_addr = sem->gpu_addr; ++ ), ++ ++ TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring, ++ 
__entry->waiters, __entry->gpu_addr) ++); ++ ++DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale, ++ ++ TP_PROTO(int ring, struct radeon_semaphore *sem), ++ ++ TP_ARGS(ring, sem) ++); ++ ++DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait, ++ ++ TP_PROTO(int ring, struct radeon_semaphore *sem), ++ ++ TP_ARGS(ring, sem) ++); ++ + #endif + + /* This part must be outside protection */ +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0014-drm-radeon-add-VMID-allocation-trace-point.patch b/common/recipes-kernel/linux/linux-amd/0014-drm-radeon-add-VMID-allocation-trace-point.patch new file mode 100644 index 00000000..306f2dc7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0014-drm-radeon-add-VMID-allocation-trace-point.patch @@ -0,0 +1,64 @@ +From 8c7e7c812de53894e1fa574188581bc39be8023d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 25 Nov 2013 15:42:10 +0100 +Subject: [PATCH 14/60] drm/radeon: add VMID allocation trace point +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon_gart.c | 2 ++ + drivers/gpu/drm/radeon/radeon_trace.h | 15 +++++++++++++++ + 2 files changed, 17 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index d1077f6..f7c0b64 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -29,6 +29,7 @@ + #include <drm/radeon_drm.h> + #include "radeon.h" + #include "radeon_reg.h" ++#include "radeon_trace.h" + + /* + * GART +@@ -714,6 +715,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; ++ trace_radeon_vm_grab_id(vm->id, ring); + return 
rdev->vm_manager.active[choices[i]]; + } + } +diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h +index 9f0e181..8c13aec 100644 +--- a/drivers/gpu/drm/radeon/radeon_trace.h ++++ b/drivers/gpu/drm/radeon/radeon_trace.h +@@ -47,6 +47,21 @@ TRACE_EVENT(radeon_cs, + __entry->fences) + ); + ++TRACE_EVENT(radeon_vm_grab_id, ++ TP_PROTO(unsigned vmid, int ring), ++ TP_ARGS(vmid, ring), ++ TP_STRUCT__entry( ++ __field(u32, vmid) ++ __field(u32, ring) ++ ), ++ ++ TP_fast_assign( ++ __entry->vmid = vmid; ++ __entry->ring = ring; ++ ), ++ TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) ++); ++ + TRACE_EVENT(radeon_vm_set_page, + TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags), +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0015-drm-radeon-add-uvd-debugfs-support.patch b/common/recipes-kernel/linux/linux-amd/0015-drm-radeon-add-uvd-debugfs-support.patch new file mode 100644 index 00000000..e6c3622e --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0015-drm-radeon-add-uvd-debugfs-support.patch @@ -0,0 +1,99 @@ +From 9829c84462af6185df98889073adaec5c56b3f9d Mon Sep 17 00:00:00 2001 +From: Leo Liu <leo.liu@amd.com> +Date: Mon, 25 Nov 2013 17:25:41 -0500 +Subject: [PATCH 15/60] drm/radeon: add uvd debugfs support + +Signed-off-by: Leo Liu <leo.liu@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 1 + + drivers/gpu/drm/radeon/radeon_uvd.c | 37 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 38 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 133e9ad..7b31922 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1555,6 +1555,7 @@ struct radeon_uvd { + struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; + unsigned img_size[RADEON_MAX_UVD_HANDLES]; + struct delayed_work idle_work; ++ bool status; + }; + + int radeon_uvd_init(struct radeon_device *rdev); +diff --git 
a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c +index a656b1a..a21a6af 100644 +--- a/drivers/gpu/drm/radeon/radeon_uvd.c ++++ b/drivers/gpu/drm/radeon/radeon_uvd.c +@@ -53,6 +53,7 @@ MODULE_FIRMWARE(FIRMWARE_TAHITI); + MODULE_FIRMWARE(FIRMWARE_BONAIRE); + + static void radeon_uvd_idle_work_handler(struct work_struct *work); ++static int radeon_debugfs_uvd_init(struct radeon_device *rdev); + + int radeon_uvd_init(struct radeon_device *rdev) + { +@@ -143,6 +144,10 @@ int radeon_uvd_init(struct radeon_device *rdev) + return r; + } + ++ r = radeon_debugfs_uvd_init(rdev); ++ if (r) ++ dev_err(rdev->dev, "(%d) Register debugfs file for uvd failed\n", r); ++ + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { +@@ -792,6 +797,7 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work) + schedule_delayed_work(&rdev->uvd.idle_work, + msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); + } ++ rdev->uvd.status = false; + } + + void radeon_uvd_note_usage(struct radeon_device *rdev) +@@ -819,6 +825,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev) + } else { + radeon_set_uvd_clocks(rdev, 53300, 40000); + } ++ rdev->uvd.status = true; + } + } + +@@ -958,3 +965,33 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, + + return 0; + } ++ ++/* ++ * Debugfs info ++ */ ++#if defined(CONFIG_DEBUG_FS) ++ ++static int radeon_debugfs_uvd_info(struct seq_file *m, void *data) ++{ ++ struct drm_info_node *node = (struct drm_info_node *) m->private; ++ struct drm_device *dev = node->minor->dev; ++ struct radeon_device *rdev = dev->dev_private; ++ ++ seq_printf(m, "UVD Status: %s\n", ((rdev->uvd.status) ? 
"Busy" : "Idle")); ++ ++ return 0; ++} ++ ++static struct drm_info_list radeon_uvd_info_list[] = { ++ {"radeon_uvd_info", radeon_debugfs_uvd_info, 0, NULL}, ++}; ++#endif ++ ++static int radeon_debugfs_uvd_init(struct radeon_device *rdev) ++{ ++#if defined(CONFIG_DEBUG_FS) ++ return radeon_debugfs_add_files(rdev, radeon_uvd_info_list, ARRAY_SIZE(radeon_uvd_info_list)); ++#else ++ return 0; ++#endif ++} +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0016-drm-radeon-add-radeon_vm_bo_update-trace-point.patch b/common/recipes-kernel/linux/linux-amd/0016-drm-radeon-add-radeon_vm_bo_update-trace-point.patch new file mode 100644 index 00000000..6e2a2cd7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0016-drm-radeon-add-radeon_vm_bo_update-trace-point.patch @@ -0,0 +1,138 @@ +From e24ff1069843abc950527938830ea32a2ed6463a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 25 Nov 2013 15:42:11 +0100 +Subject: [PATCH 16/60] drm/radeon: add radeon_vm_bo_update trace point +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Also rename the function to better reflect what it is doing. 
+ +agd5f: fix argument size warning + +Signed-off-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 8 ++++---- + drivers/gpu/drm/radeon/radeon_cs.c | 4 ++-- + drivers/gpu/drm/radeon/radeon_gart.c | 14 ++++++++------ + drivers/gpu/drm/radeon/radeon_trace.h | 18 ++++++++++++++++++ + 4 files changed, 32 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 7b31922..6bc42c6 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -2711,10 +2711,10 @@ void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence); + uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); +-int radeon_vm_bo_update_pte(struct radeon_device *rdev, +- struct radeon_vm *vm, +- struct radeon_bo *bo, +- struct ttm_mem_reg *mem); ++int radeon_vm_bo_update(struct radeon_device *rdev, ++ struct radeon_vm *vm, ++ struct radeon_bo *bo, ++ struct ttm_mem_reg *mem); + void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo); + struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, +diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c +index 697004f4..eec1ad3 100644 +--- a/drivers/gpu/drm/radeon/radeon_cs.c ++++ b/drivers/gpu/drm/radeon/radeon_cs.c +@@ -420,13 +420,13 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, + struct radeon_bo *bo; + int r; + +- r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); ++ r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) { + return r; + } + list_for_each_entry(lobj, &parser->validated, tv.head) { + bo = lobj->bo; +- r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem); ++ r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); + if (r) 
{ + return r; + } +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index f7c0b64..33bd02e 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -1071,7 +1071,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, + } + + /** +- * radeon_vm_bo_update_pte - map a bo into the vm page table ++ * radeon_vm_bo_update - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm +@@ -1083,10 +1083,10 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, + * + * Object have to be reserved & global and local mutex must be locked! + */ +-int radeon_vm_bo_update_pte(struct radeon_device *rdev, +- struct radeon_vm *vm, +- struct radeon_bo *bo, +- struct ttm_mem_reg *mem) ++int radeon_vm_bo_update(struct radeon_device *rdev, ++ struct radeon_vm *vm, ++ struct radeon_bo *bo, ++ struct ttm_mem_reg *mem) + { + unsigned ridx = rdev->asic->vm.pt_ring_index; + struct radeon_ib ib; +@@ -1132,6 +1132,8 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, + bo_va->valid = false; + } + ++ trace_radeon_vm_bo_update(bo_va); ++ + nptes = radeon_bo_ngpu_pages(bo); + + /* assume two extra pdes in case the mapping overlaps the borders */ +@@ -1210,7 +1212,7 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&bo_va->vm->mutex); + if (bo_va->soffset) { +- r = radeon_vm_bo_update_pte(rdev, bo_va->vm, bo_va->bo, NULL); ++ r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + } + mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); +diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h +index 8c13aec..0473257 100644 +--- a/drivers/gpu/drm/radeon/radeon_trace.h ++++ b/drivers/gpu/drm/radeon/radeon_trace.h +@@ -62,6 +62,24 @@ TRACE_EVENT(radeon_vm_grab_id, + TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) + ); + 
++TRACE_EVENT(radeon_vm_bo_update, ++ TP_PROTO(struct radeon_bo_va *bo_va), ++ TP_ARGS(bo_va), ++ TP_STRUCT__entry( ++ __field(u64, soffset) ++ __field(u64, eoffset) ++ __field(u32, flags) ++ ), ++ ++ TP_fast_assign( ++ __entry->soffset = bo_va->soffset; ++ __entry->eoffset = bo_va->eoffset; ++ __entry->flags = bo_va->flags; ++ ), ++ TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", ++ __entry->soffset, __entry->eoffset, __entry->flags) ++); ++ + TRACE_EVENT(radeon_vm_set_page, + TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags), +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0017-drm-radeon-drop-CP-page-table-updates-cleanup-v2.patch b/common/recipes-kernel/linux/linux-amd/0017-drm-radeon-drop-CP-page-table-updates-cleanup-v2.patch new file mode 100644 index 00000000..c0d7af20 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0017-drm-radeon-drop-CP-page-table-updates-cleanup-v2.patch @@ -0,0 +1,676 @@ +From 00110850672bf5b6bb10f7687b39574dbb2acee6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Wed, 30 Oct 2013 11:51:09 -0400 +Subject: [PATCH 17/60] drm/radeon: drop CP page table updates & cleanup v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The DMA ring seems to be stable now. 
+ +v2: remove pt_ring_index as well + +Signed-off-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 56 -------------------------- + drivers/gpu/drm/radeon/cik_sdma.c | 21 ++++------ + drivers/gpu/drm/radeon/ni.c | 76 ------------------------------------ + drivers/gpu/drm/radeon/ni_dma.c | 18 ++++----- + drivers/gpu/drm/radeon/radeon.h | 8 +++- + drivers/gpu/drm/radeon/radeon_asic.c | 15 +++---- + drivers/gpu/drm/radeon/radeon_asic.h | 31 ++++++++------- + drivers/gpu/drm/radeon/radeon_gart.c | 29 +++++++++++--- + drivers/gpu/drm/radeon/si.c | 60 ---------------------------- + drivers/gpu/drm/radeon/si_dma.c | 21 ++++------ + 10 files changed, 73 insertions(+), 262 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 54a62cf..e3bec288 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -4830,62 +4830,6 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) + } + } + +-/** +- * cik_vm_set_page - update the page tables using sDMA +- * +- * @rdev: radeon_device pointer +- * @ib: indirect buffer to fill with commands +- * @pe: addr of the page entry +- * @addr: dst addr to write into pe +- * @count: number of page entries to update +- * @incr: increase next addr by incr bytes +- * @flags: access flags +- * +- * Update the page tables using CP or sDMA (CIK). 
+- */ +-void cik_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags) +-{ +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); +- uint64_t value; +- unsigned ndw; +- +- if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { +- /* CP */ +- while (count) { +- ndw = 2 + count * 2; +- if (ndw > 0x3FFE) +- ndw = 0x3FFE; +- +- ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); +- ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(1)); +- ib->ptr[ib->length_dw++] = pe; +- ib->ptr[ib->length_dw++] = upper_32_bits(pe); +- for (; ndw > 2; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- value = radeon_vm_map_gart(rdev, addr); +- value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { +- value = addr; +- } else { +- value = 0; +- } +- addr += incr; +- value |= r600_flags; +- ib->ptr[ib->length_dw++] = value; +- ib->ptr[ib->length_dw++] = upper_32_bits(value); +- } +- } +- } else { +- /* DMA */ +- cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); +- } +-} +- + /* + * RLC + * The RLC is a multi-purpose microengine that handles a +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index e73c49e..bbe0bc8 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -639,13 +639,12 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) + { +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + +- trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ trace_radeon_vm_set_page(pe, addr, count, incr, flags); + +- if (flags & RADEON_VM_PAGE_SYSTEM) { ++ if (flags & R600_PTE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) +@@ -657,16 +656,10 @@ void cik_sdma_vm_set_page(struct 
radeon_device *rdev, + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- value = radeon_vm_map_gart(rdev, addr); +- value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { +- value = addr; +- } else { +- value = 0; +- } ++ value = radeon_vm_map_gart(rdev, addr); ++ value &= 0xFFFFFFFFFFFFF000ULL; + addr += incr; +- value |= r600_flags; ++ value |= flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } +@@ -677,7 +670,7 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + if (ndw > 0x7FFFF) + ndw = 0x7FFFF; + +- if (flags & RADEON_VM_PAGE_VALID) ++ if (flags & R600_PTE_VALID) + value = addr; + else + value = 0; +@@ -685,7 +678,7 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); +- ib->ptr[ib->length_dw++] = r600_flags; /* mask */ ++ ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 474343a..2443d11 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -174,11 +174,6 @@ extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev); + extern void evergreen_program_aspm(struct radeon_device *rdev); + extern void sumo_rlc_fini(struct radeon_device *rdev); + extern int sumo_rlc_init(struct radeon_device *rdev); +-extern void cayman_dma_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags); + + /* Firmware Names */ + MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); +@@ -2412,77 +2407,6 @@ 
void cayman_vm_decode_fault(struct radeon_device *rdev, + block, mc_id); + } + +-#define R600_ENTRY_VALID (1 << 0) +-#define R600_PTE_SYSTEM (1 << 1) +-#define R600_PTE_SNOOPED (1 << 2) +-#define R600_PTE_READABLE (1 << 5) +-#define R600_PTE_WRITEABLE (1 << 6) +- +-uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags) +-{ +- uint32_t r600_flags = 0; +- r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0; +- r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; +- r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- r600_flags |= R600_PTE_SYSTEM; +- r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; +- } +- return r600_flags; +-} +- +-/** +- * cayman_vm_set_page - update the page tables using the CP +- * +- * @rdev: radeon_device pointer +- * @ib: indirect buffer to fill with commands +- * @pe: addr of the page entry +- * @addr: dst addr to write into pe +- * @count: number of page entries to update +- * @incr: increase next addr by incr bytes +- * @flags: access flags +- * +- * Update the page tables using the CP (cayman/TN). 
+- */ +-void cayman_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags) +-{ +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); +- uint64_t value; +- unsigned ndw; +- +- if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { +- while (count) { +- ndw = 1 + count * 2; +- if (ndw > 0x3FFF) +- ndw = 0x3FFF; +- +- ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw); +- ib->ptr[ib->length_dw++] = pe; +- ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; +- for (; ndw > 1; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- value = radeon_vm_map_gart(rdev, addr); +- value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { +- value = addr; +- } else { +- value = 0; +- } +- addr += incr; +- value |= r600_flags; +- ib->ptr[ib->length_dw++] = value; +- ib->ptr[ib->length_dw++] = upper_32_bits(value); +- } +- } +- } else { +- cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); +- } +-} +- + /** + * cayman_vm_flush - vm flush using the CP + * +diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c +index 599e87b..ddc946b 100644 +--- a/drivers/gpu/drm/radeon/ni_dma.c ++++ b/drivers/gpu/drm/radeon/ni_dma.c +@@ -240,8 +240,7 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes +- * @flags: access flags +- * @r600_flags: hw access flags ++ * @flags: hw access flags + * + * Update the page tables using the DMA (cayman/TN). 
+ */ +@@ -251,13 +250,12 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) + { +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + +- trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ trace_radeon_vm_set_page(pe, addr, count, incr, flags); + +- if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) { ++ if ((flags & R600_PTE_SYSTEM) || (count == 1)) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) +@@ -268,16 +266,16 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { ++ if (flags & R600_PTE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { ++ } else if (flags & R600_PTE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; +- value |= r600_flags; ++ value |= flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } +@@ -288,7 +286,7 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + +- if (flags & RADEON_VM_PAGE_VALID) ++ if (flags & R600_PTE_VALID) + value = addr; + else + value = 0; +@@ -296,7 +294,7 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; +- ib->ptr[ib->length_dw++] = r600_flags; /* mask */ ++ ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 6bc42c6..d478c28 100644 +--- 
a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -846,6 +846,12 @@ struct radeon_mec { + #define RADEON_VM_PTB_ALIGN_MASK (RADEON_VM_PTB_ALIGN_SIZE - 1) + #define RADEON_VM_PTB_ALIGN(a) (((a) + RADEON_VM_PTB_ALIGN_MASK) & ~RADEON_VM_PTB_ALIGN_MASK) + ++#define R600_PTE_VALID (1 << 0) ++#define R600_PTE_SYSTEM (1 << 1) ++#define R600_PTE_SNOOPED (1 << 2) ++#define R600_PTE_READABLE (1 << 5) ++#define R600_PTE_WRITEABLE (1 << 6) ++ + struct radeon_vm { + struct list_head list; + struct list_head va; +@@ -1691,8 +1697,6 @@ struct radeon_asic { + struct { + int (*init)(struct radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); +- +- u32 pt_ring_index; + void (*set_page)(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, +diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c +index 5720e66..123adfe 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -1622,8 +1622,7 @@ static struct radeon_asic cayman_asic = { + .vm = { + .init = &cayman_vm_init, + .fini = &cayman_vm_fini, +- .pt_ring_index = R600_RING_TYPE_DMA_INDEX, +- .set_page = &cayman_vm_set_page, ++ .set_page = &cayman_dma_vm_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, +@@ -1723,8 +1722,7 @@ static struct radeon_asic trinity_asic = { + .vm = { + .init = &cayman_vm_init, + .fini = &cayman_vm_fini, +- .pt_ring_index = R600_RING_TYPE_DMA_INDEX, +- .set_page = &cayman_vm_set_page, ++ .set_page = &cayman_dma_vm_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, +@@ -1854,8 +1852,7 @@ static struct radeon_asic si_asic = { + .vm = { + .init = &si_vm_init, + .fini = &si_vm_fini, +- .pt_ring_index = R600_RING_TYPE_DMA_INDEX, +- .set_page = &si_vm_set_page, ++ .set_page = &si_dma_vm_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = &si_gfx_ring, +@@ -2000,8 +1997,7 @@ static struct radeon_asic ci_asic = { + .vm = { + 
.init = &cik_vm_init, + .fini = &cik_vm_fini, +- .pt_ring_index = R600_RING_TYPE_DMA_INDEX, +- .set_page = &cik_vm_set_page, ++ .set_page = &cik_sdma_vm_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring, +@@ -2102,8 +2098,7 @@ static struct radeon_asic kv_asic = { + .vm = { + .init = &cik_vm_init, + .fini = &cik_vm_fini, +- .pt_ring_index = R600_RING_TYPE_DMA_INDEX, +- .set_page = &cik_vm_set_page, ++ .set_page = &cik_sdma_vm_set_page, + }, + .ring = { + [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring, +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 8588670..8939cb3 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -577,17 +577,18 @@ int cayman_vm_init(struct radeon_device *rdev); + void cayman_vm_fini(struct radeon_device *rdev); + void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); +-void cayman_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags); + int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); + int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); + void cayman_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib); + bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); + bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); ++void cayman_dma_vm_set_page(struct radeon_device *rdev, ++ struct radeon_ib *ib, ++ uint64_t pe, ++ uint64_t addr, unsigned count, ++ uint32_t incr, uint32_t flags); ++ + void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + + int ni_dpm_init(struct radeon_device *rdev); +@@ -649,17 +650,17 @@ int si_irq_set(struct radeon_device *rdev); + int si_irq_process(struct 
radeon_device *rdev); + int si_vm_init(struct radeon_device *rdev); + void si_vm_fini(struct radeon_device *rdev); +-void si_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags); + void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); + int si_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence); ++void si_dma_vm_set_page(struct radeon_device *rdev, ++ struct radeon_ib *ib, ++ uint64_t pe, ++ uint64_t addr, unsigned count, ++ uint32_t incr, uint32_t flags); + void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + u32 si_get_xclk(struct radeon_device *rdev); + uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); +@@ -727,11 +728,11 @@ int cik_irq_process(struct radeon_device *rdev); + int cik_vm_init(struct radeon_device *rdev); + void cik_vm_fini(struct radeon_device *rdev); + void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +-void cik_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags); ++void cik_sdma_vm_set_page(struct radeon_device *rdev, ++ struct radeon_ib *ib, ++ uint64_t pe, ++ uint64_t addr, unsigned count, ++ uint32_t incr, uint32_t flags); + void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); + u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index 33bd02e..9ceabdf 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -920,6 +920,26 @@ uint64_t radeon_vm_map_gart(struct 
radeon_device *rdev, uint64_t addr) + } + + /** ++ * radeon_vm_page_flags - translate page flags to what the hw uses ++ * ++ * @flags: flags comming from userspace ++ * ++ * Translate the flags the userspace ABI uses to hw flags. ++ */ ++static uint32_t radeon_vm_page_flags(uint32_t flags) ++{ ++ uint32_t hw_flags = 0; ++ hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; ++ hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; ++ hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; ++ if (flags & RADEON_VM_PAGE_SYSTEM) { ++ hw_flags |= R600_PTE_SYSTEM; ++ hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; ++ } ++ return hw_flags; ++} ++ ++/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer +@@ -980,7 +1000,7 @@ retry: + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, + last_pt, count, incr, +- RADEON_VM_PAGE_VALID); ++ R600_PTE_VALID); + } + + count = 1; +@@ -993,7 +1013,7 @@ retry: + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, +- incr, RADEON_VM_PAGE_VALID); ++ incr, R600_PTE_VALID); + + } + +@@ -1088,7 +1108,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) + { +- unsigned ridx = rdev->asic->vm.pt_ring_index; + struct radeon_ib ib; + struct radeon_bo_va *bo_va; + unsigned nptes, npdes, ndw; +@@ -1163,7 +1182,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, + if (ndw > 0xfffff) + return -ENOMEM; + +- r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4); ++ r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; +@@ -1175,7 +1194,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, + } + + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, +- addr, bo_va->flags); ++ addr, radeon_vm_page_flags(bo_va->flags)); + + radeon_semaphore_sync_to(ib.semaphore, 
vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 50482e7..8a0bc79 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -83,11 +83,6 @@ extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_ + extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev); + extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev); + extern bool evergreen_is_display_hung(struct radeon_device *rdev); +-extern void si_dma_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags); + static void si_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable); + static void si_fini_pg(struct radeon_device *rdev); +@@ -4696,61 +4691,6 @@ static void si_vm_decode_fault(struct radeon_device *rdev, + block, mc_id); + } + +-/** +- * si_vm_set_page - update the page tables using the CP +- * +- * @rdev: radeon_device pointer +- * @ib: indirect buffer to fill with commands +- * @pe: addr of the page entry +- * @addr: dst addr to write into pe +- * @count: number of page entries to update +- * @incr: increase next addr by incr bytes +- * @flags: access flags +- * +- * Update the page tables using the CP (SI). 
+- */ +-void si_vm_set_page(struct radeon_device *rdev, +- struct radeon_ib *ib, +- uint64_t pe, +- uint64_t addr, unsigned count, +- uint32_t incr, uint32_t flags) +-{ +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); +- uint64_t value; +- unsigned ndw; +- +- if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { +- while (count) { +- ndw = 2 + count * 2; +- if (ndw > 0x3FFE) +- ndw = 0x3FFE; +- +- ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); +- ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(1)); +- ib->ptr[ib->length_dw++] = pe; +- ib->ptr[ib->length_dw++] = upper_32_bits(pe); +- for (; ndw > 2; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- value = radeon_vm_map_gart(rdev, addr); +- value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { +- value = addr; +- } else { +- value = 0; +- } +- addr += incr; +- value |= r600_flags; +- ib->ptr[ib->length_dw++] = value; +- ib->ptr[ib->length_dw++] = upper_32_bits(value); +- } +- } +- } else { +- /* DMA */ +- si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); +- } +-} +- + void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) + { + struct radeon_ring *ring = &rdev->ring[ridx]; +diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c +index 97af34c..59be2cf 100644 +--- a/drivers/gpu/drm/radeon/si_dma.c ++++ b/drivers/gpu/drm/radeon/si_dma.c +@@ -76,13 +76,12 @@ void si_dma_vm_set_page(struct radeon_device *rdev, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) + { +- uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + +- trace_radeon_vm_set_page(pe, addr, count, incr, r600_flags); ++ trace_radeon_vm_set_page(pe, addr, count, incr, flags); + +- if (flags & RADEON_VM_PAGE_SYSTEM) { ++ if (flags & R600_PTE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) +@@ -93,16 +92,10 @@ void 
si_dma_vm_set_page(struct radeon_device *rdev, + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { +- if (flags & RADEON_VM_PAGE_SYSTEM) { +- value = radeon_vm_map_gart(rdev, addr); +- value &= 0xFFFFFFFFFFFFF000ULL; +- } else if (flags & RADEON_VM_PAGE_VALID) { +- value = addr; +- } else { +- value = 0; +- } ++ value = radeon_vm_map_gart(rdev, addr); ++ value &= 0xFFFFFFFFFFFFF000ULL; + addr += incr; +- value |= r600_flags; ++ value |= flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } +@@ -113,7 +106,7 @@ void si_dma_vm_set_page(struct radeon_device *rdev, + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + +- if (flags & RADEON_VM_PAGE_VALID) ++ if (flags & R600_PTE_VALID) + value = addr; + else + value = 0; +@@ -121,7 +114,7 @@ void si_dma_vm_set_page(struct radeon_device *rdev, + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; +- ib->ptr[ib->length_dw++] = r600_flags; /* mask */ ++ ib->ptr[ib->length_dw++] = flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0018-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch b/common/recipes-kernel/linux/linux-amd/0018-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch new file mode 100644 index 00000000..f93f84ad --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0018-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch @@ -0,0 +1,236 @@ +From c3679d52cb42a2cc76c0c893ad364157dc3699dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 25 Oct 2013 18:07:55 +0200 +Subject: [PATCH 18/60] drm/radeon: add large PTE support for NI, SI and CIK v3 +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch implements support for VRAM page table entry compression. +PTE construction is enhanced to identify physically contiguous page +ranges and mark them in the PTE fragment field. L1 TLB and L2 cache +support is enabled for 64KB (SI/CIK) and 256KB (NI) PTE fragments, +significantly improving TLB utilization for VRAM allocations. + +Linear store bandwidth is improved from 60GB/s to 125GB/s on Pitcairn. +Unigine Heaven 3.0 sees an average improvement from 24.7 to 27.7 FPS +on default settings at 1920x1200 resolution with vsync disabled. + +The main comment in radeon_gart.c gives a technical description. + +v2 (chk): rebased and simplified. +v3 (chk): add missing hw setup + +Signed-off-by: Jay Cornwall <jay@jcornwall.me> +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 4 +- + drivers/gpu/drm/radeon/ni.c | 2 + + drivers/gpu/drm/radeon/radeon.h | 5 ++ + drivers/gpu/drm/radeon/radeon_gart.c | 91 +++++++++++++++++++++++++++++++++--- + drivers/gpu/drm/radeon/si.c | 5 +- + 5 files changed, 98 insertions(+), 9 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index e3bec288..e84005a 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -4526,6 +4526,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) + WREG32(MC_VM_MX_L1_TLB_CNTL, + (0xA << 7) | + ENABLE_L1_TLB | ++ ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + ENABLE_ADVANCED_DRIVER_MODEL | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); +@@ -4538,7 +4539,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | +- L2_CACHE_BIGK_FRAGMENT_SIZE(6)); ++ BANK_SELECT(4) | ++ L2_CACHE_BIGK_FRAGMENT_SIZE(4)); + /* setup context0 */ + 
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 2443d11..a3c7826 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -1227,12 +1227,14 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ++ ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | ++ BANK_SELECT(6) | + L2_CACHE_BIGK_FRAGMENT_SIZE(6)); + /* setup context0 */ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index d478c28..e796f9a 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -852,6 +852,11 @@ struct radeon_mec { + #define R600_PTE_READABLE (1 << 5) + #define R600_PTE_WRITEABLE (1 << 6) + ++/* PTE (Page Table Entry) fragment field for different page sizes */ ++#define R600_PTE_FRAG_4KB (0 << 7) ++#define R600_PTE_FRAG_64KB (4 << 7) ++#define R600_PTE_FRAG_256KB (6 << 7) ++ + struct radeon_vm { + struct list_head list; + struct list_head va; +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index 9ceabdf..f960ce6 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -1021,6 +1021,84 @@ retry: + } + + /** ++ * radeon_vm_frag_ptes - add fragment information to PTEs ++ * ++ * @rdev: radeon_device pointer ++ * @ib: IB for the update ++ * @pe_start: first PTE to handle ++ * @pe_end: last PTE to handle ++ * @addr: addr 
those PTEs should point to ++ * @flags: hw mapping flags ++ * ++ * Global and local mutex must be locked! ++ */ ++static void radeon_vm_frag_ptes(struct radeon_device *rdev, ++ struct radeon_ib *ib, ++ uint64_t pe_start, uint64_t pe_end, ++ uint64_t addr, uint32_t flags) ++{ ++ /** ++ * The MC L1 TLB supports variable sized pages, based on a fragment ++ * field in the PTE. When this field is set to a non-zero value, page ++ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE ++ * flags are considered valid for all PTEs within the fragment range ++ * and corresponding mappings are assumed to be physically contiguous. ++ * ++ * The L1 TLB can store a single PTE for the whole fragment, ++ * significantly increasing the space available for translation ++ * caching. This leads to large improvements in throughput when the ++ * TLB is under pressure. ++ * ++ * The L2 cache distributes small and large fragments into two ++ * asymmetric partitions. The large fragment cache is significantly ++ * larger. Thus, we try to use large fragments wherever possible. ++ * Userspace can support this by aligning virtual base address and ++ * allocation size to the fragment size. ++ */ ++ ++ /* NI is optimized for 256KB fragments, SI and newer for 64KB */ ++ uint64_t frag_flags = rdev->family == CHIP_CAYMAN ? ++ R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; ++ uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 
0x200 : 0x80; ++ ++ uint64_t frag_start = ALIGN(pe_start, frag_align); ++ uint64_t frag_end = pe_end & ~(frag_align - 1); ++ ++ unsigned count; ++ ++ /* system pages are non continuously */ ++ if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || ++ (frag_start >= frag_end)) { ++ ++ count = (pe_end - pe_start) / 8; ++ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, ++ RADEON_GPU_PAGE_SIZE, flags); ++ return; ++ } ++ ++ /* handle the 4K area at the beginning */ ++ if (pe_start != frag_start) { ++ count = (frag_start - pe_start) / 8; ++ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count, ++ RADEON_GPU_PAGE_SIZE, flags); ++ addr += RADEON_GPU_PAGE_SIZE * count; ++ } ++ ++ /* handle the area in the middle */ ++ count = (frag_end - frag_start) / 8; ++ radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count, ++ RADEON_GPU_PAGE_SIZE, flags | frag_flags); ++ ++ /* handle the 4K area at the end */ ++ if (frag_end != pe_end) { ++ addr += RADEON_GPU_PAGE_SIZE * count; ++ count = (pe_end - frag_end) / 8; ++ radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count, ++ RADEON_GPU_PAGE_SIZE, flags); ++ } ++} ++ ++/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer +@@ -1066,10 +1144,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, + if ((last_pte + 8 * count) != pte) { + + if (count) { +- radeon_asic_vm_set_page(rdev, ib, last_pte, +- last_dst, count, +- RADEON_GPU_PAGE_SIZE, +- flags); ++ radeon_vm_frag_ptes(rdev, ib, last_pte, ++ last_pte + 8 * count, ++ last_dst, flags); + } + + count = nptes; +@@ -1084,9 +1161,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, + } + + if (count) { +- radeon_asic_vm_set_page(rdev, ib, last_pte, +- last_dst, count, +- RADEON_GPU_PAGE_SIZE, flags); ++ radeon_vm_frag_ptes(rdev, ib, last_pte, ++ last_pte + 8 * count, ++ last_dst, flags); + } + } + +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 
8a0bc79..46b57e1 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -3942,18 +3942,21 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) + WREG32(MC_VM_MX_L1_TLB_CNTL, + (0xA << 7) | + ENABLE_L1_TLB | ++ ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + ENABLE_ADVANCED_DRIVER_MODEL | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ++ ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | +- L2_CACHE_BIGK_FRAGMENT_SIZE(0)); ++ BANK_SELECT(4) | ++ L2_CACHE_BIGK_FRAGMENT_SIZE(4)); + /* setup context0 */ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0019-drm-radeon-add-proper-support-for-RADEON_VM_BLOCK_SI.patch b/common/recipes-kernel/linux/linux-amd/0019-drm-radeon-add-proper-support-for-RADEON_VM_BLOCK_SI.patch new file mode 100644 index 00000000..f1750572 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0019-drm-radeon-add-proper-support-for-RADEON_VM_BLOCK_SI.patch @@ -0,0 +1,132 @@ +From cb32f1bcbf6f50c751d86b4527b1b01b2549ea11 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 29 Oct 2013 09:30:16 +0100 +Subject: [PATCH 19/60] drm/radeon: add proper support for RADEON_VM_BLOCK_SIZE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch makes it possible to decide how many address +bits are spent on the page directory vs the page tables. 
+ +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 1 + + drivers/gpu/drm/radeon/cikd.h | 1 + + drivers/gpu/drm/radeon/ni.c | 1 + + drivers/gpu/drm/radeon/nid.h | 1 + + drivers/gpu/drm/radeon/radeon.h | 2 +- + drivers/gpu/drm/radeon/radeon_gart.c | 3 ++- + drivers/gpu/drm/radeon/si.c | 1 + + drivers/gpu/drm/radeon/sid.h | 1 + + 8 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index e84005a..3741a68 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -4576,6 +4576,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT1_CNTL2, 4); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | ++ PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) | + RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | +diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h +index 9c8ef20..c4738bc 100644 +--- a/drivers/gpu/drm/radeon/cikd.h ++++ b/drivers/gpu/drm/radeon/cikd.h +@@ -474,6 +474,7 @@ + #define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) + #define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) + #define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) ++#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) + #define VM_CONTEXT1_CNTL 0x1414 + #define VM_CONTEXT0_CNTL2 0x1430 + #define VM_CONTEXT1_CNTL2 0x1434 +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index a3c7826..4317e57 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -1267,6 +1267,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT1_CNTL2, 4); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | ++ PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) | + 
RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | +diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h +index d996033..2e12e4d 100644 +--- a/drivers/gpu/drm/radeon/nid.h ++++ b/drivers/gpu/drm/radeon/nid.h +@@ -128,6 +128,7 @@ + #define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) + #define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) + #define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) ++#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) + #define VM_CONTEXT1_CNTL 0x1414 + #define VM_CONTEXT0_CNTL2 0x1430 + #define VM_CONTEXT1_CNTL2 0x1434 +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index e796f9a..6ad3fb2 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -836,7 +836,7 @@ struct radeon_mec { + /* defines number of bits in page table versus page directory, + * a page is 4KB so we have 12 bits offset, 9 bits in the page + * table and the remaining 19 bits are in the page directory */ +-#define RADEON_VM_BLOCK_SIZE 9 ++#define RADEON_VM_BLOCK_SIZE 12 + + /* number of entries in page table */ + #define RADEON_VM_PTE_COUNT (1 << RADEON_VM_BLOCK_SIZE) +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index f960ce6..34b09ac 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -959,6 +959,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev, + uint64_t start, uint64_t end) + { + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; ++ const unsigned align = min((uint32_t)RADEON_VM_PTB_ALIGN_SIZE, incr); + + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0; +@@ -979,7 +980,7 @@ retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_tables[pt_idx], + RADEON_VM_PTE_COUNT * 8, +- RADEON_GPU_PAGE_SIZE, false); ++ align, false); + + if (r == -ENOMEM) { + r = 
radeon_vm_evict(rdev, vm); +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 46b57e1..48ad0ae 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -3993,6 +3993,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT1_CNTL2, 4); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | ++ PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) | + RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | +diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h +index 205a961..940e36b 100644 +--- a/drivers/gpu/drm/radeon/sid.h ++++ b/drivers/gpu/drm/radeon/sid.h +@@ -357,6 +357,7 @@ + #define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) + #define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) + #define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) ++#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) + #define VM_CONTEXT1_CNTL 0x1414 + #define VM_CONTEXT0_CNTL2 0x1430 + #define VM_CONTEXT1_CNTL2 0x1434 +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0020-drm-radeon-WIP-add-copy-trace-point.patch b/common/recipes-kernel/linux/linux-amd/0020-drm-radeon-WIP-add-copy-trace-point.patch new file mode 100644 index 00000000..cfafb4f1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0020-drm-radeon-WIP-add-copy-trace-point.patch @@ -0,0 +1,59 @@ +From 6f5f465b25a55eed827f2490b84a8b6c6547dcd2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 17 Dec 2013 11:43:21 -0700 +Subject: [PATCH 20/60] drm/radeon: WIP add copy trace point +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/cik_sdma.c | 2 ++ + drivers/gpu/drm/radeon/radeon_trace.h | 18 
++++++++++++++++++ + 2 files changed, 20 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index bbe0bc8..42abd1e 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -426,6 +426,8 @@ int cik_copy_dma(struct radeon_device *rdev, + int i, num_loops; + int r = 0; + ++ trace_radeon_copy(src_offset, dst_offset, num_gpu_pages); ++ + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); +diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h +index 0473257..6e966bb 100644 +--- a/drivers/gpu/drm/radeon/radeon_trace.h ++++ b/drivers/gpu/drm/radeon/radeon_trace.h +@@ -27,6 +27,24 @@ TRACE_EVENT(radeon_bo_create, + TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages) + ); + ++TRACE_EVENT(radeon_copy, ++ TP_PROTO(u64 src, u64 dst, u32 pages), ++ TP_ARGS(src, dst, pages), ++ TP_STRUCT__entry( ++ __field(u64, src) ++ __field(u64, dst) ++ __field(u32, pages) ++ ), ++ ++ TP_fast_assign( ++ __entry->src = src; ++ __entry->dst = dst; ++ __entry->pages = pages; ++ ), ++ TP_printk("src=%010llx, dst=%010llx, pages=%u", ++ __entry->src, __entry->dst, __entry->pages) ++); ++ + TRACE_EVENT(radeon_cs, + TP_PROTO(struct radeon_cs_parser *p), + TP_ARGS(p), +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0021-drm-radeon-cik-Return-backend-map-information-to-use.patch b/common/recipes-kernel/linux/linux-amd/0021-drm-radeon-cik-Return-backend-map-information-to-use.patch new file mode 100644 index 00000000..bb2b939f --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0021-drm-radeon-cik-Return-backend-map-information-to-use.patch @@ -0,0 +1,34 @@ +From ab462fbd1f7b5fe6ac6335fd0614a287e880875f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com> +Date: Mon, 18 Nov 2013 18:25:59 +0900 +Subject: [PATCH 21/60] drm/radeon/cik: Return backend map information to + 
userspace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is required to properly calculate the tiling parameters +in userspace. + +Signed-off-by: Michel Dänzer <michel.daenzer@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon_kms.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index a134e8b..39f3a12 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -324,7 +324,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + break; + case RADEON_INFO_BACKEND_MAP: + if (rdev->family >= CHIP_BONAIRE) +- return -EINVAL; ++ *value = rdev->config.cik.backend_map; + else if (rdev->family >= CHIP_TAHITI) + *value = rdev->config.si.backend_map; + else if (rdev->family >= CHIP_CAYMAN) +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0022-drm-radeon-cik-Add-macrotile-mode-array-query.patch b/common/recipes-kernel/linux/linux-amd/0022-drm-radeon-cik-Add-macrotile-mode-array-query.patch new file mode 100644 index 00000000..f71ad69d --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0022-drm-radeon-cik-Add-macrotile-mode-array-query.patch @@ -0,0 +1,113 @@ +From c264fd5d63ea30124efe2af14fa1fca5b171a113 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com> +Date: Mon, 18 Nov 2013 18:26:00 +0900 +Subject: [PATCH 22/60] drm/radeon/cik: Add macrotile mode array query +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is required to properly calculate the tiling parameters +in userspace. 
+ +Signed-off-by: Michel Dänzer <michel.daenzer@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 3 +++ + drivers/gpu/drm/radeon/radeon.h | 1 + + drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- + drivers/gpu/drm/radeon/radeon_kms.c | 9 +++++++++ + include/uapi/drm/radeon_drm.h | 2 ++ + 5 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 3741a68..17bbdfd 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -1981,6 +1981,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) + gb_tile_moden = 0; + break; + } ++ rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; + WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); + } + } else if (num_pipe_configs == 4) { +@@ -2327,6 +2328,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) + gb_tile_moden = 0; + break; + } ++ rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; + WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); + } + } else if (num_pipe_configs == 2) { +@@ -2544,6 +2546,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) + gb_tile_moden = 0; + break; + } ++ rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; + WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); + } + } else +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 6ad3fb2..af893e3 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1997,6 +1997,7 @@ struct cik_asic { + + unsigned tile_config; + uint32_t tile_mode_array[32]; ++ uint32_t macrotile_mode_array[16]; + }; + + union radeon_asic_config { +diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c +index 9c14a1b..51b3214 100644 +--- a/drivers/gpu/drm/radeon/radeon_drv.c ++++ 
b/drivers/gpu/drm/radeon/radeon_drv.c +@@ -75,9 +75,10 @@ + * 2.32.0 - new info request for rings working + * 2.33.0 - Add SI tiling mode array query + * 2.34.0 - Add CIK tiling mode array query ++ * 2.35.0 - Add CIK macrotile mode array query + */ + #define KMS_DRIVER_MAJOR 2 +-#define KMS_DRIVER_MINOR 34 ++#define KMS_DRIVER_MINOR 35 + #define KMS_DRIVER_PATCHLEVEL 0 + int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); + int radeon_driver_unload_kms(struct drm_device *dev); +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index 39f3a12..e90c42b 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -433,6 +433,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + return -EINVAL; + } + break; ++ case RADEON_INFO_CIK_MACROTILE_MODE_ARRAY: ++ if (rdev->family >= CHIP_BONAIRE) { ++ value = rdev->config.cik.macrotile_mode_array; ++ value_size = sizeof(uint32_t)*16; ++ } else { ++ DRM_DEBUG_KMS("macrotile mode array is cik+ only!\n"); ++ return -EINVAL; ++ } ++ break; + case RADEON_INFO_SI_CP_DMA_COMPUTE: + *value = 1; + break; +diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h +index a71442b..fe421e8a 100644 +--- a/include/uapi/drm/radeon_drm.h ++++ b/include/uapi/drm/radeon_drm.h +@@ -981,6 +981,8 @@ struct drm_radeon_cs { + #define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16 + /* query if CP DMA is supported on the compute ring */ + #define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 ++/* CIK macrotile mode array */ ++#define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18 + /* query the number of render backends */ + #define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0023-drm-radeon-set-correct-number-of-banks-for-CIK-chips.patch b/common/recipes-kernel/linux/linux-amd/0023-drm-radeon-set-correct-number-of-banks-for-CIK-chips.patch new file mode 100644 index 
00000000..95399ce1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0023-drm-radeon-set-correct-number-of-banks-for-CIK-chips.patch @@ -0,0 +1,102 @@ +From 755e61a59b4a354e3b58909408f1510f2a3705d0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com> +Date: Mon, 23 Dec 2013 17:11:36 +0100 +Subject: [PATCH 23/60] drm/radeon: set correct number of banks for CIK chips + in DCE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We don't have the NUM_BANKS parameter, so we have to calculate it +from the other parameters. NUM_BANKS is not constant on CIK. + +This fixes 2D tiling for the display engine on CIK. + +Signed-off-by: Marek Olšák <marek.olsak@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/atombios_crtc.c | 64 +++++++++++++++++++++++----------- + 1 file changed, 43 insertions(+), 21 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c +index ba8742a..4be69b5 100644 +--- a/drivers/gpu/drm/radeon/atombios_crtc.c ++++ b/drivers/gpu/drm/radeon/atombios_crtc.c +@@ -1170,31 +1170,53 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, + } + + if (tiling_flags & RADEON_TILING_MACRO) { +- if (rdev->family >= CHIP_BONAIRE) +- tmp = rdev->config.cik.tile_config; +- else if (rdev->family >= CHIP_TAHITI) +- tmp = rdev->config.si.tile_config; +- else if (rdev->family >= CHIP_CAYMAN) +- tmp = rdev->config.cayman.tile_config; +- else +- tmp = rdev->config.evergreen.tile_config; ++ evergreen_tiling_fields(tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); + +- switch ((tmp & 0xf0) >> 4) { +- case 0: /* 4 banks */ +- fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_4_BANK); +- break; +- case 1: /* 8 banks */ +- default: +- fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_8_BANK); +- break; +- case 2: /* 16 banks */ +- fb_format |= 
EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_16_BANK); +- break; ++ /* Set NUM_BANKS. */ ++ if (rdev->family >= CHIP_BONAIRE) { ++ unsigned tileb, index, num_banks, tile_split_bytes; ++ ++ /* Calculate the macrotile mode index. */ ++ tile_split_bytes = 64 << tile_split; ++ tileb = 8 * 8 * target_fb->bits_per_pixel / 8; ++ tileb = min(tile_split_bytes, tileb); ++ ++ for (index = 0; tileb > 64; index++) { ++ tileb >>= 1; ++ } ++ ++ if (index >= 16) { ++ DRM_ERROR("Wrong screen bpp (%u) or tile split (%u)\n", ++ target_fb->bits_per_pixel, tile_split); ++ return -EINVAL; ++ } ++ ++ num_banks = (rdev->config.cik.macrotile_mode_array[index] >> 6) & 0x3; ++ fb_format |= EVERGREEN_GRPH_NUM_BANKS(num_banks); ++ } else { ++ /* SI and older. */ ++ if (rdev->family >= CHIP_TAHITI) ++ tmp = rdev->config.si.tile_config; ++ else if (rdev->family >= CHIP_CAYMAN) ++ tmp = rdev->config.cayman.tile_config; ++ else ++ tmp = rdev->config.evergreen.tile_config; ++ ++ switch ((tmp & 0xf0) >> 4) { ++ case 0: /* 4 banks */ ++ fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_4_BANK); ++ break; ++ case 1: /* 8 banks */ ++ default: ++ fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_8_BANK); ++ break; ++ case 2: /* 16 banks */ ++ fb_format |= EVERGREEN_GRPH_NUM_BANKS(EVERGREEN_ADDR_SURF_16_BANK); ++ break; ++ } + } + + fb_format |= EVERGREEN_GRPH_ARRAY_MODE(EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1); +- +- evergreen_tiling_fields(tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); + fb_format |= EVERGREEN_GRPH_TILE_SPLIT(tile_split); + fb_format |= EVERGREEN_GRPH_BANK_WIDTH(bankw); + fb_format |= EVERGREEN_GRPH_BANK_HEIGHT(bankh); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0024-drm-radeon-don-t-power-gate-paused-UVD-streams.patch b/common/recipes-kernel/linux/linux-amd/0024-drm-radeon-don-t-power-gate-paused-UVD-streams.patch new file mode 100644 index 00000000..363d8a6a --- /dev/null +++ 
b/common/recipes-kernel/linux/linux-amd/0024-drm-radeon-don-t-power-gate-paused-UVD-streams.patch @@ -0,0 +1,45 @@ +From 4710e465b8fe77a80c66a90a8e77aed01f5408ab Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 10 Jan 2014 06:59:40 -0700 +Subject: [PATCH 24/60] drm/radeon: don't power gate paused UVD streams +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_pm.c | 4 ++++ + drivers/gpu/drm/radeon/radeon_uvd.c | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index 10fc977..1b10e2e 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -938,6 +938,10 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) + + if (rdev->asic->dpm.powergate_uvd) { + mutex_lock(&rdev->pm.mutex); ++ /* don't powergate anything if we ++ have active but pause streams */ ++ enable |= rdev->pm.dpm.sd > 0; ++ enable |= rdev->pm.dpm.hd > 0; + /* enable/disable UVD */ + radeon_dpm_powergate_uvd(rdev, !enable); + mutex_unlock(&rdev->pm.mutex); +diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c +index a21a6af..839a0f9 100644 +--- a/drivers/gpu/drm/radeon/radeon_uvd.c ++++ b/drivers/gpu/drm/radeon/radeon_uvd.c +@@ -789,6 +789,8 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work) + + if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { ++ radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, ++ &rdev->pm.dpm.hd); + radeon_dpm_enable_uvd(rdev, false); + } else { + radeon_set_uvd_clocks(rdev, 0, 0); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0025-drm-radeon-dpm-retain-user-selected-performance-leve.patch 
b/common/recipes-kernel/linux/linux-amd/0025-drm-radeon-dpm-retain-user-selected-performance-leve.patch new file mode 100644 index 00000000..866252cf --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0025-drm-radeon-dpm-retain-user-selected-performance-leve.patch @@ -0,0 +1,45 @@ +From 5c2a86555ecf9da30f409b504fb7c823bd5e9551 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Wed, 23 Oct 2013 21:31:42 +0000 +Subject: [PATCH 25/60] drm/radeon/dpm: retain user selected performance level + across state changes + +If the user has forced the state high or low, retain that preference +even when we switch power states. + +Fixes: +https://bugs.freedesktop.org/show_bug.cgi?id=70654 + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon_pm.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index 1b10e2e..58132a5 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -918,12 +918,16 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) + radeon_dpm_post_set_power_state(rdev); + + if (rdev->asic->dpm.force_performance_level) { +- if (rdev->pm.dpm.thermal_active) ++ if (rdev->pm.dpm.thermal_active) { ++ enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; + /* force low perf level for thermal */ + radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_LOW); +- else +- /* otherwise, enable auto */ +- radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); ++ /* save the user's level */ ++ rdev->pm.dpm.forced_level = level; ++ } else { ++ /* otherwise, user selected level */ ++ radeon_dpm_force_performance_level(rdev, rdev->pm.dpm.forced_level); ++ } + } + + done: +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0026-drm-radeon-remove-generic-rptr-wptr-functions-v2.patch 
b/common/recipes-kernel/linux/linux-amd/0026-drm-radeon-remove-generic-rptr-wptr-functions-v2.patch new file mode 100644 index 00000000..91f97cc2 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0026-drm-radeon-remove-generic-rptr-wptr-functions-v2.patch @@ -0,0 +1,1049 @@ +From 84f74653d13d86b530b43982a9bdfecf257d367e Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Tue, 10 Dec 2013 00:44:30 +0000 +Subject: [PATCH 26/60] drm/radeon: remove generic rptr/wptr functions (v2) + +Fill in asic family specific versions rather than +using the generic version. This lets us handle asic +specific differences more easily. In this case, we +disable sw swapping of the rptr writeback value on +r6xx+ since the hw does it for us. Fixes bogus +rptr readback on BE systems. + +v2: remove missed cpu_to_le32(), add comments + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 58 ++++++++++++++++++++---------- + drivers/gpu/drm/radeon/cik_sdma.c | 69 ++++++++++++++++++++++++++++++++++++ + drivers/gpu/drm/radeon/evergreen.c | 3 -- + drivers/gpu/drm/radeon/ni.c | 69 +++++++++++++++++++++++++++++++----- + drivers/gpu/drm/radeon/ni_dma.c | 69 ++++++++++++++++++++++++++++++++++++ + drivers/gpu/drm/radeon/r100.c | 31 +++++++++++++++- + drivers/gpu/drm/radeon/r600.c | 32 +++++++++++++++-- + drivers/gpu/drm/radeon/r600_dma.c | 13 +++++-- + drivers/gpu/drm/radeon/radeon.h | 4 +-- + drivers/gpu/drm/radeon/radeon_asic.c | 66 +++++++++++++++++----------------- + drivers/gpu/drm/radeon/radeon_asic.h | 57 ++++++++++++++++++++++------- + drivers/gpu/drm/radeon/radeon_ring.c | 44 +++-------------------- + drivers/gpu/drm/radeon/rv770.c | 3 -- + drivers/gpu/drm/radeon/si.c | 8 ----- + 14 files changed, 392 insertions(+), 134 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 17bbdfd..cef0bbe 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ 
-3462,15 +3462,43 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) + return 0; + } + +-u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, +- struct radeon_ring *ring) ++u32 cik_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) + { + u32 rptr; + ++ if (rdev->wb.enabled) ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ else ++ rptr = RREG32(CP_RB0_RPTR); ++ ++ return rptr; ++} + ++u32 cik_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 wptr; ++ ++ wptr = RREG32(CP_RB0_WPTR); ++ ++ return wptr; ++} ++ ++void cik_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ WREG32(CP_RB0_WPTR, ring->wptr); ++ (void)RREG32(CP_RB0_WPTR); ++} ++ ++u32 cik_compute_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr; + + if (rdev->wb.enabled) { +- rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; + } else { + mutex_lock(&rdev->srbm_mutex); + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); +@@ -3482,13 +3510,14 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, + return rptr; + } + +-u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring) ++u32 cik_compute_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) + { + u32 wptr; + + if (rdev->wb.enabled) { +- wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); ++ /* XXX check if swapping is necessary on BE */ ++ wptr = rdev->wb.wb[ring->wptr_offs/4]; + } else { + mutex_lock(&rdev->srbm_mutex); + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); +@@ -3500,10 +3529,11 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, + return wptr; + } + +-void cik_compute_ring_set_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring) ++void cik_compute_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) + { +- rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); ++ /* XXX check if 
swapping is necessary on BE */ ++ rdev->wb.wb[ring->wptr_offs/4] = ring->wptr; + WDOORBELL32(ring->doorbell_offset, ring->wptr); + } + +@@ -7121,8 +7151,7 @@ static int cik_startup(struct radeon_device *rdev) + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- CP_RB0_RPTR, CP_RB0_WPTR, +- RADEON_CP_PACKET2); ++ PACKET3(PACKET3_NOP, 0x3FFF)); + if (r) + return r; + +@@ -7130,7 +7159,6 @@ static int cik_startup(struct radeon_device *rdev) + /* type-2 packets are deprecated on MEC, use type-3 instead */ + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, +- CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, + PACKET3(PACKET3_NOP, 0x3FFF)); + if (r) + return r; +@@ -7142,7 +7170,6 @@ static int cik_startup(struct radeon_device *rdev) + /* type-2 packets are deprecated on MEC, use type-3 instead */ + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, +- CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, + PACKET3(PACKET3_NOP, 0x3FFF)); + if (r) + return r; +@@ -7154,16 +7181,12 @@ static int cik_startup(struct radeon_device *rdev) + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, +- SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, +- SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, +- SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + if (r) + return r; +@@ -7179,7 +7202,6 @@ static int cik_startup(struct radeon_device *rdev) + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, 
ring->ring_size, 0, +- UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); +diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c +index 42abd1e..a34176b 100644 +--- a/drivers/gpu/drm/radeon/cik_sdma.c ++++ b/drivers/gpu/drm/radeon/cik_sdma.c +@@ -60,6 +60,75 @@ static void cik_sdma_hdp_flush(struct radeon_device *rdev, + } + + /** ++ * cik_sdma_get_rptr - get the current read pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Get the current rptr from the hardware (CIK+). ++ */ ++uint32_t cik_sdma_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr, reg; ++ ++ if (rdev->wb.enabled) { ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ } else { ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET; ++ else ++ reg = SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET; ++ ++ rptr = RREG32(reg); ++ } ++ ++ return (rptr & 0x3fffc) >> 2; ++} ++ ++/** ++ * cik_sdma_get_wptr - get the current write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Get the current wptr from the hardware (CIK+). ++ */ ++uint32_t cik_sdma_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 reg; ++ ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET; ++ else ++ reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET; ++ ++ return (RREG32(reg) & 0x3fffc) >> 2; ++} ++ ++/** ++ * cik_sdma_set_wptr - commit the write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Write the wptr back to the hardware (CIK+). 
++ */ ++void cik_sdma_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 reg; ++ ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET; ++ else ++ reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET; ++ ++ WREG32(reg, (ring->wptr << 2) & 0x3fffc); ++} ++ ++/** + * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer +diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c +index 20b00a0..5829318 100644 +--- a/drivers/gpu/drm/radeon/evergreen.c ++++ b/drivers/gpu/drm/radeon/evergreen.c +@@ -5154,14 +5154,12 @@ static int evergreen_startup(struct radeon_device *rdev) + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- R600_CP_RB_RPTR, R600_CP_RB_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- DMA_RB_RPTR, DMA_RB_WPTR, + DMA_PACKET(DMA_PACKET_NOP, 0, 0)); + if (r) + return r; +@@ -5179,7 +5177,6 @@ static int evergreen_startup(struct radeon_device *rdev) + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, +- UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 4317e57..e059b89 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -1399,6 +1399,55 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable) + } + } + ++u32 cayman_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr; ++ ++ if (rdev->wb.enabled) ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ else { ++ if (ring->idx == RADEON_RING_TYPE_GFX_INDEX) ++ rptr = RREG32(CP_RB0_RPTR); ++ else if (ring->idx == 
CAYMAN_RING_TYPE_CP1_INDEX) ++ rptr = RREG32(CP_RB1_RPTR); ++ else ++ rptr = RREG32(CP_RB2_RPTR); ++ } ++ ++ return rptr; ++} ++ ++u32 cayman_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 wptr; ++ ++ if (ring->idx == RADEON_RING_TYPE_GFX_INDEX) ++ wptr = RREG32(CP_RB0_WPTR); ++ else if (ring->idx == CAYMAN_RING_TYPE_CP1_INDEX) ++ wptr = RREG32(CP_RB1_WPTR); ++ else ++ wptr = RREG32(CP_RB2_WPTR); ++ ++ return wptr; ++} ++ ++void cayman_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ if (ring->idx == RADEON_RING_TYPE_GFX_INDEX) { ++ WREG32(CP_RB0_WPTR, ring->wptr); ++ (void)RREG32(CP_RB0_WPTR); ++ } else if (ring->idx == CAYMAN_RING_TYPE_CP1_INDEX) { ++ WREG32(CP_RB1_WPTR, ring->wptr); ++ (void)RREG32(CP_RB1_WPTR); ++ } else { ++ WREG32(CP_RB2_WPTR, ring->wptr); ++ (void)RREG32(CP_RB2_WPTR); ++ } ++} ++ + static int cayman_cp_load_microcode(struct radeon_device *rdev) + { + const __be32 *fw_data; +@@ -1527,6 +1576,16 @@ static int cayman_cp_resume(struct radeon_device *rdev) + CP_RB1_BASE, + CP_RB2_BASE + }; ++ static const unsigned cp_rb_rptr[] = { ++ CP_RB0_RPTR, ++ CP_RB1_RPTR, ++ CP_RB2_RPTR ++ }; ++ static const unsigned cp_rb_wptr[] = { ++ CP_RB0_WPTR, ++ CP_RB1_WPTR, ++ CP_RB2_WPTR ++ }; + struct radeon_ring *ring; + int i, r; + +@@ -1585,8 +1644,8 @@ static int cayman_cp_resume(struct radeon_device *rdev) + WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); + + ring->rptr = ring->wptr = 0; +- WREG32(ring->rptr_reg, ring->rptr); +- WREG32(ring->wptr_reg, ring->wptr); ++ WREG32(cp_rb_rptr[i], ring->rptr); ++ WREG32(cp_rb_wptr[i], ring->wptr); + + mdelay(1); + WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA); +@@ -1966,23 +2025,18 @@ static int cayman_startup(struct radeon_device *rdev) + evergreen_irq_set(rdev); + + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- CP_RB0_RPTR, CP_RB0_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = 
&rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- DMA_RB_RPTR + DMA0_REGISTER_OFFSET, +- DMA_RB_WPTR + DMA0_REGISTER_OFFSET, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, +- DMA_RB_RPTR + DMA1_REGISTER_OFFSET, +- DMA_RB_WPTR + DMA1_REGISTER_OFFSET, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; +@@ -2001,7 +2055,6 @@ static int cayman_startup(struct radeon_device *rdev) + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, +- UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); +diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c +index ddc946b..b0f44f3 100644 +--- a/drivers/gpu/drm/radeon/ni_dma.c ++++ b/drivers/gpu/drm/radeon/ni_dma.c +@@ -43,6 +43,75 @@ u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); + */ + + /** ++ * cayman_dma_get_rptr - get the current read pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Get the current rptr from the hardware (cayman+). ++ */ ++uint32_t cayman_dma_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr, reg; ++ ++ if (rdev->wb.enabled) { ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ } else { ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET; ++ else ++ reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET; ++ ++ rptr = RREG32(reg); ++ } ++ ++ return (rptr & 0x3fffc) >> 2; ++} ++ ++/** ++ * cayman_dma_get_wptr - get the current write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Get the current wptr from the hardware (cayman+). 
++ */ ++uint32_t cayman_dma_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 reg; ++ ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; ++ else ++ reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; ++ ++ return (RREG32(reg) & 0x3fffc) >> 2; ++} ++ ++/** ++ * cayman_dma_set_wptr - commit the write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon ring pointer ++ * ++ * Write the wptr back to the hardware (cayman+). ++ */ ++void cayman_dma_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 reg; ++ ++ if (ring->idx == R600_RING_TYPE_DMA_INDEX) ++ reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; ++ else ++ reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; ++ ++ WREG32(reg, (ring->wptr << 2) & 0x3fffc); ++} ++ ++/** + * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer +diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c +index 16997d3..12645a3 100644 +--- a/drivers/gpu/drm/radeon/r100.c ++++ b/drivers/gpu/drm/radeon/r100.c +@@ -1050,6 +1050,36 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) + return err; + } + ++u32 r100_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr; ++ ++ if (rdev->wb.enabled) ++ rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); ++ else ++ rptr = RREG32(RADEON_CP_RB_RPTR); ++ ++ return rptr; ++} ++ ++u32 r100_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 wptr; ++ ++ wptr = RREG32(RADEON_CP_RB_WPTR); ++ ++ return wptr; ++} ++ ++void r100_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ WREG32(RADEON_CP_RB_WPTR, ring->wptr); ++ (void)RREG32(RADEON_CP_RB_WPTR); ++} ++ + static void r100_cp_load_microcode(struct radeon_device *rdev) + { + const __be32 *fw_data; +@@ -1102,7 +1132,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) + ring_size = (1 << (rb_bufsz + 
1)) * 4; + r100_cp_load_microcode(rdev); + r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET, +- RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR, + RADEON_CP_PACKET2); + if (r) { + return r; +diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c +index 9d101a4..6bb944b 100644 +--- a/drivers/gpu/drm/radeon/r600.c ++++ b/drivers/gpu/drm/radeon/r600.c +@@ -2329,6 +2329,36 @@ out: + return err; + } + ++u32 r600_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 rptr; ++ ++ if (rdev->wb.enabled) ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ else ++ rptr = RREG32(R600_CP_RB_RPTR); ++ ++ return rptr; ++} ++ ++u32 r600_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ u32 wptr; ++ ++ wptr = RREG32(R600_CP_RB_WPTR); ++ ++ return wptr; ++} ++ ++void r600_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ WREG32(R600_CP_RB_WPTR, ring->wptr); ++ (void)RREG32(R600_CP_RB_WPTR); ++} ++ + static int r600_cp_load_microcode(struct radeon_device *rdev) + { + const __be32 *fw_data; +@@ -2766,14 +2796,12 @@ static int r600_startup(struct radeon_device *rdev) + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- R600_CP_RB_RPTR, R600_CP_RB_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- DMA_RB_RPTR, DMA_RB_WPTR, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; +diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c +index 616d37a..ee68579 100644 +--- a/drivers/gpu/drm/radeon/r600_dma.c ++++ b/drivers/gpu/drm/radeon/r600_dma.c +@@ -51,7 +51,14 @@ u32 r600_gpu_check_soft_reset(struct radeon_device *rdev); + uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) + { +- return 
(radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2; ++ u32 rptr; ++ ++ if (rdev->wb.enabled) ++ rptr = rdev->wb.wb[ring->rptr_offs/4]; ++ else ++ rptr = RREG32(DMA_RB_RPTR); ++ ++ return (rptr & 0x3fffc) >> 2; + } + + /** +@@ -65,7 +72,7 @@ uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) + { +- return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2; ++ return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2; + } + + /** +@@ -79,7 +86,7 @@ uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) + { +- WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc); ++ WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc); + } + + /** +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index af893e3..aa1ffa9 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -788,13 +788,11 @@ struct radeon_ring { + volatile uint32_t *ring; + unsigned rptr; + unsigned rptr_offs; +- unsigned rptr_reg; + unsigned rptr_save_reg; + u64 next_rptr_gpu_addr; + volatile u32 *next_rptr_cpu_addr; + unsigned wptr; + unsigned wptr_old; +- unsigned wptr_reg; + unsigned ring_size; + unsigned ring_free_dw; + int count_dw; +@@ -966,7 +964,7 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring + int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned size, uint32_t *data); + int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, +- unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop); ++ unsigned rptr_offs, u32 nop); + void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); + + +diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c +index 123adfe..a539869 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ 
b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -182,9 +182,9 @@ static struct radeon_asic_ring r100_gfx_ring = { + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &r100_gfx_get_rptr, ++ .get_wptr = &r100_gfx_get_wptr, ++ .set_wptr = &r100_gfx_set_wptr, + }; + + static struct radeon_asic r100_asic = { +@@ -330,9 +330,9 @@ static struct radeon_asic_ring r300_gfx_ring = { + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &r100_gfx_get_rptr, ++ .get_wptr = &r100_gfx_get_wptr, ++ .set_wptr = &r100_gfx_set_wptr, + }; + + static struct radeon_asic r300_asic = { +@@ -883,9 +883,9 @@ static struct radeon_asic_ring r600_gfx_ring = { + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &r600_gfx_is_lockup, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &r600_gfx_get_rptr, ++ .get_wptr = &r600_gfx_get_wptr, ++ .set_wptr = &r600_gfx_set_wptr, + }; + + static struct radeon_asic_ring r600_dma_ring = { +@@ -1267,9 +1267,9 @@ static struct radeon_asic_ring evergreen_gfx_ring = { + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &evergreen_gfx_is_lockup, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &r600_gfx_get_rptr, ++ .get_wptr = &r600_gfx_get_wptr, ++ .set_wptr = &r600_gfx_set_wptr, + }; + + static struct radeon_asic_ring evergreen_dma_ring = { +@@ -1570,9 +1570,9 @@ static struct radeon_asic_ring cayman_gfx_ring = { + .ib_test = 
&r600_ib_test, + .is_lockup = &cayman_gfx_is_lockup, + .vm_flush = &cayman_vm_flush, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &cayman_gfx_get_rptr, ++ .get_wptr = &cayman_gfx_get_wptr, ++ .set_wptr = &cayman_gfx_set_wptr, + }; + + static struct radeon_asic_ring cayman_dma_ring = { +@@ -1585,9 +1585,9 @@ static struct radeon_asic_ring cayman_dma_ring = { + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, +- .get_rptr = &r600_dma_get_rptr, +- .get_wptr = &r600_dma_get_wptr, +- .set_wptr = &r600_dma_set_wptr ++ .get_rptr = &cayman_dma_get_rptr, ++ .get_wptr = &cayman_dma_get_wptr, ++ .set_wptr = &cayman_dma_set_wptr + }; + + static struct radeon_asic_ring cayman_uvd_ring = { +@@ -1813,9 +1813,9 @@ static struct radeon_asic_ring si_gfx_ring = { + .ib_test = &r600_ib_test, + .is_lockup = &si_gfx_is_lockup, + .vm_flush = &si_vm_flush, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- .set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &cayman_gfx_get_rptr, ++ .get_wptr = &cayman_gfx_get_wptr, ++ .set_wptr = &cayman_gfx_set_wptr, + }; + + static struct radeon_asic_ring si_dma_ring = { +@@ -1828,9 +1828,9 @@ static struct radeon_asic_ring si_dma_ring = { + .ib_test = &r600_dma_ib_test, + .is_lockup = &si_dma_is_lockup, + .vm_flush = &si_dma_vm_flush, +- .get_rptr = &r600_dma_get_rptr, +- .get_wptr = &r600_dma_get_wptr, +- .set_wptr = &r600_dma_set_wptr, ++ .get_rptr = &cayman_dma_get_rptr, ++ .get_wptr = &cayman_dma_get_wptr, ++ .set_wptr = &cayman_dma_set_wptr, + }; + + static struct radeon_asic si_asic = { +@@ -1943,9 +1943,9 @@ static struct radeon_asic_ring ci_gfx_ring = { + .ib_test = &cik_ib_test, + .is_lockup = &cik_gfx_is_lockup, + .vm_flush = &cik_vm_flush, +- .get_rptr = &radeon_ring_generic_get_rptr, +- .get_wptr = &radeon_ring_generic_get_wptr, +- 
.set_wptr = &radeon_ring_generic_set_wptr, ++ .get_rptr = &cik_gfx_get_rptr, ++ .get_wptr = &cik_gfx_get_wptr, ++ .set_wptr = &cik_gfx_set_wptr, + }; + + static struct radeon_asic_ring ci_cp_ring = { +@@ -1958,9 +1958,9 @@ static struct radeon_asic_ring ci_cp_ring = { + .ib_test = &cik_ib_test, + .is_lockup = &cik_gfx_is_lockup, + .vm_flush = &cik_vm_flush, +- .get_rptr = &cik_compute_ring_get_rptr, +- .get_wptr = &cik_compute_ring_get_wptr, +- .set_wptr = &cik_compute_ring_set_wptr, ++ .get_rptr = &cik_compute_get_rptr, ++ .get_wptr = &cik_compute_get_wptr, ++ .set_wptr = &cik_compute_set_wptr, + }; + + static struct radeon_asic_ring ci_dma_ring = { +@@ -1973,9 +1973,9 @@ static struct radeon_asic_ring ci_dma_ring = { + .ib_test = &cik_sdma_ib_test, + .is_lockup = &cik_sdma_is_lockup, + .vm_flush = &cik_dma_vm_flush, +- .get_rptr = &r600_dma_get_rptr, +- .get_wptr = &r600_dma_get_wptr, +- .set_wptr = &r600_dma_set_wptr, ++ .get_rptr = &cik_sdma_get_rptr, ++ .get_wptr = &cik_sdma_get_wptr, ++ .set_wptr = &cik_sdma_set_wptr, + }; + + static struct radeon_asic ci_asic = { +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 8939cb3..998042e 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -47,13 +47,6 @@ u8 atombios_get_backlight_level(struct radeon_encoder *radeon_encoder); + void radeon_legacy_set_backlight_level(struct radeon_encoder *radeon_encoder, u8 level); + u8 radeon_legacy_get_backlight_level(struct radeon_encoder *radeon_encoder); + +-u32 radeon_ring_generic_get_rptr(struct radeon_device *rdev, +- struct radeon_ring *ring); +-u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring); +-void radeon_ring_generic_set_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring); +- + /* + * r100,rv100,rs100,rv200,rs200 + */ +@@ -148,6 +141,13 @@ extern void r100_post_page_flip(struct radeon_device *rdev, int crtc); + extern void 
r100_wait_for_vblank(struct radeon_device *rdev, int crtc); + extern int r100_mc_wait_for_idle(struct radeon_device *rdev); + ++u32 r100_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 r100_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void r100_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++ + /* + * r200,rv250,rs300,rv280 + */ +@@ -368,6 +368,12 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev); + int r600_pcie_gart_init(struct radeon_device *rdev); + void r600_scratch_init(struct radeon_device *rdev); + int r600_init_microcode(struct radeon_device *rdev); ++u32 r600_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 r600_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void r600_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); + /* r600 irq */ + int r600_irq_process(struct radeon_device *rdev); + int r600_irq_init(struct radeon_device *rdev); +@@ -591,6 +597,19 @@ void cayman_dma_vm_set_page(struct radeon_device *rdev, + + void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + ++u32 cayman_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cayman_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void cayman_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++uint32_t cayman_dma_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++uint32_t cayman_dma_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void cayman_dma_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++ + int ni_dpm_init(struct radeon_device *rdev); + void ni_dpm_setup_asic(struct radeon_device *rdev); + int ni_dpm_enable(struct radeon_device *rdev); +@@ -735,12 +754,24 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, + uint32_t incr, uint32_t flags); + void cik_dma_vm_flush(struct 
radeon_device *rdev, int ridx, struct radeon_vm *vm); + int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +-u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, +- struct radeon_ring *ring); +-u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring); +-void cik_compute_ring_set_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring); ++u32 cik_gfx_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cik_gfx_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void cik_gfx_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cik_compute_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cik_compute_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void cik_compute_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cik_sdma_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++u32 cik_sdma_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void cik_sdma_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); + int ci_get_temp(struct radeon_device *rdev); + int kv_get_temp(struct radeon_device *rdev); + +diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c +index f1cec22..65f1cea 100644 +--- a/drivers/gpu/drm/radeon/radeon_ring.c ++++ b/drivers/gpu/drm/radeon/radeon_ring.c +@@ -332,36 +332,6 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, + } + } + +-u32 radeon_ring_generic_get_rptr(struct radeon_device *rdev, +- struct radeon_ring *ring) +-{ +- u32 rptr; +- +- if (rdev->wb.enabled) +- rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); +- else +- rptr = RREG32(ring->rptr_reg); +- +- return rptr; +-} +- +-u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring) +-{ +- u32 wptr; +- +- wptr = RREG32(ring->wptr_reg); +- +- return wptr; +-} +- +-void 
radeon_ring_generic_set_wptr(struct radeon_device *rdev, +- struct radeon_ring *ring) +-{ +- WREG32(ring->wptr_reg, ring->wptr); +- (void)RREG32(ring->wptr_reg); +-} +- + /** + * radeon_ring_free_size - update the free size + * +@@ -689,22 +659,18 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + * @ring: radeon_ring structure holding ring information + * @ring_size: size of the ring + * @rptr_offs: offset of the rptr writeback location in the WB buffer +- * @rptr_reg: MMIO offset of the rptr register +- * @wptr_reg: MMIO offset of the wptr register + * @nop: nop packet for this ring + * + * Initialize the driver information for the selected ring (all asics). + * Returns 0 on success, error on failure. + */ + int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, +- unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop) ++ unsigned rptr_offs, u32 nop) + { + int r; + + ring->ring_size = ring_size; + ring->rptr_offs = rptr_offs; +- ring->rptr_reg = rptr_reg; +- ring->wptr_reg = wptr_reg; + ring->nop = nop; + /* Allocate ring buffer */ + if (ring->ring_obj == NULL) { +@@ -798,12 +764,12 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) + count = (ring->ring_size / 4) - ring->ring_free_dw; + + wptr = radeon_ring_get_wptr(rdev, ring); +- seq_printf(m, "wptr(0x%04x): 0x%08x [%5d]\n", +- ring->wptr_reg, wptr, wptr); ++ seq_printf(m, "wptr: 0x%08x [%5d]\n", ++ wptr, wptr); + + rptr = radeon_ring_get_rptr(rdev, ring); +- seq_printf(m, "rptr(0x%04x): 0x%08x [%5d]\n", +- ring->rptr_reg, rptr, rptr); ++ seq_printf(m, "rptr: 0x%08x [%5d]\n", ++ rptr, rptr); + + if (ring->rptr_save_reg) { + rptr_next = RREG32(ring->rptr_save_reg); +diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c +index c4960ad..34c2217 100644 +--- a/drivers/gpu/drm/radeon/rv770.c ++++ b/drivers/gpu/drm/radeon/rv770.c +@@ -1720,14 +1720,12 @@ static int rv770_startup(struct 
radeon_device *rdev) + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- R600_CP_RB_RPTR, R600_CP_RB_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- DMA_RB_RPTR, DMA_RB_WPTR, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; +@@ -1746,7 +1744,6 @@ static int rv770_startup(struct radeon_device *rdev) + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, +- UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); +diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c +index 48ad0ae..f2586cb 100644 +--- a/drivers/gpu/drm/radeon/si.c ++++ b/drivers/gpu/drm/radeon/si.c +@@ -6448,37 +6448,30 @@ static int si_startup(struct radeon_device *rdev) + + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, +- CP_RB0_RPTR, CP_RB0_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, +- CP_RB1_RPTR, CP_RB1_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, +- CP_RB2_RPTR, CP_RB2_WPTR, + RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, +- DMA_RB_RPTR + DMA0_REGISTER_OFFSET, +- DMA_RB_WPTR + DMA0_REGISTER_OFFSET, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, 
CAYMAN_WB_DMA1_RPTR_OFFSET, +- DMA_RB_RPTR + DMA1_REGISTER_OFFSET, +- DMA_RB_WPTR + DMA1_REGISTER_OFFSET, + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + if (r) + return r; +@@ -6498,7 +6491,6 @@ static int si_startup(struct radeon_device *rdev) + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, +- UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0027-drm-radeon-initial-VCE-support-v4.patch b/common/recipes-kernel/linux/linux-amd/0027-drm-radeon-initial-VCE-support-v4.patch new file mode 100644 index 00000000..00996021 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0027-drm-radeon-initial-VCE-support-v4.patch @@ -0,0 +1,1434 @@ +From 769bdc09213b2d223c39eb5f8df2c741fca88321 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Thu, 23 May 2013 12:10:04 +0200 +Subject: [PATCH 27/60] drm/radeon: initial VCE support v4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Only VCE 2.0 support so far. 
+ +v2: squashing multiple patches into this one +v3: add IRQ support for CIK, major cleanups, + basic code documentation +v4: remove HAINAN from chipset list + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/Makefile | 6 + + drivers/gpu/drm/radeon/cik.c | 60 ++++ + drivers/gpu/drm/radeon/cikd.h | 33 ++ + drivers/gpu/drm/radeon/radeon.h | 56 +++- + drivers/gpu/drm/radeon/radeon_asic.c | 17 + + drivers/gpu/drm/radeon/radeon_asic.h | 13 + + drivers/gpu/drm/radeon/radeon_cs.c | 4 + + drivers/gpu/drm/radeon/radeon_kms.c | 1 + + drivers/gpu/drm/radeon/radeon_ring.c | 4 + + drivers/gpu/drm/radeon/radeon_test.c | 39 ++- + drivers/gpu/drm/radeon/radeon_vce.c | 588 +++++++++++++++++++++++++++++++++++ + drivers/gpu/drm/radeon/sid.h | 47 +++ + drivers/gpu/drm/radeon/vce_v1_0.c | 187 +++++++++++ + drivers/gpu/drm/radeon/vce_v2_0.c | 70 +++++ + include/uapi/drm/radeon_drm.h | 1 + + 15 files changed, 1117 insertions(+), 9 deletions(-) + create mode 100644 drivers/gpu/drm/radeon/radeon_vce.c + create mode 100644 drivers/gpu/drm/radeon/vce_v1_0.c + create mode 100644 drivers/gpu/drm/radeon/vce_v2_0.c + +diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile +index 306364a..ed60caa 100644 +--- a/drivers/gpu/drm/radeon/Makefile ++++ b/drivers/gpu/drm/radeon/Makefile +@@ -99,6 +99,12 @@ radeon-y += \ + uvd_v3_1.o \ + uvd_v4_2.o + ++# add VCE block ++radeon-y += \ ++ radeon_vce.o \ ++ vce_v1_0.o \ ++ vce_v2_0.o \ ++ + radeon-$(CONFIG_COMPAT) += radeon_ioc32.o + radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o + radeon-$(CONFIG_ACPI) += radeon_acpi.o +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index cef0bbe..e256340 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -6835,6 +6835,20 @@ restart_ih: + /* reset addr and status */ + WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); + break; ++ case 167: /* VCE */ ++ DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); ++ switch 
(src_data) { ++ case 0: ++ radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); ++ break; ++ case 1: ++ radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); ++ break; ++ default: ++ DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); ++ break; ++ } ++ break; + case 176: /* GFX RB CP_INT */ + case 177: /* GFX IB CP_INT */ + radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); +@@ -7134,6 +7148,22 @@ static int cik_startup(struct radeon_device *rdev) + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + ++ r = radeon_vce_resume(rdev); ++ if (!r) { ++ r = vce_v2_0_resume(rdev); ++ if (!r) ++ r = radeon_fence_driver_start_ring(rdev, ++ TN_RING_TYPE_VCE1_INDEX); ++ if (!r) ++ r = radeon_fence_driver_start_ring(rdev, ++ TN_RING_TYPE_VCE2_INDEX); ++ } ++ if (r) { ++ dev_err(rdev->dev, "VCE init error (%d).\n", r); ++ rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; ++ rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; ++ } ++ + /* Enable IRQ */ + if (!rdev->irq.installed) { + r = radeon_irq_kms_init(rdev); +@@ -7209,6 +7239,23 @@ static int cik_startup(struct radeon_device *rdev) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + ++ r = -ENOENT; ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; ++ if (ring->ring_size) ++ r = radeon_ring_init(rdev, ring, ring->ring_size, 0, ++ VCE_CMD_NO_OP); ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; ++ if (ring->ring_size) ++ r = radeon_ring_init(rdev, ring, ring->ring_size, 0, ++ VCE_CMD_NO_OP); ++ ++ if (!r) ++ r = vce_v1_0_init(rdev); ++ else if (r != -ENOENT) ++ DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); ++ + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); +@@ -7276,6 +7323,7 @@ int cik_suspend(struct radeon_device *rdev) + cik_sdma_enable(rdev, false); + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); ++ radeon_vce_suspend(rdev); + cik_fini_pg(rdev); + cik_fini_cg(rdev); + cik_irq_suspend(rdev); +@@ -7405,6 +7453,17 
@@ int cik_init(struct radeon_device *rdev) + r600_ring_init(rdev, ring, 4096); + } + ++ r = radeon_vce_init(rdev); ++ if (!r) { ++ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; ++ ring->ring_obj = NULL; ++ r600_ring_init(rdev, ring, 4096); ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; ++ ring->ring_obj = NULL; ++ r600_ring_init(rdev, ring, 4096); ++ } ++ + rdev->ih.ring_obj = NULL; + r600_ih_ring_init(rdev, 64 * 1024); + +@@ -7465,6 +7524,7 @@ void cik_fini(struct radeon_device *rdev) + radeon_irq_kms_fini(rdev); + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); ++ radeon_vce_fini(rdev); + cik_pcie_gart_fini(rdev); + r600_vram_scratch_fini(rdev); + radeon_gem_fini(rdev); +diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h +index c4738bc..54eb8be 100644 +--- a/drivers/gpu/drm/radeon/cikd.h ++++ b/drivers/gpu/drm/radeon/cikd.h +@@ -1908,4 +1908,37 @@ + /* UVD CTX indirect */ + #define UVD_CGC_MEM_CTRL 0xC0 + ++/* VCE */ ++ ++#define VCE_VCPU_CACHE_OFFSET0 0x20024 ++#define VCE_VCPU_CACHE_SIZE0 0x20028 ++#define VCE_VCPU_CACHE_OFFSET1 0x2002c ++#define VCE_VCPU_CACHE_SIZE1 0x20030 ++#define VCE_VCPU_CACHE_OFFSET2 0x20034 ++#define VCE_VCPU_CACHE_SIZE2 0x20038 ++#define VCE_RB_RPTR2 0x20178 ++#define VCE_RB_WPTR2 0x2017c ++#define VCE_RB_RPTR 0x2018c ++#define VCE_RB_WPTR 0x20190 ++#define VCE_CLOCK_GATING_A 0x202f8 ++#define VCE_CLOCK_GATING_B 0x202fc ++#define VCE_UENC_CLOCK_GATING 0x207bc ++#define VCE_UENC_REG_CLOCK_GATING 0x207c0 ++#define VCE_SYS_INT_EN 0x21300 ++# define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) ++#define VCE_LMI_CTRL2 0x21474 ++#define VCE_LMI_CTRL 0x21498 ++#define VCE_LMI_VM_CTRL 0x214a0 ++#define VCE_LMI_SWAP_CNTL 0x214b4 ++#define VCE_LMI_SWAP_CNTL1 0x214b8 ++#define VCE_LMI_CACHE_CTRL 0x214f4 ++ ++#define VCE_CMD_NO_OP 0x00000000 ++#define VCE_CMD_END 0x00000001 ++#define VCE_CMD_IB 0x00000002 ++#define VCE_CMD_FENCE 0x00000003 ++#define VCE_CMD_TRAP 0x00000004 ++#define VCE_CMD_IB_AUTO 0x00000005 ++#define 
VCE_CMD_SEMAPHORE 0x00000006 ++ + #endif +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index aa1ffa9..0abbe5e 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -111,19 +111,16 @@ extern int radeon_aspm; + #define RADEONFB_CONN_LIMIT 4 + #define RADEON_BIOS_NUM_SCRATCH 8 + +-/* max number of rings */ +-#define RADEON_NUM_RINGS 6 +- + /* fence seq are set to this number when signaled */ + #define RADEON_FENCE_SIGNALED_SEQ 0LL + + /* internal ring indices */ + /* r1xx+ has gfx CP ring */ +-#define RADEON_RING_TYPE_GFX_INDEX 0 ++#define RADEON_RING_TYPE_GFX_INDEX 0 + + /* cayman has 2 compute CP rings */ +-#define CAYMAN_RING_TYPE_CP1_INDEX 1 +-#define CAYMAN_RING_TYPE_CP2_INDEX 2 ++#define CAYMAN_RING_TYPE_CP1_INDEX 1 ++#define CAYMAN_RING_TYPE_CP2_INDEX 2 + + /* R600+ has an async dma ring */ + #define R600_RING_TYPE_DMA_INDEX 3 +@@ -131,7 +128,14 @@ extern int radeon_aspm; + #define CAYMAN_RING_TYPE_DMA1_INDEX 4 + + /* R600+ */ +-#define R600_RING_TYPE_UVD_INDEX 5 ++#define R600_RING_TYPE_UVD_INDEX 5 ++ ++/* TN+ */ ++#define TN_RING_TYPE_VCE1_INDEX 6 ++#define TN_RING_TYPE_VCE2_INDEX 7 ++ ++/* max number of rings */ ++#define RADEON_NUM_RINGS 8 + + /* hardcode those limit for now */ + #define RADEON_VA_IB_OFFSET (1 << 20) +@@ -1592,6 +1596,42 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, + int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, + unsigned cg_upll_func_cntl); + ++/* ++ * VCE ++ */ ++#define RADEON_MAX_VCE_HANDLES 16 ++#define RADEON_VCE_STACK_SIZE (1024*1024) ++#define RADEON_VCE_HEAP_SIZE (4*1024*1024) ++ ++struct radeon_vce { ++ struct radeon_bo *vcpu_bo; ++ void *cpu_addr; ++ uint64_t gpu_addr; ++ atomic_t handles[RADEON_MAX_VCE_HANDLES]; ++ struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; ++}; ++ ++int radeon_vce_init(struct radeon_device *rdev); ++void radeon_vce_fini(struct radeon_device *rdev); ++int radeon_vce_suspend(struct radeon_device *rdev); 
++int radeon_vce_resume(struct radeon_device *rdev); ++int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, ++ uint32_t handle, struct radeon_fence **fence); ++int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, ++ uint32_t handle, struct radeon_fence **fence); ++void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); ++int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); ++int radeon_vce_cs_parse(struct radeon_cs_parser *p); ++bool radeon_vce_semaphore_emit(struct radeon_device *rdev, ++ struct radeon_ring *ring, ++ struct radeon_semaphore *semaphore, ++ bool emit_wait); ++void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); ++void radeon_vce_fence_emit(struct radeon_device *rdev, ++ struct radeon_fence *fence); ++int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); ++int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); ++ + struct r600_audio_pin { + int channels; + int rate; +@@ -2186,6 +2226,7 @@ struct radeon_device { + struct radeon_gem gem; + struct radeon_pm pm; + struct radeon_uvd uvd; ++ struct radeon_vce vce; + uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; + struct radeon_wb wb; + struct radeon_dummy_page dummy_page; +@@ -2204,6 +2245,7 @@ struct radeon_device { + const struct firmware *sdma_fw; /* CIK SDMA firmware */ + const struct firmware *smc_fw; /* SMC firmware */ + const struct firmware *uvd_fw; /* UVD firmware */ ++ const struct firmware *vce_fw; /* VCE firmware */ + struct r600_vram_scratch vram_scratch; + int msi_enabled; /* msi enabled */ + struct r600_ih ih; /* r6/700 interrupt ring */ +diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c +index a539869..763280b 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -1978,6 +1978,19 @@ static struct radeon_asic_ring ci_dma_ring = { + .set_wptr = &cik_sdma_set_wptr, 
+ }; + ++static struct radeon_asic_ring ci_vce_ring = { ++ .ib_execute = &radeon_vce_ib_execute, ++ .emit_fence = &radeon_vce_fence_emit, ++ .emit_semaphore = &radeon_vce_semaphore_emit, ++ .cs_parse = &radeon_vce_cs_parse, ++ .ring_test = &radeon_vce_ring_test, ++ .ib_test = &radeon_vce_ib_test, ++ .is_lockup = &radeon_ring_test_lockup, ++ .get_rptr = &vce_v1_0_get_rptr, ++ .get_wptr = &vce_v1_0_get_wptr, ++ .set_wptr = &vce_v1_0_set_wptr, ++}; ++ + static struct radeon_asic ci_asic = { + .init = &cik_init, + .fini = &cik_fini, +@@ -2006,6 +2019,8 @@ static struct radeon_asic ci_asic = { + [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, ++ [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, ++ [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, + }, + .irq = { + .set = &cik_irq_set, +@@ -2107,6 +2122,8 @@ static struct radeon_asic kv_asic = { + [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, ++ [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, ++ [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, + }, + .irq = { + .set = &cik_irq_set, +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 998042e..a6c3eeb 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -850,4 +850,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + /* uvd v4.2 */ + int uvd_v4_2_resume(struct radeon_device *rdev); + ++/* vce v1.0 */ ++uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++void vce_v1_0_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring); ++int vce_v1_0_init(struct radeon_device *rdev); ++int vce_v1_0_start(struct radeon_device *rdev); ++ ++/* vce v2.0 */ ++int vce_v2_0_resume(struct radeon_device 
*rdev); ++ + #endif +diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c +index eec1ad3..9d4e25d 100644 +--- a/drivers/gpu/drm/radeon/radeon_cs.c ++++ b/drivers/gpu/drm/radeon/radeon_cs.c +@@ -153,6 +153,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority + case RADEON_CS_RING_UVD: + p->ring = R600_RING_TYPE_UVD_INDEX; + break; ++ case RADEON_CS_RING_VCE: ++ /* TODO: only use the low priority ring for now */ ++ p->ring = TN_RING_TYPE_VCE1_INDEX; ++ break; + } + return 0; + } +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index e90c42b..b35967a 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -588,6 +588,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, + if (rdev->cmask_filp == file_priv) + rdev->cmask_filp = NULL; + radeon_uvd_free_handles(rdev, file_priv); ++ radeon_vce_free_handles(rdev, file_priv); + } + + /* +diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c +index 65f1cea..91457f8 100644 +--- a/drivers/gpu/drm/radeon/radeon_ring.c ++++ b/drivers/gpu/drm/radeon/radeon_ring.c +@@ -814,6 +814,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; + static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; + static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; + static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; ++static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; ++static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX; + + static struct drm_info_list radeon_debugfs_ring_info_list[] = { + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, +@@ -822,6 +824,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = { + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, 
&r600_uvd_index}, ++ {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, ++ {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, + }; + + static int radeon_debugfs_sa_info(struct seq_file *m, void *data) +diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c +index 12e8099..3a13e0d 100644 +--- a/drivers/gpu/drm/radeon/radeon_test.c ++++ b/drivers/gpu/drm/radeon/radeon_test.c +@@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_fence **fence) + { ++ uint32_t handle = ring->idx ^ 0xdeafbeef; + int r; + + if (ring->idx == R600_RING_TYPE_UVD_INDEX) { +- r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); ++ r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); + if (r) { + DRM_ERROR("Failed to get dummy create msg\n"); + return r; + } + +- r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); ++ r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); + if (r) { + DRM_ERROR("Failed to get dummy destroy msg\n"); + return r; + } ++ ++ } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || ++ ring->idx == TN_RING_TYPE_VCE2_INDEX) { ++ r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); ++ if (r) { ++ DRM_ERROR("Failed to get dummy create msg\n"); ++ return r; ++ } ++ ++ r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); ++ if (r) { ++ DRM_ERROR("Failed to get dummy destroy msg\n"); ++ return r; ++ } ++ + } else { + r = radeon_ring_lock(rdev, ring, 64); + if (r) { +@@ -486,6 +502,16 @@ out_cleanup: + printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); + } + ++static bool radeon_test_sync_possible(struct radeon_ring *ringA, ++ struct radeon_ring *ringB) ++{ ++ if (ringA->idx == TN_RING_TYPE_VCE2_INDEX && ++ ringB->idx == TN_RING_TYPE_VCE1_INDEX) ++ return false; ++ ++ return true; ++} ++ + void radeon_test_syncing(struct radeon_device *rdev) + { + int i, j, k; +@@ 
-500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev) + if (!ringB->ready) + continue; + ++ if (!radeon_test_sync_possible(ringA, ringB)) ++ continue; ++ + DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); + radeon_test_ring_sync(rdev, ringA, ringB); + +@@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev) + if (!ringC->ready) + continue; + ++ if (!radeon_test_sync_possible(ringA, ringC)) ++ continue; ++ ++ if (!radeon_test_sync_possible(ringB, ringC)) ++ continue; ++ + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); + radeon_test_ring_sync2(rdev, ringA, ringB, ringC); + +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +new file mode 100644 +index 0000000..2547d8e +--- /dev/null ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -0,0 +1,588 @@ ++/* ++ * Copyright 2013 Advanced Micro Devices, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * Authors: Christian König <christian.koenig@amd.com> ++ */ ++ ++#include <linux/firmware.h> ++#include <linux/module.h> ++#include <drm/drmP.h> ++#include <drm/drm.h> ++ ++#include "radeon.h" ++#include "radeon_asic.h" ++#include "sid.h" ++ ++/* Firmware Names */ ++#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" ++ ++MODULE_FIRMWARE(FIRMWARE_BONAIRE); ++ ++/** ++ * radeon_vce_init - allocate memory, load vce firmware ++ * ++ * @rdev: radeon_device pointer ++ * ++ * First step to get VCE online, allocate memory and load the firmware ++ */ ++int radeon_vce_init(struct radeon_device *rdev) ++{ ++ unsigned long bo_size; ++ const char *fw_name; ++ int i, r; ++ ++ switch (rdev->family) { ++ case CHIP_BONAIRE: ++ case CHIP_KAVERI: ++ case CHIP_KABINI: ++ fw_name = FIRMWARE_BONAIRE; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); ++ if (r) { ++ dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", ++ fw_name); ++ return r; ++ } ++ ++ bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + ++ RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; ++ r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, ++ RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); ++ if (r) { ++ dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); ++ return r; ++ } ++ ++ r = radeon_vce_resume(rdev); ++ if (r) ++ return r; ++ ++ memset(rdev->vce.cpu_addr, 0, bo_size); ++ memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); ++ ++ r = radeon_vce_suspend(rdev); ++ if (r) ++ return r; ++ ++ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { ++ atomic_set(&rdev->vce.handles[i], 0); ++ rdev->vce.filp[i] = NULL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * radeon_vce_fini - free memory ++ * ++ * @rdev: radeon_device pointer ++ * ++ * Last step on VCE 
teardown, free firmware memory ++ */ ++void radeon_vce_fini(struct radeon_device *rdev) ++{ ++ radeon_vce_suspend(rdev); ++ radeon_bo_unref(&rdev->vce.vcpu_bo); ++} ++ ++/** ++ * radeon_vce_suspend - unpin VCE fw memory ++ * ++ * @rdev: radeon_device pointer ++ * ++ * TODO: Test VCE suspend/resume ++ */ ++int radeon_vce_suspend(struct radeon_device *rdev) ++{ ++ int r; ++ ++ if (rdev->vce.vcpu_bo == NULL) ++ return 0; ++ ++ r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); ++ if (!r) { ++ radeon_bo_kunmap(rdev->vce.vcpu_bo); ++ radeon_bo_unpin(rdev->vce.vcpu_bo); ++ radeon_bo_unreserve(rdev->vce.vcpu_bo); ++ } ++ return r; ++} ++ ++/** ++ * radeon_vce_resume - pin VCE fw memory ++ * ++ * @rdev: radeon_device pointer ++ * ++ * TODO: Test VCE suspend/resume ++ */ ++int radeon_vce_resume(struct radeon_device *rdev) ++{ ++ int r; ++ ++ if (rdev->vce.vcpu_bo == NULL) ++ return -EINVAL; ++ ++ r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); ++ if (r) { ++ radeon_bo_unref(&rdev->vce.vcpu_bo); ++ dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); ++ return r; ++ } ++ ++ r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, ++ &rdev->vce.gpu_addr); ++ if (r) { ++ radeon_bo_unreserve(rdev->vce.vcpu_bo); ++ radeon_bo_unref(&rdev->vce.vcpu_bo); ++ dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); ++ return r; ++ } ++ ++ r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); ++ if (r) { ++ dev_err(rdev->dev, "(%d) VCE map failed\n", r); ++ return r; ++ } ++ ++ radeon_bo_unreserve(rdev->vce.vcpu_bo); ++ ++ return 0; ++} ++ ++/** ++ * radeon_vce_free_handles - free still open VCE handles ++ * ++ * @rdev: radeon_device pointer ++ * @filp: drm file pointer ++ * ++ * Close all VCE handles still open by this file pointer ++ */ ++void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) ++{ ++ int i, r; ++ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { ++ uint32_t handle = atomic_read(&rdev->vce.handles[i]); ++ if (!handle || rdev->vce.filp[i] 
!= filp) ++ continue; ++ ++ r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, ++ handle, NULL); ++ if (r) ++ DRM_ERROR("Error destroying VCE handle (%d)!\n", r); ++ ++ rdev->vce.filp[i] = NULL; ++ atomic_set(&rdev->vce.handles[i], 0); ++ } ++} ++ ++/** ++ * radeon_vce_get_create_msg - generate a VCE create msg ++ * ++ * @rdev: radeon_device pointer ++ * @ring: ring we should submit the msg to ++ * @handle: VCE session handle to use ++ * @fence: optional fence to return ++ * ++ * Open up a stream for HW test ++ */ ++int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, ++ uint32_t handle, struct radeon_fence **fence) ++{ ++ const unsigned ib_size_dw = 1024; ++ struct radeon_ib ib; ++ uint64_t dummy; ++ int i, r; ++ ++ r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); ++ if (r) { ++ DRM_ERROR("radeon: failed to get ib (%d).\n", r); ++ return r; ++ } ++ ++ dummy = ib.gpu_addr + 1024; ++ ++ /* stitch together an VCE create msg */ ++ ib.length_dw = 0; ++ ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ ++ ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ ++ ib.ptr[ib.length_dw++] = handle; ++ ++ ib.ptr[ib.length_dw++] = 0x00000030; /* len */ ++ ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ ++ ib.ptr[ib.length_dw++] = 0x00000000; ++ ib.ptr[ib.length_dw++] = 0x00000042; ++ ib.ptr[ib.length_dw++] = 0x0000000a; ++ ib.ptr[ib.length_dw++] = 0x00000001; ++ ib.ptr[ib.length_dw++] = 0x00000080; ++ ib.ptr[ib.length_dw++] = 0x00000060; ++ ib.ptr[ib.length_dw++] = 0x00000100; ++ ib.ptr[ib.length_dw++] = 0x00000100; ++ ib.ptr[ib.length_dw++] = 0x0000000c; ++ ib.ptr[ib.length_dw++] = 0x00000000; ++ ++ ib.ptr[ib.length_dw++] = 0x00000014; /* len */ ++ ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ ++ ib.ptr[ib.length_dw++] = upper_32_bits(dummy); ++ ib.ptr[ib.length_dw++] = dummy; ++ ib.ptr[ib.length_dw++] = 0x00000001; ++ ++ for (i = ib.length_dw; i < ib_size_dw; ++i) ++ ib.ptr[i] = 0x0; ++ ++ r = radeon_ib_schedule(rdev, &ib, 
NULL); ++ if (r) { ++ DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); ++ } ++ ++ if (fence) ++ *fence = radeon_fence_ref(ib.fence); ++ ++ radeon_ib_free(rdev, &ib); ++ ++ return r; ++} ++ ++/** ++ * radeon_vce_get_destroy_msg - generate a VCE destroy msg ++ * ++ * @rdev: radeon_device pointer ++ * @ring: ring we should submit the msg to ++ * @handle: VCE session handle to use ++ * @fence: optional fence to return ++ * ++ * Close up a stream for HW test or if userspace failed to do so ++ */ ++int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, ++ uint32_t handle, struct radeon_fence **fence) ++{ ++ const unsigned ib_size_dw = 1024; ++ struct radeon_ib ib; ++ uint64_t dummy; ++ int i, r; ++ ++ r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); ++ if (r) { ++ DRM_ERROR("radeon: failed to get ib (%d).\n", r); ++ return r; ++ } ++ ++ dummy = ib.gpu_addr + 1024; ++ ++ /* stitch together an VCE destroy msg */ ++ ib.length_dw = 0; ++ ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ ++ ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ ++ ib.ptr[ib.length_dw++] = handle; ++ ++ ib.ptr[ib.length_dw++] = 0x00000014; /* len */ ++ ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ ++ ib.ptr[ib.length_dw++] = upper_32_bits(dummy); ++ ib.ptr[ib.length_dw++] = dummy; ++ ib.ptr[ib.length_dw++] = 0x00000001; ++ ++ ib.ptr[ib.length_dw++] = 0x00000008; /* len */ ++ ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ ++ ++ for (i = ib.length_dw; i < ib_size_dw; ++i) ++ ib.ptr[i] = 0x0; ++ ++ r = radeon_ib_schedule(rdev, &ib, NULL); ++ if (r) { ++ DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); ++ } ++ ++ if (fence) ++ *fence = radeon_fence_ref(ib.fence); ++ ++ radeon_ib_free(rdev, &ib); ++ ++ return r; ++} ++ ++/** ++ * radeon_vce_cs_reloc - command submission relocation ++ * ++ * @p: parser context ++ * @lo: address of lower dword ++ * @hi: address of higher dword ++ * ++ * Patch relocation inside command stream with real buffer 
address ++ */ ++int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) ++{ ++ struct radeon_cs_chunk *relocs_chunk; ++ uint64_t offset; ++ unsigned idx; ++ ++ relocs_chunk = &p->chunks[p->chunk_relocs_idx]; ++ offset = radeon_get_ib_value(p, lo); ++ idx = radeon_get_ib_value(p, hi); ++ ++ if (idx >= relocs_chunk->length_dw) { ++ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", ++ idx, relocs_chunk->length_dw); ++ return -EINVAL; ++ } ++ ++ offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; ++ ++ p->ib.ptr[lo] = offset & 0xFFFFFFFF; ++ p->ib.ptr[hi] = offset >> 32; ++ ++ return 0; ++} ++ ++/** ++ * radeon_vce_cs_parse - parse and validate the command stream ++ * ++ * @p: parser context ++ * ++ */ ++int radeon_vce_cs_parse(struct radeon_cs_parser *p) ++{ ++ uint32_t handle = 0; ++ bool destroy = false; ++ int i, r; ++ ++ while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { ++ uint32_t len = radeon_get_ib_value(p, p->idx); ++ uint32_t cmd = radeon_get_ib_value(p, p->idx + 1); ++ ++ if ((len < 8) || (len & 3)) { ++ DRM_ERROR("invalid VCE command length (%d)!\n", len); ++ return -EINVAL; ++ } ++ ++ switch (cmd) { ++ case 0x00000001: // session ++ handle = radeon_get_ib_value(p, p->idx + 2); ++ break; ++ ++ case 0x00000002: // task info ++ case 0x01000001: // create ++ case 0x04000001: // config extension ++ case 0x04000002: // pic control ++ case 0x04000005: // rate control ++ case 0x04000007: // motion estimation ++ case 0x04000008: // rdo ++ break; ++ ++ case 0x03000001: // encode ++ r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); ++ if (r) ++ return r; ++ ++ r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); ++ if (r) ++ return r; ++ break; ++ ++ case 0x02000001: // destroy ++ destroy = true; ++ break; ++ ++ case 0x05000001: // context buffer ++ case 0x05000004: // video bitstream buffer ++ case 0x05000005: // feedback buffer ++ r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); ++ if (r) ++ return r; ++ break; ++ ++ default: ++ 
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); ++ return -EINVAL; ++ } ++ ++ p->idx += len / 4; ++ } ++ ++ if (destroy) { ++ /* IB contains a destroy msg, free the handle */ ++ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) ++ atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); ++ ++ return 0; ++ } ++ ++ /* create or encode, validate the handle */ ++ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { ++ if (atomic_read(&p->rdev->vce.handles[i]) == handle) ++ return 0; ++ } ++ ++ /* handle not found try to alloc a new one */ ++ for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { ++ if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { ++ p->rdev->vce.filp[i] = p->filp; ++ return 0; ++ } ++ } ++ ++ DRM_ERROR("No more free VCE handles!\n"); ++ return -EINVAL; ++} ++ ++/** ++ * radeon_vce_semaphore_emit - emit a semaphore command ++ * ++ * @rdev: radeon_device pointer ++ * @ring: engine to use ++ * @semaphore: address of semaphore ++ * @emit_wait: true=emit wait, false=emit signal ++ * ++ */ ++bool radeon_vce_semaphore_emit(struct radeon_device *rdev, ++ struct radeon_ring *ring, ++ struct radeon_semaphore *semaphore, ++ bool emit_wait) ++{ ++ uint64_t addr = semaphore->gpu_addr; ++ ++ radeon_ring_write(ring, VCE_CMD_SEMAPHORE); ++ radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); ++ radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); ++ radeon_ring_write(ring, 0x01003000 | (emit_wait ? 
1 : 0)); ++ if (!emit_wait) ++ radeon_ring_write(ring, VCE_CMD_END); ++ ++ return true; ++} ++ ++/** ++ * radeon_vce_ib_execute - execute indirect buffer ++ * ++ * @rdev: radeon_device pointer ++ * @ib: the IB to execute ++ * ++ */ ++void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) ++{ ++ struct radeon_ring *ring = &rdev->ring[ib->ring]; ++ radeon_ring_write(ring, VCE_CMD_IB); ++ radeon_ring_write(ring, ib->gpu_addr); ++ radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); ++ radeon_ring_write(ring, ib->length_dw); ++} ++ ++/** ++ * radeon_vce_fence_emit - add a fence command to the ring ++ * ++ * @rdev: radeon_device pointer ++ * @fence: the fence ++ * ++ */ ++void radeon_vce_fence_emit(struct radeon_device *rdev, ++ struct radeon_fence *fence) ++{ ++ struct radeon_ring *ring = &rdev->ring[fence->ring]; ++ uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; ++ ++ radeon_ring_write(ring, VCE_CMD_FENCE); ++ radeon_ring_write(ring, addr); ++ radeon_ring_write(ring, upper_32_bits(addr)); ++ radeon_ring_write(ring, fence->seq); ++ radeon_ring_write(ring, VCE_CMD_TRAP); ++ radeon_ring_write(ring, VCE_CMD_END); ++} ++ ++/** ++ * radeon_vce_ring_test - test if VCE ring is working ++ * ++ * @rdev: radeon_device pointer ++ * @ring: the engine to test on ++ * ++ */ ++int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) ++{ ++ uint32_t rptr = vce_v1_0_get_rptr(rdev, ring); ++ unsigned i; ++ int r; ++ ++ r = radeon_ring_lock(rdev, ring, 16); ++ if (r) { ++ DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n", ++ ring->idx, r); ++ return r; ++ } ++ radeon_ring_write(ring, VCE_CMD_END); ++ radeon_ring_unlock_commit(rdev, ring); ++ ++ for (i = 0; i < rdev->usec_timeout; i++) { ++ if (vce_v1_0_get_rptr(rdev, ring) != rptr) ++ break; ++ DRM_UDELAY(1); ++ } ++ ++ if (i < rdev->usec_timeout) { ++ DRM_INFO("ring test on %d succeeded in %d usecs\n", ++ ring->idx, i); ++ } else { ++ DRM_ERROR("radeon: ring %d test failed\n", ++ 
ring->idx); ++ r = -ETIMEDOUT; ++ } ++ ++ return r; ++} ++ ++/** ++ * radeon_vce_ib_test - test if VCE IBs are working ++ * ++ * @rdev: radeon_device pointer ++ * @ring: the engine to test on ++ * ++ */ ++int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ++{ ++ struct radeon_fence *fence = NULL; ++ int r; ++ ++ r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); ++ if (r) { ++ DRM_ERROR("radeon: failed to get create msg (%d).\n", r); ++ goto error; ++ } ++ ++ r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); ++ if (r) { ++ DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); ++ goto error; ++ } ++ ++ r = radeon_fence_wait(fence, false); ++ if (r) { ++ DRM_ERROR("radeon: fence wait failed (%d).\n", r); ++ } else { ++ DRM_INFO("ib test on ring %d succeeded\n", ring->idx); ++ } ++error: ++ radeon_fence_unref(&fence); ++ return r; ++} +diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h +index 940e36b..2b71e67 100644 +--- a/drivers/gpu/drm/radeon/sid.h ++++ b/drivers/gpu/drm/radeon/sid.h +@@ -1747,4 +1747,51 @@ + #define DMA_PACKET_CONSTANT_FILL 0xd + #define DMA_PACKET_NOP 0xf + ++#define VCE_STATUS 0x20004 ++#define VCE_VCPU_CNTL 0x20014 ++#define VCE_CLK_EN (1 << 0) ++#define VCE_VCPU_CACHE_OFFSET0 0x20024 ++#define VCE_VCPU_CACHE_SIZE0 0x20028 ++#define VCE_VCPU_CACHE_OFFSET1 0x2002c ++#define VCE_VCPU_CACHE_SIZE1 0x20030 ++#define VCE_VCPU_CACHE_OFFSET2 0x20034 ++#define VCE_VCPU_CACHE_SIZE2 0x20038 ++#define VCE_SOFT_RESET 0x20120 ++#define VCE_ECPU_SOFT_RESET (1 << 0) ++#define VCE_FME_SOFT_RESET (1 << 2) ++#define VCE_RB_BASE_LO2 0x2016c ++#define VCE_RB_BASE_HI2 0x20170 ++#define VCE_RB_SIZE2 0x20174 ++#define VCE_RB_RPTR2 0x20178 ++#define VCE_RB_WPTR2 0x2017c ++#define VCE_RB_BASE_LO 0x20180 ++#define VCE_RB_BASE_HI 0x20184 ++#define VCE_RB_SIZE 0x20188 ++#define VCE_RB_RPTR 0x2018c ++#define VCE_RB_WPTR 0x20190 ++#define VCE_CLOCK_GATING_A 0x202f8 ++#define VCE_CLOCK_GATING_B 0x202fc 
++#define VCE_UENC_CLOCK_GATING 0x205bc ++#define VCE_UENC_REG_CLOCK_GATING 0x205c0 ++#define VCE_FW_REG_STATUS 0x20e10 ++# define VCE_FW_REG_STATUS_BUSY (1 << 0) ++# define VCE_FW_REG_STATUS_PASS (1 << 3) ++# define VCE_FW_REG_STATUS_DONE (1 << 11) ++#define VCE_LMI_FW_START_KEYSEL 0x20e18 ++#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 ++#define VCE_LMI_CTRL2 0x20e74 ++#define VCE_LMI_CTRL 0x20e98 ++#define VCE_LMI_VM_CTRL 0x20ea0 ++#define VCE_LMI_SWAP_CNTL 0x20eb4 ++#define VCE_LMI_SWAP_CNTL1 0x20eb8 ++#define VCE_LMI_CACHE_CTRL 0x20ef4 ++ ++#define VCE_CMD_NO_OP 0x00000000 ++#define VCE_CMD_END 0x00000001 ++#define VCE_CMD_IB 0x00000002 ++#define VCE_CMD_FENCE 0x00000003 ++#define VCE_CMD_TRAP 0x00000004 ++#define VCE_CMD_IB_AUTO 0x00000005 ++#define VCE_CMD_SEMAPHORE 0x00000006 ++ + #endif +diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c +new file mode 100644 +index 0000000..e0c3534 +--- /dev/null ++++ b/drivers/gpu/drm/radeon/vce_v1_0.c +@@ -0,0 +1,187 @@ ++/* ++ * Copyright 2013 Advanced Micro Devices, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * Authors: Christian König <christian.koenig@amd.com> ++ */ ++ ++#include <linux/firmware.h> ++#include <drm/drmP.h> ++#include "radeon.h" ++#include "radeon_asic.h" ++#include "sid.h" ++ ++/** ++ * vce_v1_0_get_rptr - get read pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon_ring pointer ++ * ++ * Returns the current hardware read pointer ++ */ ++uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ if (ring->idx == TN_RING_TYPE_VCE1_INDEX) ++ return RREG32(VCE_RB_RPTR); ++ else ++ return RREG32(VCE_RB_RPTR2); ++} ++ ++/** ++ * vce_v1_0_get_wptr - get write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon_ring pointer ++ * ++ * Returns the current hardware write pointer ++ */ ++uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ if (ring->idx == TN_RING_TYPE_VCE1_INDEX) ++ return RREG32(VCE_RB_WPTR); ++ else ++ return RREG32(VCE_RB_WPTR2); ++} ++ ++/** ++ * vce_v1_0_set_wptr - set write pointer ++ * ++ * @rdev: radeon_device pointer ++ * @ring: radeon_ring pointer ++ * ++ * Commits the write pointer to the hardware ++ */ ++void vce_v1_0_set_wptr(struct radeon_device *rdev, ++ struct radeon_ring *ring) ++{ ++ if (ring->idx == TN_RING_TYPE_VCE1_INDEX) ++ WREG32(VCE_RB_WPTR, ring->wptr); ++ else ++ WREG32(VCE_RB_WPTR2, ring->wptr); ++} ++ ++/** ++ * vce_v1_0_start - start VCE block ++ * ++ * @rdev: radeon_device pointer ++ * ++ * Setup and start the VCE block ++ */ ++int 
vce_v1_0_start(struct radeon_device *rdev) ++{ ++ struct radeon_ring *ring; ++ int i, j, r; ++ ++ /* set BUSY flag */ ++ WREG32_P(VCE_STATUS, 1, ~1); ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; ++ WREG32(VCE_RB_RPTR, ring->rptr); ++ WREG32(VCE_RB_WPTR, ring->wptr); ++ WREG32(VCE_RB_BASE_LO, ring->gpu_addr); ++ WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); ++ WREG32(VCE_RB_SIZE, ring->ring_size / 4); ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; ++ WREG32(VCE_RB_RPTR2, ring->rptr); ++ WREG32(VCE_RB_WPTR2, ring->wptr); ++ WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); ++ WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); ++ WREG32(VCE_RB_SIZE2, ring->ring_size / 4); ++ ++ WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); ++ ++ WREG32_P(VCE_SOFT_RESET, ++ VCE_ECPU_SOFT_RESET | ++ VCE_FME_SOFT_RESET, ~( ++ VCE_ECPU_SOFT_RESET | ++ VCE_FME_SOFT_RESET)); ++ ++ mdelay(100); ++ ++ WREG32_P(VCE_SOFT_RESET, 0, ~( ++ VCE_ECPU_SOFT_RESET | ++ VCE_FME_SOFT_RESET)); ++ ++ for (i = 0; i < 10; ++i) { ++ uint32_t status; ++ for (j = 0; j < 100; ++j) { ++ status = RREG32(VCE_STATUS); ++ if (status & 2) ++ break; ++ mdelay(10); ++ } ++ r = 0; ++ if (status & 2) ++ break; ++ ++ DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); ++ WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); ++ mdelay(10); ++ WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); ++ mdelay(10); ++ r = -1; ++ } ++ ++ /* clear BUSY flag */ ++ WREG32_P(VCE_STATUS, 0, ~1); ++ ++ if (r) { ++ DRM_ERROR("VCE not responding, giving up!!!\n"); ++ return r; ++ } ++ ++ return 0; ++} ++ ++int vce_v1_0_init(struct radeon_device *rdev) ++{ ++ struct radeon_ring *ring; ++ int r; ++ ++ r = vce_v1_0_start(rdev); ++ if (r) ++ return r; ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; ++ ring->ready = true; ++ r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); ++ if (r) { ++ ring->ready = false; ++ return r; ++ } ++ ++ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; ++ 
ring->ready = true; ++ r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); ++ if (r) { ++ ring->ready = false; ++ return r; ++ } ++ ++ DRM_INFO("VCE initialized successfully.\n"); ++ ++ return 0; ++} +diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c +new file mode 100644 +index 0000000..4911d1b +--- /dev/null ++++ b/drivers/gpu/drm/radeon/vce_v2_0.c +@@ -0,0 +1,70 @@ ++/* ++ * Copyright 2013 Advanced Micro Devices, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. 
++ * ++ * Authors: Christian König <christian.koenig@amd.com> ++ */ ++ ++#include <linux/firmware.h> ++#include <drm/drmP.h> ++#include "radeon.h" ++#include "radeon_asic.h" ++#include "cikd.h" ++ ++int vce_v2_0_resume(struct radeon_device *rdev) ++{ ++ uint64_t addr = rdev->vce.gpu_addr; ++ uint32_t size; ++ ++ WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); ++ WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); ++ WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); ++ WREG32(VCE_CLOCK_GATING_B, 0xf7); ++ ++ WREG32(VCE_LMI_CTRL, 0x00398000); ++ WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); ++ WREG32(VCE_LMI_SWAP_CNTL, 0); ++ WREG32(VCE_LMI_SWAP_CNTL1, 0); ++ WREG32(VCE_LMI_VM_CTRL, 0); ++ ++ size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); ++ WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); ++ WREG32(VCE_VCPU_CACHE_SIZE0, size); ++ ++ addr += size; ++ size = RADEON_VCE_STACK_SIZE; ++ WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); ++ WREG32(VCE_VCPU_CACHE_SIZE1, size); ++ ++ addr += size; ++ size = RADEON_VCE_HEAP_SIZE; ++ WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); ++ WREG32(VCE_VCPU_CACHE_SIZE2, size); ++ ++ WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); ++ ++ WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, ++ ~VCE_SYS_INT_TRAP_INTERRUPT_EN); ++ ++ return 0; ++} +diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h +index fe421e8a..b93c92a 100644 +--- a/include/uapi/drm/radeon_drm.h ++++ b/include/uapi/drm/radeon_drm.h +@@ -919,6 +919,7 @@ struct drm_radeon_gem_va { + #define RADEON_CS_RING_COMPUTE 1 + #define RADEON_CS_RING_DMA 2 + #define RADEON_CS_RING_UVD 3 ++#define RADEON_CS_RING_VCE 4 + /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ + /* 0 = normal, + = higher priority, - = lower priority */ + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0028-drm-radeon-add-VCE-ring-query.patch b/common/recipes-kernel/linux/linux-amd/0028-drm-radeon-add-VCE-ring-query.patch new file mode 
100644 index 00000000..cba682d4 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0028-drm-radeon-add-VCE-ring-query.patch @@ -0,0 +1,30 @@ +From d14ffb8ce3e16178fdcad9ae06b3a09833251498 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 27 Jan 2014 10:16:06 -0700 +Subject: [PATCH 28/60] drm/radeon: add VCE ring query +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index b35967a..5f37f06 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -417,6 +417,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + case RADEON_CS_RING_UVD: + *value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready; + break; ++ case RADEON_CS_RING_VCE: ++ *value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready; ++ break; + default: + return -EINVAL; + } +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0029-drm-radeon-add-VCE-version-parsing-and-checking.patch b/common/recipes-kernel/linux/linux-amd/0029-drm-radeon-add-VCE-version-parsing-and-checking.patch new file mode 100644 index 00000000..694d4f32 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0029-drm-radeon-add-VCE-version-parsing-and-checking.patch @@ -0,0 +1,147 @@ +From ce7f14da5704849b2e549cf17df8fcc1e2852906 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Thu, 23 Jan 2014 09:50:49 -0700 +Subject: [PATCH 29/60] drm/radeon: add VCE version parsing and checking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Also make the result available to userspace. 
+ +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 2 ++ + drivers/gpu/drm/radeon/radeon_kms.c | 6 ++++ + drivers/gpu/drm/radeon/radeon_vce.c | 56 +++++++++++++++++++++++++++++++++---- + include/uapi/drm/radeon_drm.h | 4 +++ + 4 files changed, 62 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 0abbe5e..43f8f74 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1607,6 +1607,8 @@ struct radeon_vce { + struct radeon_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; ++ unsigned fw_version; ++ unsigned fb_version; + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + }; +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index 5f37f06..131b517 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -457,6 +457,12 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + DRM_DEBUG_KMS("BACKEND_ENABLED_MASK is si+ only!\n"); + } + break; ++ case RADEON_INFO_VCE_FW_VERSION: ++ *value = rdev->vce.fw_version; ++ break; ++ case RADEON_INFO_VCE_FB_VERSION: ++ *value = rdev->vce.fb_version; ++ break; + default: + DRM_DEBUG_KMS("Invalid request %d\n", info->request); + return -EINVAL; +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index 2547d8e..f46563b 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -48,8 +48,11 @@ MODULE_FIRMWARE(FIRMWARE_BONAIRE); + */ + int radeon_vce_init(struct radeon_device *rdev) + { +- unsigned long bo_size; +- const char *fw_name; ++ static const char *fw_version = "[ATI LIB=VCEFW,"; ++ static const char *fb_version = "[ATI LIB=VCEFWSTATS,"; ++ unsigned long size; ++ const char *fw_name, *c; ++ uint8_t start, mid, end; + int i, r; + + switch (rdev->family) { 
+@@ -70,9 +73,50 @@ int radeon_vce_init(struct radeon_device *rdev) + return r; + } + +- bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + +- RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; +- r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, ++ /* search for firmware version */ ++ ++ size = rdev->vce_fw->size - strlen(fw_version) - 9; ++ c = rdev->vce_fw->data; ++ for (;size > 0; --size, ++c) ++ if (strncmp(c, fw_version, strlen(fw_version)) == 0) ++ break; ++ ++ if (size == 0) ++ return -EINVAL; ++ ++ c += strlen(fw_version); ++ if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3) ++ return -EINVAL; ++ ++ /* search for feedback version */ ++ ++ size = rdev->vce_fw->size - strlen(fb_version) - 3; ++ c = rdev->vce_fw->data; ++ for (;size > 0; --size, ++c) ++ if (strncmp(c, fb_version, strlen(fb_version)) == 0) ++ break; ++ ++ if (size == 0) ++ return -EINVAL; ++ ++ c += strlen(fb_version); ++ if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1) ++ return -EINVAL; ++ ++ DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n", ++ start, mid, end, rdev->vce.fb_version); ++ ++ rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); ++ ++ /* we can only work with this fw version for now */ ++ if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) ++ return -EINVAL; ++ ++ /* load firmware into VRAM */ ++ ++ size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + ++ RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; ++ r = radeon_bo_create(rdev, size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); +@@ -83,7 +127,7 @@ int radeon_vce_init(struct radeon_device *rdev) + if (r) + return r; + +- memset(rdev->vce.cpu_addr, 0, bo_size); ++ memset(rdev->vce.cpu_addr, 0, size); + memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + + r = radeon_vce_suspend(rdev); +diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h 
+index b93c92a..2ff6e71 100644 +--- a/include/uapi/drm/radeon_drm.h ++++ b/include/uapi/drm/radeon_drm.h +@@ -986,6 +986,10 @@ struct drm_radeon_cs { + #define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18 + /* query the number of render backends */ + #define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 ++/* version of VCE firmware */ ++#define RADEON_INFO_VCE_FW_VERSION 0x1b ++/* version of VCE feedback */ ++#define RADEON_INFO_VCE_FB_VERSION 0x1c + + + struct drm_radeon_info { +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0030-drm-radeon-add-callback-for-setting-vce-clocks.patch b/common/recipes-kernel/linux/linux-amd/0030-drm-radeon-add-callback-for-setting-vce-clocks.patch new file mode 100644 index 00000000..3f163a7e --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0030-drm-radeon-add-callback-for-setting-vce-clocks.patch @@ -0,0 +1,35 @@ +From da6c83dd38cbe6aed578e194877060820432d4f6 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Tue, 20 Aug 2013 20:01:18 -0400 +Subject: [PATCH 30/60] drm/radeon: add callback for setting vce clocks + +Similar to uvd clock setting. 
+ +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 43f8f74..5865ae7 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1823,6 +1823,7 @@ struct radeon_asic { + void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); + void (*set_clock_gating)(struct radeon_device *rdev, int enable); + int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk); ++ int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk); + int (*get_temperature)(struct radeon_device *rdev); + } pm; + /* dynamic power management */ +@@ -2679,6 +2680,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); + #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l)) + #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e)) + #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d)) ++#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec)) + #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev)) + #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s))) + #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r))) +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0031-drm-radeon-dpm-move-platform-caps-fetching-to-a-sepa.patch b/common/recipes-kernel/linux/linux-amd/0031-drm-radeon-dpm-move-platform-caps-fetching-to-a-sepa.patch new file mode 100644 index 00000000..248ddcd5 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0031-drm-radeon-dpm-move-platform-caps-fetching-to-a-sepa.patch @@ -0,0 +1,330 @@ +From dd43f0af944746140ad17efe0c53295b1457da50 Mon Sep 17 00:00:00 2001 +From: Alex 
Deucher <alexander.deucher@amd.com> +Date: Wed, 21 Aug 2013 10:02:32 -0400 +Subject: [PATCH 31/60] drm/radeon/dpm: move platform caps fetching to a + separate function + +It's needed by by both the asic specific functions and the +extended table parser. + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/btc_dpm.c | 4 ++++ + drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++--- + drivers/gpu/drm/radeon/cypress_dpm.c | 4 ++++ + drivers/gpu/drm/radeon/kv_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/ni_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/r600_dpm.c | 20 ++++++++++++++++++++ + drivers/gpu/drm/radeon/r600_dpm.h | 2 ++ + drivers/gpu/drm/radeon/rs780_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/rv6xx_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/rv770_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/si_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/sumo_dpm.c | 7 ++++--- + drivers/gpu/drm/radeon/trinity_dpm.c | 7 ++++--- + 13 files changed, 68 insertions(+), 27 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c +index 9b6950d..e910299 100644 +--- a/drivers/gpu/drm/radeon/btc_dpm.c ++++ b/drivers/gpu/drm/radeon/btc_dpm.c +@@ -2610,6 +2610,10 @@ int btc_dpm_init(struct radeon_device *rdev) + pi->min_vddc_in_table = 0; + pi->max_vddc_in_table = 0; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = rv7xx_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c +index 95f4ab9..56a517b 100644 +--- a/drivers/gpu/drm/radeon/ci_dpm.c ++++ b/drivers/gpu/drm/radeon/ci_dpm.c +@@ -4954,9 +4954,6 @@ static int ci_parse_power_table(struct radeon_device *rdev) + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- 
rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; +@@ -5072,6 +5069,12 @@ int ci_dpm_init(struct radeon_device *rdev) + ci_dpm_fini(rdev); + return ret; + } ++ ++ ret = r600_get_platform_caps(rdev); ++ if (ret) { ++ ci_dpm_fini(rdev); ++ return ret; ++ } + ret = ci_parse_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); +diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c +index 7143783..1935c4c 100644 +--- a/drivers/gpu/drm/radeon/cypress_dpm.c ++++ b/drivers/gpu/drm/radeon/cypress_dpm.c +@@ -2049,6 +2049,10 @@ int cypress_dpm_init(struct radeon_device *rdev) + pi->min_vddc_in_table = 0; + pi->max_vddc_in_table = 0; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = rv7xx_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c +index b419055..58b5a5d 100644 +--- a/drivers/gpu/drm/radeon/kv_dpm.c ++++ b/drivers/gpu/drm/radeon/kv_dpm.c +@@ -2556,9 +2556,6 @@ static int kv_parse_power_table(struct radeon_device *rdev) + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; +@@ -2608,6 +2605,10 @@ int kv_dpm_init(struct radeon_device *rdev) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = r600_parse_extended_power_table(rdev); + if (ret) + return ret; +diff --git 
a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c +index 85f36e7..70045d1 100644 +--- a/drivers/gpu/drm/radeon/ni_dpm.c ++++ b/drivers/gpu/drm/radeon/ni_dpm.c +@@ -4041,9 +4041,6 @@ static int ni_parse_power_table(struct radeon_device *rdev) + power_info->pplib.ucNumStates, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + for (i = 0; i < power_info->pplib.ucNumStates; i++) { + power_state = (union pplib_power_state *) +@@ -4105,6 +4102,10 @@ int ni_dpm_init(struct radeon_device *rdev) + pi->min_vddc_in_table = 0; + pi->max_vddc_in_table = 0; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = ni_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c +index 2df683a..63c5be0 100644 +--- a/drivers/gpu/drm/radeon/r600_dpm.c ++++ b/drivers/gpu/drm/radeon/r600_dpm.c +@@ -821,6 +821,26 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen + return 0; + } + ++int r600_get_platform_caps(struct radeon_device *rdev) ++{ ++ struct radeon_mode_info *mode_info = &rdev->mode_info; ++ union power_info *power_info; ++ int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); ++ u16 data_offset; ++ u8 frev, crev; ++ ++ if (!atom_parse_data_header(mode_info->atom_context, index, NULL, ++ &frev, &crev, &data_offset)) ++ return -EINVAL; ++ power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); ++ ++ rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); ++ rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); ++ rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); ++ ++ 
return 0; ++} ++ + /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */ + #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12 + #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14 +diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h +index 1000bf9..7e5d2c2 100644 +--- a/drivers/gpu/drm/radeon/r600_dpm.h ++++ b/drivers/gpu/drm/radeon/r600_dpm.h +@@ -217,6 +217,8 @@ int r600_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp); + bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor); + ++int r600_get_platform_caps(struct radeon_device *rdev); ++ + int r600_parse_extended_power_table(struct radeon_device *rdev); + void r600_free_extended_power_table(struct radeon_device *rdev); + +diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c +index 6af8505..f3143c0 100644 +--- a/drivers/gpu/drm/radeon/rs780_dpm.c ++++ b/drivers/gpu/drm/radeon/rs780_dpm.c +@@ -815,9 +815,6 @@ static int rs780_parse_power_table(struct radeon_device *rdev) + power_info->pplib.ucNumStates, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + for (i = 0; i < power_info->pplib.ucNumStates; i++) { + power_state = (union pplib_power_state *) +@@ -867,6 +864,10 @@ int rs780_dpm_init(struct radeon_device *rdev) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = rs780_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c +index 5811d27..3aa0a8c 100644 +--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c ++++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c +@@ -1901,9 +1901,6 @@ static int rv6xx_parse_power_table(struct 
radeon_device *rdev) + power_info->pplib.ucNumStates, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + for (i = 0; i < power_info->pplib.ucNumStates; i++) { + power_state = (union pplib_power_state *) +@@ -1953,6 +1950,10 @@ int rv6xx_dpm_init(struct radeon_device *rdev) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = rv6xx_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c +index 890cf17..f67a1b5 100644 +--- a/drivers/gpu/drm/radeon/rv770_dpm.c ++++ b/drivers/gpu/drm/radeon/rv770_dpm.c +@@ -2277,9 +2277,6 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) + power_info->pplib.ucNumStates, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + for (i = 0; i < power_info->pplib.ucNumStates; i++) { + power_state = (union pplib_power_state *) +@@ -2351,6 +2348,10 @@ int rv770_dpm_init(struct radeon_device *rdev) + pi->min_vddc_in_table = 0; + pi->max_vddc_in_table = 0; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = rv7xx_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c +index f5cdc86..ce4463f 100644 +--- a/drivers/gpu/drm/radeon/si_dpm.c ++++ b/drivers/gpu/drm/radeon/si_dpm.c +@@ -6291,9 +6291,6 @@ static int si_parse_power_table(struct radeon_device *rdev) + if (!rdev->pm.dpm.ps) + 
return -ENOMEM; + power_state_offset = (u8 *)state_array->states; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; +@@ -6370,6 +6367,10 @@ int si_dpm_init(struct radeon_device *rdev) + pi->min_vddc_in_table = 0; + pi->max_vddc_in_table = 0; + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = si_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c +index 96ea6db8..485d006 100644 +--- a/drivers/gpu/drm/radeon/sumo_dpm.c ++++ b/drivers/gpu/drm/radeon/sumo_dpm.c +@@ -1477,9 +1477,6 @@ static int sumo_parse_power_table(struct radeon_device *rdev) + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; +@@ -1765,6 +1762,10 @@ int sumo_dpm_init(struct radeon_device *rdev) + + sumo_construct_boot_and_acpi_state(rdev); + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = sumo_parse_power_table(rdev); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c +index bf980ea..b4dd29b 100644 +--- a/drivers/gpu/drm/radeon/trinity_dpm.c ++++ b/drivers/gpu/drm/radeon/trinity_dpm.c +@@ -1685,9 +1685,6 @@ static int trinity_parse_power_table(struct radeon_device 
*rdev) + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; +- rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); +- rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); +- rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; +@@ -1895,6 +1892,10 @@ int trinity_dpm_init(struct radeon_device *rdev) + + trinity_construct_boot_state(rdev); + ++ ret = r600_get_platform_caps(rdev); ++ if (ret) ++ return ret; ++ + ret = trinity_parse_power_table(rdev); + if (ret) + return ret; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0032-drm-radeon-dpm-fill-in-some-initial-vce-infrastructu.patch b/common/recipes-kernel/linux/linux-amd/0032-drm-radeon-dpm-fill-in-some-initial-vce-infrastructu.patch new file mode 100644 index 00000000..d1fcc9b7 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0032-drm-radeon-dpm-fill-in-some-initial-vce-infrastructu.patch @@ -0,0 +1,83 @@ +From 71192a6216ee85b70cac1a217e72170c4ce65271 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Tue, 20 Aug 2013 20:29:05 -0400 +Subject: [PATCH 32/60] drm/radeon/dpm: fill in some initial vce infrastructure + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 12 ++++++++++++ + drivers/gpu/drm/radeon/radeon_pm.c | 7 +++++++ + 2 files changed, 19 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 5865ae7..12b1854 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1259,6 +1259,15 @@ enum radeon_dpm_event_src { + RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 + }; + ++enum radeon_vce_level { ++ RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ ++ RADEON_VCE_LEVEL_DC_EE = 1, /* DC, 
entropy encoding */ ++ RADEON_VCE_LEVEL_DC_LL_LOW = 2, /* DC, low latency queue, res <= 720 */ ++ RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */ ++ RADEON_VCE_LEVEL_DC_GP_LOW = 4, /* DC, general purpose queue, res <= 720 */ ++ RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ ++}; ++ + struct radeon_ps { + u32 caps; /* vbios flags */ + u32 class; /* vbios flags */ +@@ -1269,6 +1278,8 @@ struct radeon_ps { + /* VCE clocks */ + u32 evclk; + u32 ecclk; ++ bool vce_active; ++ enum radeon_vce_level vce_level; + /* asic priv */ + void *ps_priv; + }; +@@ -1480,6 +1491,7 @@ struct radeon_dpm { + /* special states active */ + bool thermal_active; + bool uvd_active; ++ bool vce_active; + /* thermal handling */ + struct radeon_dpm_thermal thermal; + /* forced levels */ +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index 58132a5..f8afbf9 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -845,6 +845,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) + + /* no need to reprogram if nothing changed unless we are on BTC+ */ + if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) { ++ /* vce just modifies an existing state so force a change */ ++ if (ps->vce_active != rdev->pm.dpm.vce_active) ++ goto force; + if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { + /* for pre-BTC and APUs if the num crtcs changed but state is the same, + * all we need to do is update the display configuration. 
+@@ -881,6 +884,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) + } + } + ++force: + printk("switching from power state:\n"); + radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps); + printk("switching to power state:\n"); +@@ -890,6 +894,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) + down_write(&rdev->pm.mclk_lock); + mutex_lock(&rdev->ring_lock); + ++ /* update whether vce is active */ ++ ps->vce_active = rdev->pm.dpm.vce_active; ++ + ret = radeon_dpm_pre_set_power_state(rdev); + if (ret) + goto done; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0033-drm-radeon-dpm-fetch-vce-states-from-the-vbios.patch b/common/recipes-kernel/linux/linux-amd/0033-drm-radeon-dpm-fetch-vce-states-from-the-vbios.patch new file mode 100644 index 00000000..f62d3b47 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0033-drm-radeon-dpm-fetch-vce-states-from-the-vbios.patch @@ -0,0 +1,110 @@ +From 93b2ef800055caf91ec55ee415a6c7960b454805 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Wed, 4 Sep 2013 16:13:56 -0400 +Subject: [PATCH 33/60] drm/radeon/dpm: fetch vce states from the vbios + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/r600_dpm.c | 28 +++++++++++++++++++++++++++- + drivers/gpu/drm/radeon/radeon.h | 16 ++++++++++++++++ + 2 files changed, 43 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c +index 63c5be0..2b2a876 100644 +--- a/drivers/gpu/drm/radeon/r600_dpm.c ++++ b/drivers/gpu/drm/radeon/r600_dpm.c +@@ -1050,7 +1050,15 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + + 1 + array->ucNumEntries * sizeof(VCEClockInfo)); ++ ATOM_PPLIB_VCE_State_Table *states = ++ (ATOM_PPLIB_VCE_State_Table *) ++ (mode_info->atom_context->bios 
+ data_offset + ++ le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + ++ 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) + ++ 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record))); + ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; ++ ATOM_PPLIB_VCE_State_Record *state_entry; ++ VCEClockInfo *vce_clk; + u32 size = limits->numEntries * + sizeof(struct radeon_vce_clock_voltage_dependency_entry); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = +@@ -1062,8 +1070,9 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = + limits->numEntries; + entry = &limits->entries[0]; ++ state_entry = &states->entries[0]; + for (i = 0; i < limits->numEntries; i++) { +- VCEClockInfo *vce_clk = (VCEClockInfo *) ++ vce_clk = (VCEClockInfo *) + ((u8 *)&array->entries[0] + + (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = +@@ -1075,6 +1084,23 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) + entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); + } ++ for (i = 0; i < states->numEntries; i++) { ++ if (i >= RADEON_MAX_VCE_LEVELS) ++ break; ++ vce_clk = (VCEClockInfo *) ++ ((u8 *)&array->entries[0] + ++ (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); ++ rdev->pm.dpm.vce_states[i].evclk = ++ le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); ++ rdev->pm.dpm.vce_states[i].ecclk = ++ le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); ++ rdev->pm.dpm.vce_states[i].clk_idx = ++ state_entry->ucClockInfoIndex & 0x3f; ++ rdev->pm.dpm.vce_states[i].pstate = ++ (state_entry->ucClockInfoIndex & 0xc0) >> 6; ++ state_entry = (ATOM_PPLIB_VCE_State_Record *) ++ ((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record)); ++ } + } + if ((le16_to_cpu(ext_hdr->usSize) >= 
SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) && + ext_hdr->usUVDTableOffset) { +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 12b1854..fb5ffa4 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1259,6 +1259,8 @@ enum radeon_dpm_event_src { + RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 + }; + ++#define RADEON_MAX_VCE_LEVELS 6 ++ + enum radeon_vce_level { + RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ + RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ +@@ -1454,6 +1456,17 @@ enum radeon_dpm_forced_level { + RADEON_DPM_FORCED_LEVEL_HIGH = 2, + }; + ++struct radeon_vce_state { ++ /* vce clocks */ ++ u32 evclk; ++ u32 ecclk; ++ /* gpu clocks */ ++ u32 sclk; ++ u32 mclk; ++ u8 clk_idx; ++ u8 pstate; ++}; ++ + struct radeon_dpm { + struct radeon_ps *ps; + /* number of valid power states */ +@@ -1466,6 +1479,9 @@ struct radeon_dpm { + struct radeon_ps *boot_ps; + /* default uvd power state */ + struct radeon_ps *uvd_ps; ++ /* vce requirements */ ++ struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS]; ++ enum radeon_vce_level vce_level; + enum radeon_pm_state_type state; + enum radeon_pm_state_type user_state; + u32 platform_caps; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0034-drm-radeon-fill-in-set_vce_clocks-for-CIK-asics.patch b/common/recipes-kernel/linux/linux-amd/0034-drm-radeon-fill-in-set_vce_clocks-for-CIK-asics.patch new file mode 100644 index 00000000..3c878d7e --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0034-drm-radeon-fill-in-set_vce_clocks-for-CIK-asics.patch @@ -0,0 +1,111 @@ +From f3a3992d2b13f43b335aa189cdcd0e4febe3d4fb Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Thu, 22 Aug 2013 17:09:06 -0400 +Subject: [PATCH 34/60] drm/radeon: fill in set_vce_clocks for CIK asics + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 35 +++++++++++++++++++++++++++++++++++ 
+ drivers/gpu/drm/radeon/cikd.h | 6 ++++++ + drivers/gpu/drm/radeon/radeon_asic.c | 2 ++ + drivers/gpu/drm/radeon/radeon_asic.h | 1 + + 4 files changed, 44 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index e256340..9d2762d 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -8202,6 +8202,41 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) + return r; + } + ++int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) ++{ ++ int r, i; ++ struct atom_clock_dividers dividers; ++ u32 tmp; ++ ++ r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, ++ ecclk, false, &dividers); ++ if (r) ++ return r; ++ ++ for (i = 0; i < 100; i++) { ++ if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) ++ break; ++ mdelay(10); ++ } ++ if (i == 100) ++ return -ETIMEDOUT; ++ ++ tmp = RREG32_SMC(CG_ECLK_CNTL); ++ tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); ++ tmp |= dividers.post_divider; ++ WREG32_SMC(CG_ECLK_CNTL, tmp); ++ ++ for (i = 0; i < 100; i++) { ++ if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) ++ break; ++ mdelay(10); ++ } ++ if (i == 100) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ + static void cik_pcie_gen3_enable(struct radeon_device *rdev) + { + struct pci_dev *root = rdev->pdev->bus->self; +diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h +index 54eb8be..481d56e 100644 +--- a/drivers/gpu/drm/radeon/cikd.h ++++ b/drivers/gpu/drm/radeon/cikd.h +@@ -201,6 +201,12 @@ + #define CTF_TEMP_MASK 0x0003fe00 + #define CTF_TEMP_SHIFT 9 + ++#define CG_ECLK_CNTL 0xC05000AC ++# define ECLK_DIVIDER_MASK 0x7f ++# define ECLK_DIR_CNTL_EN (1 << 8) ++#define CG_ECLK_STATUS 0xC05000B0 ++# define ECLK_STATUS (1 << 0) ++ + #define CG_SPLL_FUNC_CNTL 0xC0500140 + #define SPLL_RESET (1 << 0) + #define SPLL_PWRON (1 << 1) +diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c +index 763280b..19b2eea 100644 
+--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -2067,6 +2067,7 @@ static struct radeon_asic ci_asic = { + .set_pcie_lanes = NULL, + .set_clock_gating = NULL, + .set_uvd_clocks = &cik_set_uvd_clocks, ++ .set_vce_clocks = &cik_set_vce_clocks, + .get_temperature = &ci_get_temp, + }, + .dpm = { +@@ -2170,6 +2171,7 @@ static struct radeon_asic kv_asic = { + .set_pcie_lanes = NULL, + .set_clock_gating = NULL, + .set_uvd_clocks = &cik_set_uvd_clocks, ++ .set_vce_clocks = &cik_set_vce_clocks, + .get_temperature = &kv_get_temp, + }, + .dpm = { +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index a6c3eeb..900ffd7 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -710,6 +710,7 @@ u32 cik_get_xclk(struct radeon_device *rdev); + uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); + void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); + int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); ++int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); + void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); + bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0035-drm-radeon-add-vce-dpm-support-for-CI.patch b/common/recipes-kernel/linux/linux-amd/0035-drm-radeon-add-vce-dpm-support-for-CI.patch new file mode 100644 index 00000000..c7afb042 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0035-drm-radeon-add-vce-dpm-support-for-CI.patch @@ -0,0 +1,149 @@ +From 4f1d80b8061b86af39361b48df9be82aef437188 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Fri, 23 Aug 2013 11:05:24 -0400 +Subject: [PATCH 35/60] drm/radeon: add vce dpm support for CI + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + 
drivers/gpu/drm/radeon/ci_dpm.c | 50 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 40 insertions(+), 10 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c +index 56a517b..7fa91b4 100644 +--- a/drivers/gpu/drm/radeon/ci_dpm.c ++++ b/drivers/gpu/drm/radeon/ci_dpm.c +@@ -717,6 +717,14 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + int i; + ++ if (rps->vce_active) { ++ rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; ++ rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; ++ } else { ++ rps->evclk = 0; ++ rps->ecclk = 0; ++ } ++ + if ((rdev->pm.dpm.new_active_crtc_count > 1) || + ci_dpm_vblank_too_short(rdev)) + disable_mclk_switching = true; +@@ -775,6 +783,13 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, + sclk = ps->performance_levels[0].sclk; + } + ++ if (rps->vce_active) { ++ if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) ++ sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; ++ if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) ++ mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; ++ } ++ + ps->performance_levels[0].sclk = sclk; + ps->performance_levels[0].mclk = mclk; + +@@ -3442,7 +3457,6 @@ static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) + 0 : -EINVAL; + } + +-#if 0 + static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) + { + struct ci_power_info *pi = ci_get_pi(rdev); +@@ -3475,6 +3489,7 @@ static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) + 0 : -EINVAL; + } + ++#if 0 + static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) + { + struct ci_power_info *pi = ci_get_pi(rdev); +@@ -3561,7 +3576,6 @@ static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) + return ci_enable_uvd_dpm(rdev, !gate); + } + +-#if 0 + static u8 
ci_get_vce_boot_level(struct radeon_device *rdev) + { + u8 i; +@@ -3582,13 +3596,11 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, + struct radeon_ps *radeon_current_state) + { + struct ci_power_info *pi = ci_get_pi(rdev); +- bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); +- bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); + int ret = 0; + u32 tmp; + +- if (new_vce_clock_non_zero != old_vce_clock_non_zero) { +- if (new_vce_clock_non_zero) { ++ if (radeon_current_state->evclk != radeon_new_state->evclk) { ++ if (radeon_new_state->evclk) { + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + + tmp = RREG32_SMC(DPM_TABLE_475); +@@ -3604,6 +3616,7 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, + return ret; + } + ++#if 0 + static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) + { + return ci_enable_samu_dpm(rdev, gate); +@@ -4740,13 +4753,13 @@ int ci_dpm_set_power_state(struct radeon_device *rdev) + DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); + return ret; + } +-#if 0 ++ + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("ci_update_vce_dpm failed\n"); + return ret; + } +-#endif ++ + ret = ci_update_sclk_t(rdev); + if (ret) { + DRM_ERROR("ci_update_sclk_t failed\n"); +@@ -4990,6 +5003,21 @@ static int ci_parse_power_table(struct radeon_device *rdev) + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; ++ ++ /* fill in the vce power states */ ++ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { ++ u32 sclk, mclk; ++ clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; ++ clock_info = (union pplib_clock_info *) ++ &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; ++ sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); ++ sclk |= clock_info->ci.ucEngineClockHigh << 16; ++ mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); ++ mclk |= 
clock_info->ci.ucMemoryClockHigh << 16; ++ rdev->pm.dpm.vce_states[i].sclk = sclk; ++ rdev->pm.dpm.vce_states[i].mclk = mclk; ++ } ++ + return 0; + } + +@@ -5075,12 +5103,14 @@ int ci_dpm_init(struct radeon_device *rdev) + ci_dpm_fini(rdev); + return ret; + } +- ret = ci_parse_power_table(rdev); ++ ++ ret = r600_parse_extended_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } +- ret = r600_parse_extended_power_table(rdev); ++ ++ ret = ci_parse_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0036-drm-radeon-enable-vce-dpm-on-CI.patch b/common/recipes-kernel/linux/linux-amd/0036-drm-radeon-enable-vce-dpm-on-CI.patch new file mode 100644 index 00000000..6f06219c --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0036-drm-radeon-enable-vce-dpm-on-CI.patch @@ -0,0 +1,28 @@ +From 7bc6dbef723886cbf045305a6099df0bb741ceb6 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Fri, 23 Aug 2013 11:09:21 -0400 +Subject: [PATCH 36/60] drm/radeon: enable vce dpm on CI + +VCE dpm dynamically adjusts the uvd clocks on +demand. 
+ +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/ci_dpm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c +index 7fa91b4..4601fa8 100644 +--- a/drivers/gpu/drm/radeon/ci_dpm.c ++++ b/drivers/gpu/drm/radeon/ci_dpm.c +@@ -5152,6 +5152,7 @@ int ci_dpm_init(struct radeon_device *rdev) + pi->caps_sclk_throttle_low_notification = false; + + pi->caps_uvd_dpm = true; ++ pi->caps_vce_dpm = true; + + ci_get_leakage_voltages(rdev); + ci_patch_dependency_tables_with_leakage(rdev); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0037-drm-radeon-add-vce-dpm-support-for-KV-KB.patch b/common/recipes-kernel/linux/linux-amd/0037-drm-radeon-add-vce-dpm-support-for-KV-KB.patch new file mode 100644 index 00000000..f4f1d53d --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0037-drm-radeon-add-vce-dpm-support-for-KV-KB.patch @@ -0,0 +1,169 @@ +From 3383f18a3cd0505bcb2a3abcb745c371685e38f5 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Wed, 4 Sep 2013 16:17:07 -0400 +Subject: [PATCH 37/60] drm/radeon: add vce dpm support for KV/KB + +TODO: plug in cik_vce_suspend()/resume() so we can enable +vce powergating. See XXX in code. + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/kv_dpm.c | 46 +++++++++++++++++++++++++++++++---------- + 1 file changed, 35 insertions(+), 11 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c +index 58b5a5d..feacd5c 100644 +--- a/drivers/gpu/drm/radeon/kv_dpm.c ++++ b/drivers/gpu/drm/radeon/kv_dpm.c +@@ -1346,13 +1346,11 @@ static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) + PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); + } + +-#if 0 + static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) + { + return kv_notify_message_to_smu(rdev, enable ? 
+ PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); + } +-#endif + + static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) + { +@@ -1397,7 +1395,6 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) + return kv_enable_uvd_dpm(rdev, !gate); + } + +-#if 0 + static u8 kv_get_vce_boot_level(struct radeon_device *rdev) + { + u8 i; +@@ -1422,6 +1419,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + int ret; + + if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { ++ kv_dpm_powergate_vce(rdev, false); ++ /* XXX cik_vce_resume(); */ + if (pi->caps_stable_p_state) + pi->vce_boot_level = table->count - 1; + else +@@ -1444,11 +1443,12 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + kv_enable_vce_dpm(rdev, true); + } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { + kv_enable_vce_dpm(rdev, false); ++ /* XXX cik_vce_suspend(); */ ++ kv_dpm_powergate_vce(rdev, true); + } + + return 0; + } +-#endif + + static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) + { +@@ -1776,7 +1776,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) + { + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; +- /*struct radeon_ps *old_ps = &pi->current_rps;*/ ++ struct radeon_ps *old_ps = &pi->current_rps; + int ret; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | +@@ -1811,13 +1811,12 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) + kv_set_enabled_levels(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); +-#if 0 ++ + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +-#endif + kv_update_sclk_t(rdev); + } + } else { +@@ -1836,13 +1835,11 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) + kv_program_nbps_index_settings(rdev, new_ps); + kv_freeze_sclk_dpm(rdev, false); + kv_set_enabled_levels(rdev); +-#if 0 + ret = 
kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +-#endif + kv_update_acp_boot_level(rdev); + kv_update_sclk_t(rdev); + kv_enable_nb_dpm(rdev); +@@ -2055,6 +2052,14 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_clock_and_voltage_limits *max_limits = + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + ++ if (new_rps->vce_active) { ++ new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; ++ new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; ++ } else { ++ new_rps->evclk = 0; ++ new_rps->ecclk = 0; ++ } ++ + mclk = max_limits->mclk; + sclk = min_sclk; + +@@ -2074,6 +2079,11 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + sclk = stable_p_state_sclk; + } + ++ if (new_rps->vce_active) { ++ if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) ++ sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; ++ } ++ + ps->need_dfs_bypass = true; + + for (i = 0; i < ps->num_levels; i++) { +@@ -2110,7 +2120,8 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + } + } + +- pi->video_start = new_rps->dclk || new_rps->vclk; ++ pi->video_start = new_rps->dclk || new_rps->vclk || ++ new_rps->evclk || new_rps->ecclk; + + if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == + ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) +@@ -2592,6 +2603,19 @@ static int kv_parse_power_table(struct radeon_device *rdev) + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; ++ ++ /* fill in the vce power states */ ++ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { ++ u32 sclk; ++ clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; ++ clock_info = (union pplib_clock_info *) ++ &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; ++ sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); ++ sclk |= 
clock_info->sumo.ucEngineClockHigh << 16; ++ rdev->pm.dpm.vce_states[i].sclk = sclk; ++ rdev->pm.dpm.vce_states[i].mclk = 0; ++ } ++ + return 0; + } + +@@ -2642,7 +2666,7 @@ int kv_dpm_init(struct radeon_device *rdev) + pi->caps_fps = false; /* true? */ + pi->caps_uvd_pg = true; + pi->caps_uvd_dpm = true; +- pi->caps_vce_pg = false; ++ pi->caps_vce_pg = false; /* XXX true */ + pi->caps_samu_pg = false; + pi->caps_acp_pg = false; + pi->caps_stable_p_state = false; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0038-drm-radeon-dpm-enable-dynamic-vce-state-switching-v2.patch b/common/recipes-kernel/linux/linux-amd/0038-drm-radeon-dpm-enable-dynamic-vce-state-switching-v2.patch new file mode 100644 index 00000000..9834a582 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0038-drm-radeon-dpm-enable-dynamic-vce-state-switching-v2.patch @@ -0,0 +1,195 @@ +From ac2d716ed1a3a0c722da697eec4f9e3081deee95 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Fri, 23 Aug 2013 11:56:26 -0400 +Subject: [PATCH 38/60] drm/radeon/dpm: enable dynamic vce state switching v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +enable vce states when vce is active. When vce is active, +it adjusts the currently selected state (performance, battery, +uvd, etc.) 
+ +v2: add code comments + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 3 ++ + drivers/gpu/drm/radeon/radeon_cs.c | 3 ++ + drivers/gpu/drm/radeon/radeon_pm.c | 17 ++++++++++ + drivers/gpu/drm/radeon/radeon_vce.c | 62 +++++++++++++++++++++++++++++++++++++ + 4 files changed, 85 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index fb5ffa4..a4d6f82 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1518,6 +1518,7 @@ struct radeon_dpm { + }; + + void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); ++void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable); + + struct radeon_pm { + struct mutex mutex; +@@ -1639,6 +1640,7 @@ struct radeon_vce { + unsigned fb_version; + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; ++ struct delayed_work idle_work; + }; + + int radeon_vce_init(struct radeon_device *rdev); +@@ -1650,6 +1652,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); + void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); ++void radeon_vce_note_usage(struct radeon_device *rdev); + int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); + int radeon_vce_cs_parse(struct radeon_cs_parser *p); + bool radeon_vce_semaphore_emit(struct radeon_device *rdev, +diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c +index 9d4e25d..848266c 100644 +--- a/drivers/gpu/drm/radeon/radeon_cs.c ++++ b/drivers/gpu/drm/radeon/radeon_cs.c +@@ -407,6 +407,9 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, + + if (parser->ring == R600_RING_TYPE_UVD_INDEX) + radeon_uvd_note_usage(rdev); ++ else if 
((parser->ring == TN_RING_TYPE_VCE1_INDEX) || ++ (parser->ring == TN_RING_TYPE_VCE2_INDEX)) ++ radeon_vce_note_usage(rdev); + + radeon_cs_sync_rings(parser); + r = radeon_ib_schedule(rdev, &parser->ib, NULL); +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index f8afbf9..5f3a536 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -985,6 +985,23 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) + } + } + ++void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable) ++{ ++ if (enable) { ++ mutex_lock(&rdev->pm.mutex); ++ rdev->pm.dpm.vce_active = true; ++ /* XXX select vce level based on ring/task */ ++ rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL; ++ mutex_unlock(&rdev->pm.mutex); ++ } else { ++ mutex_lock(&rdev->pm.mutex); ++ rdev->pm.dpm.vce_active = false; ++ mutex_unlock(&rdev->pm.mutex); ++ } ++ ++ radeon_pm_compute_clocks(rdev); ++} ++ + static void radeon_pm_suspend_old(struct radeon_device *rdev) + { + mutex_lock(&rdev->pm.mutex); +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index f46563b..d130432 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -34,11 +34,16 @@ + #include "radeon_asic.h" + #include "sid.h" + ++/* 1 second timeout */ ++#define VCE_IDLE_TIMEOUT_MS 1000 ++ + /* Firmware Names */ + #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" + + MODULE_FIRMWARE(FIRMWARE_BONAIRE); + ++static void radeon_vce_idle_work_handler(struct work_struct *work); ++ + /** + * radeon_vce_init - allocate memory, load vce firmware + * +@@ -55,6 +60,8 @@ int radeon_vce_init(struct radeon_device *rdev) + uint8_t start, mid, end; + int i, r; + ++ INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); ++ + switch (rdev->family) { + case CHIP_BONAIRE: + case CHIP_KAVERI: +@@ -220,6 +227,59 @@ int radeon_vce_resume(struct radeon_device *rdev) + } + + /** ++ 
* radeon_vce_idle_work_handler - power off VCE ++ * ++ * @work: pointer to work structure ++ * ++ * power off VCE when it's not used any more ++ */ ++static void radeon_vce_idle_work_handler(struct work_struct *work) ++{ ++ struct radeon_device *rdev = ++ container_of(work, struct radeon_device, vce.idle_work.work); ++ ++ if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && ++ (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { ++ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { ++ radeon_dpm_enable_vce(rdev, false); ++ } else { ++ radeon_set_vce_clocks(rdev, 0, 0); ++ } ++ } else { ++ schedule_delayed_work(&rdev->vce.idle_work, ++ msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); ++ } ++} ++ ++/** ++ * radeon_vce_note_usage - power up VCE ++ * ++ * @rdev: radeon_device pointer ++ * ++ * Make sure VCE is powered up when we want to use it ++ */ ++void radeon_vce_note_usage(struct radeon_device *rdev) ++{ ++ bool streams_changed = false; ++ bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work); ++ set_clocks &= schedule_delayed_work(&rdev->vce.idle_work, ++ msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); ++ ++ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { ++ /* XXX figure out if the streams changed */ ++ streams_changed = false; ++ } ++ ++ if (set_clocks || streams_changed) { ++ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { ++ radeon_dpm_enable_vce(rdev, true); ++ } else { ++ radeon_set_vce_clocks(rdev, 53300, 40000); ++ } ++ } ++} ++ ++/** + * radeon_vce_free_handles - free still open VCE handles + * + * @rdev: radeon_device pointer +@@ -235,6 +295,8 @@ void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) + if (!handle || rdev->vce.filp[i] != filp) + continue; + ++ radeon_vce_note_usage(rdev); ++ + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, + handle, NULL); + if (r) +-- +1.9.1 + diff --git 
a/common/recipes-kernel/linux/linux-amd/0039-drm-radeon-dpm-properly-enable-disable-vce-when-vce-.patch b/common/recipes-kernel/linux/linux-amd/0039-drm-radeon-dpm-properly-enable-disable-vce-when-vce-.patch new file mode 100644 index 00000000..d97637dd --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0039-drm-radeon-dpm-properly-enable-disable-vce-when-vce-.patch @@ -0,0 +1,56 @@ +From 8d7351e114e9fa3a918f8b1765ee7e645c0af271 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Wed, 28 Aug 2013 18:53:50 -0400 +Subject: [PATCH 39/60] drm/radeon/dpm: properly enable/disable vce when vce pg + is enabled + +This adds the appropriate function calls to properly re-init +vce before it's used after it has been power gated. + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/kv_dpm.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c +index feacd5c..c8b9d7b 100644 +--- a/drivers/gpu/drm/radeon/kv_dpm.c ++++ b/drivers/gpu/drm/radeon/kv_dpm.c +@@ -1420,7 +1420,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + + if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + kv_dpm_powergate_vce(rdev, false); +- /* XXX cik_vce_resume(); */ + if (pi->caps_stable_p_state) + pi->vce_boot_level = table->count - 1; + else +@@ -1443,7 +1442,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + kv_enable_vce_dpm(rdev, true); + } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { + kv_enable_vce_dpm(rdev, false); +- /* XXX cik_vce_suspend(); */ + kv_dpm_powergate_vce(rdev, true); + } + +@@ -1583,11 +1581,16 @@ static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) + pi->vce_power_gated = gate; + + if (gate) { +- if (pi->caps_vce_pg) ++ if (pi->caps_vce_pg) { ++ /* XXX do we need a vce_v1_0_stop() ? 
*/ + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); ++ } + } else { +- if (pi->caps_vce_pg) ++ if (pi->caps_vce_pg) { + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); ++ vce_v2_0_resume(rdev); ++ vce_v1_0_start(rdev); ++ } + } + } + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0040-drm-radeon-add-vce-debugfs-support.patch b/common/recipes-kernel/linux/linux-amd/0040-drm-radeon-add-vce-debugfs-support.patch new file mode 100644 index 00000000..7c1f0f58 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0040-drm-radeon-add-vce-debugfs-support.patch @@ -0,0 +1,98 @@ +From 903b57d77d6bca3f437a15bd4fe821453018e958 Mon Sep 17 00:00:00 2001 +From: Leo Liu <leo.liu@amd.com> +Date: Mon, 25 Nov 2013 17:30:38 -0500 +Subject: [PATCH 40/60] drm/radeon: add vce debugfs support + +Signed-off-by: Leo Liu <leo.liu@amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 1 + + drivers/gpu/drm/radeon/radeon_vce.c | 37 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 38 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index a4d6f82..d3f1389 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1641,6 +1641,7 @@ struct radeon_vce { + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + struct delayed_work idle_work; ++ bool status; + }; + + int radeon_vce_init(struct radeon_device *rdev); +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index d130432..eb11ac0 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -41,6 +41,7 @@ + #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" + + MODULE_FIRMWARE(FIRMWARE_BONAIRE); ++static int radeon_debugfs_vce_init(struct radeon_device *rdev); + + static void radeon_vce_idle_work_handler(struct work_struct *work); + +@@ -146,6 +147,10 @@ int radeon_vce_init(struct radeon_device *rdev) + rdev->vce.filp[i] 
= NULL; + } + ++ r = radeon_debugfs_vce_init(rdev); ++ if (r) ++ dev_err(rdev->dev, "(%d) Register debugfs file for vce failed\n", r); ++ + return 0; + } + +@@ -249,6 +254,7 @@ static void radeon_vce_idle_work_handler(struct work_struct *work) + schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + } ++ rdev->vce.status = false; + } + + /** +@@ -276,9 +282,40 @@ void radeon_vce_note_usage(struct radeon_device *rdev) + } else { + radeon_set_vce_clocks(rdev, 53300, 40000); + } ++ rdev->vce.status = true; + } + } + ++/* ++ * Debugfs info ++ */ ++#if defined(CONFIG_DEBUG_FS) ++ ++static int radeon_debugfs_vce_info(struct seq_file *m, void *data) ++{ ++ struct drm_info_node *node = (struct drm_info_node *) m->private; ++ struct drm_device *dev = node->minor->dev; ++ struct radeon_device *rdev = dev->dev_private; ++ ++ seq_printf(m, "VCE Status: %s\n", ((rdev->vce.status) ? "Busy" : "Idle")); ++ ++ return 0; ++} ++ ++static struct drm_info_list radeon_vce_info_list[] = { ++ {"radeon_vce_info", radeon_debugfs_vce_info, 0, NULL}, ++}; ++#endif ++ ++static int radeon_debugfs_vce_init(struct radeon_device *rdev) ++{ ++#if defined(CONFIG_DEBUG_FS) ++ return radeon_debugfs_add_files(rdev, radeon_vce_info_list, ARRAY_SIZE(radeon_vce_info_list)); ++#else ++ return 0; ++#endif ++} ++ + /** + * radeon_vce_free_handles - free still open VCE handles + * +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0041-drm-radeon-add-support-for-vce-2.0-clock-gating.patch b/common/recipes-kernel/linux/linux-amd/0041-drm-radeon-add-support-for-vce-2.0-clock-gating.patch new file mode 100644 index 00000000..03629691 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0041-drm-radeon-add-support-for-vce-2.0-clock-gating.patch @@ -0,0 +1,165 @@ +From e9b1866c0dba795476cc4bdbafa39586ff443b25 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Thu, 5 Sep 2013 15:14:28 -0400 +Subject: [PATCH 41/60] drm/radeon: add 
support for vce 2.0 clock gating + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cikd.h | 10 ++++ + drivers/gpu/drm/radeon/vce_v2_0.c | 111 ++++++++++++++++++++++++++++++++++++++ + 2 files changed, 121 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h +index 481d56e..26114a3 100644 +--- a/drivers/gpu/drm/radeon/cikd.h ++++ b/drivers/gpu/drm/radeon/cikd.h +@@ -1927,8 +1927,18 @@ + #define VCE_RB_RPTR 0x2018c + #define VCE_RB_WPTR 0x20190 + #define VCE_CLOCK_GATING_A 0x202f8 ++# define CGC_CLK_GATE_DLY_TIMER_MASK (0xf << 0) ++# define CGC_CLK_GATE_DLY_TIMER(x) ((x) << 0) ++# define CGC_CLK_GATER_OFF_DLY_TIMER_MASK (0xff << 4) ++# define CGC_CLK_GATER_OFF_DLY_TIMER(x) ((x) << 4) ++# define CGC_UENC_WAIT_AWAKE (1 << 18) + #define VCE_CLOCK_GATING_B 0x202fc ++#define VCE_CGTT_CLK_OVERRIDE 0x207a0 + #define VCE_UENC_CLOCK_GATING 0x207bc ++# define CLOCK_ON_DELAY_MASK (0xf << 0) ++# define CLOCK_ON_DELAY(x) ((x) << 0) ++# define CLOCK_OFF_DELAY_MASK (0xff << 4) ++# define CLOCK_OFF_DELAY(x) ((x) << 4) + #define VCE_UENC_REG_CLOCK_GATING 0x207c0 + #define VCE_SYS_INT_EN 0x21300 + # define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c +index 4911d1b..1ac7bb8 100644 +--- a/drivers/gpu/drm/radeon/vce_v2_0.c ++++ b/drivers/gpu/drm/radeon/vce_v2_0.c +@@ -31,6 +31,115 @@ + #include "radeon_asic.h" + #include "cikd.h" + ++static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) ++{ ++ u32 tmp; ++ ++ if (gated) { ++ tmp = RREG32(VCE_CLOCK_GATING_B); ++ tmp |= 0xe70000; ++ WREG32(VCE_CLOCK_GATING_B, tmp); ++ ++ tmp = RREG32(VCE_UENC_CLOCK_GATING); ++ tmp |= 0xff000000; ++ WREG32(VCE_UENC_CLOCK_GATING, tmp); ++ ++ tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); ++ tmp &= ~0x3fc; ++ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); ++ ++ WREG32(VCE_CGTT_CLK_OVERRIDE, 0); ++ } else { ++ tmp = RREG32(VCE_CLOCK_GATING_B); ++ tmp |= 
0xe7; ++ tmp &= ~0xe70000; ++ WREG32(VCE_CLOCK_GATING_B, tmp); ++ ++ tmp = RREG32(VCE_UENC_CLOCK_GATING); ++ tmp |= 0x1fe000; ++ tmp &= ~0xff000000; ++ WREG32(VCE_UENC_CLOCK_GATING, tmp); ++ ++ tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); ++ tmp |= 0x3fc; ++ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); ++ } ++} ++ ++static void vce_v2_0_set_dyn_cg(struct radeon_device *rdev, bool gated) ++{ ++ u32 orig, tmp; ++ ++ tmp = RREG32(VCE_CLOCK_GATING_B); ++ tmp &= ~0x00060006; ++ if (gated) { ++ tmp |= 0xe10000; ++ } else { ++ tmp |= 0xe1; ++ tmp &= ~0xe10000; ++ } ++ WREG32(VCE_CLOCK_GATING_B, tmp); ++ ++ orig = tmp = RREG32(VCE_UENC_CLOCK_GATING); ++ tmp &= ~0x1fe000; ++ tmp &= ~0xff000000; ++ if (tmp != orig) ++ WREG32(VCE_UENC_CLOCK_GATING, tmp); ++ ++ orig = tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); ++ tmp &= ~0x3fc; ++ if (tmp != orig) ++ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); ++ ++ if (gated) ++ WREG32(VCE_CGTT_CLK_OVERRIDE, 0); ++} ++ ++static void vce_v2_0_disable_cg(struct radeon_device *rdev) ++{ ++ WREG32(VCE_CGTT_CLK_OVERRIDE, 7); ++} ++ ++void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable) ++{ ++ bool sw_cg = false; ++ ++ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { ++ if (sw_cg) ++ vce_v2_0_set_sw_cg(rdev, true); ++ else ++ vce_v2_0_set_dyn_cg(rdev, true); ++ } else { ++ vce_v2_0_disable_cg(rdev); ++ ++ if (sw_cg) ++ vce_v2_0_set_sw_cg(rdev, false); ++ else ++ vce_v2_0_set_dyn_cg(rdev, false); ++ } ++} ++ ++static void vce_v2_0_init_cg(struct radeon_device *rdev) ++{ ++ u32 tmp; ++ ++ tmp = RREG32(VCE_CLOCK_GATING_A); ++ tmp &= ~(CGC_CLK_GATE_DLY_TIMER_MASK | CGC_CLK_GATER_OFF_DLY_TIMER_MASK); ++ tmp |= (CGC_CLK_GATE_DLY_TIMER(0) | CGC_CLK_GATER_OFF_DLY_TIMER(4)); ++ tmp |= CGC_UENC_WAIT_AWAKE; ++ WREG32(VCE_CLOCK_GATING_A, tmp); ++ ++ tmp = RREG32(VCE_UENC_CLOCK_GATING); ++ tmp &= ~(CLOCK_ON_DELAY_MASK | CLOCK_OFF_DELAY_MASK); ++ tmp |= (CLOCK_ON_DELAY(0) | CLOCK_OFF_DELAY(4)); ++ WREG32(VCE_UENC_CLOCK_GATING, tmp); ++ ++ tmp = 
RREG32(VCE_CLOCK_GATING_B); ++ tmp |= 0x10; ++ tmp &= ~0x100000; ++ WREG32(VCE_CLOCK_GATING_B, tmp); ++} ++ + int vce_v2_0_resume(struct radeon_device *rdev) + { + uint64_t addr = rdev->vce.gpu_addr; +@@ -66,5 +175,7 @@ int vce_v2_0_resume(struct radeon_device *rdev) + WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, + ~VCE_SYS_INT_TRAP_INTERRUPT_EN); + ++ vce_v2_0_init_cg(rdev); ++ + return 0; + } +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0042-drm-radeon-cik-enable-disable-vce-cg-when-encoding.patch b/common/recipes-kernel/linux/linux-amd/0042-drm-radeon-cik-enable-disable-vce-cg-when-encoding.patch new file mode 100644 index 00000000..52dfd301 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0042-drm-radeon-cik-enable-disable-vce-cg-when-encoding.patch @@ -0,0 +1,90 @@ +From bfc95d64efcf9ec3590d17ccac0a064f906f8f2c Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Fri, 6 Sep 2013 12:33:04 -0400 +Subject: [PATCH 42/60] drm/radeon/cik: enable/disable vce cg when encoding + +Some of the vce clocks are automatic, others need to +be manually enabled. For ease, just disable cg when +vce is active. 
+ +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/ci_dpm.c | 7 ++++++- + drivers/gpu/drm/radeon/cik.c | 5 +++++ + drivers/gpu/drm/radeon/kv_dpm.c | 4 ++++ + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c +index 4601fa8..de28f5b 100644 +--- a/drivers/gpu/drm/radeon/ci_dpm.c ++++ b/drivers/gpu/drm/radeon/ci_dpm.c +@@ -3601,8 +3601,10 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, + + if (radeon_current_state->evclk != radeon_new_state->evclk) { + if (radeon_new_state->evclk) { +- pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); ++ /* turn the clocks on when encoding */ ++ cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); + ++ pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~VceBootLevel_MASK; + tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); +@@ -3610,6 +3612,9 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, + + ret = ci_enable_vce_dpm(rdev, true); + } else { ++ /* turn the clocks off when not encoding */ ++ cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); ++ + ret = ci_enable_vce_dpm(rdev, false); + } + } +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index 9d2762d..e759595 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -72,6 +72,7 @@ extern void cik_sdma_vm_set_page(struct radeon_device *rdev, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); ++extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable); + static void cik_rlc_stop(struct radeon_device *rdev); + static void cik_pcie_gen3_enable(struct radeon_device *rdev); + static void cik_program_aspm(struct radeon_device *rdev); +@@ -5414,6 +5415,10 @@ void cik_update_cg(struct radeon_device *rdev, + cik_enable_hdp_mgcg(rdev, enable); + cik_enable_hdp_ls(rdev, enable); + } ++ ++ if 
(block & RADEON_CG_BLOCK_VCE) { ++ vce_v2_0_enable_mgcg(rdev, enable); ++ } + } + + static void cik_init_cg(struct radeon_device *rdev) +diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c +index c8b9d7b..a100b23 100644 +--- a/drivers/gpu/drm/radeon/kv_dpm.c ++++ b/drivers/gpu/drm/radeon/kv_dpm.c +@@ -1420,6 +1420,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + + if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + kv_dpm_powergate_vce(rdev, false); ++ /* turn the clocks on when encoding */ ++ cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); + if (pi->caps_stable_p_state) + pi->vce_boot_level = table->count - 1; + else +@@ -1442,6 +1444,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, + kv_enable_vce_dpm(rdev, true); + } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { + kv_enable_vce_dpm(rdev, false); ++ /* turn the clocks off when not encoding */ ++ cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); + kv_dpm_powergate_vce(rdev, true); + } + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0043-drm-radeon-fix-CP-semaphores-on-CIK.patch b/common/recipes-kernel/linux/linux-amd/0043-drm-radeon-fix-CP-semaphores-on-CIK.patch new file mode 100644 index 00000000..0e94f860 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0043-drm-radeon-fix-CP-semaphores-on-CIK.patch @@ -0,0 +1,117 @@ +From 6aa3711a4b7e021f834b4b5aaa8e15e8a0fdc808 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com> +Date: Wed, 19 Feb 2014 13:21:35 -0500 +Subject: [PATCH 43/60] drm/radeon: fix CP semaphores on CIK +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Christian König <christian.koenig at amd.com> +--- + drivers/gpu/drm/radeon/radeon.h | 4 +++- + drivers/gpu/drm/radeon/radeon_ring.c | 2 +- + drivers/gpu/drm/radeon/radeon_semaphore.c | 19 ++++++++++++++++--- + 3 files 
changed, 20 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index d3f1389..e98da4b 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -137,6 +137,9 @@ extern int radeon_aspm; + /* max number of rings */ + #define RADEON_NUM_RINGS 8 + ++/* number of hw syncs before falling back on blocking */ ++#define RADEON_NUM_SYNCS 4 ++ + /* hardcode those limit for now */ + #define RADEON_VA_IB_OFFSET (1 << 20) + #define RADEON_VA_RESERVED_SIZE (8 << 20) +@@ -553,7 +556,6 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, + /* + * Semaphores. + */ +-/* everything here is constant */ + struct radeon_semaphore { + struct radeon_sa_bo *sa_bo; + signed waiters; +diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c +index 91457f8..529893fc 100644 +--- a/drivers/gpu/drm/radeon/radeon_ring.c ++++ b/drivers/gpu/drm/radeon/radeon_ring.c +@@ -139,7 +139,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + } + + /* 64 dwords should be enough for fence too */ +- r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); ++ r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); + if (r) { + dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); + return r; +diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c +index 2b42aa1..9006b32 100644 +--- a/drivers/gpu/drm/radeon/radeon_semaphore.c ++++ b/drivers/gpu/drm/radeon/radeon_semaphore.c +@@ -34,14 +34,15 @@ + int radeon_semaphore_create(struct radeon_device *rdev, + struct radeon_semaphore **semaphore) + { ++ uint32_t *cpu_addr; + int i, r; + + *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); + if (*semaphore == NULL) { + return -ENOMEM; + } +- r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, +- &(*semaphore)->sa_bo, 8, 8, true); ++ r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, ++ 8 * 
RADEON_NUM_SYNCS, 8, true); + if (r) { + kfree(*semaphore); + *semaphore = NULL; +@@ -49,7 +50,10 @@ int radeon_semaphore_create(struct radeon_device *rdev, + } + (*semaphore)->waiters = 0; + (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); +- *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; ++ ++ cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); ++ for (i = 0; i < RADEON_NUM_SYNCS; ++i) ++ cpu_addr[i] = 0; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + (*semaphore)->sync_to[i] = NULL; +@@ -125,6 +129,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, + int ring) + { ++ unsigned count = 0; + int i, r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { +@@ -140,6 +145,12 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, + return -EINVAL; + } + ++ if (++count > RADEON_NUM_SYNCS) { ++ /* not enough room, wait manually */ ++ radeon_fence_wait_locked(fence); ++ continue; ++ } ++ + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) { +@@ -164,6 +175,8 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, + + radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_fence_note_sync(fence, ring); ++ ++ semaphore->gpu_addr += 8; + } + + return 0; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0044-drm-radeon-disable-dynamic-powering-vce.patch b/common/recipes-kernel/linux/linux-amd/0044-drm-radeon-disable-dynamic-powering-vce.patch new file mode 100644 index 00000000..18d9c93a --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0044-drm-radeon-disable-dynamic-powering-vce.patch @@ -0,0 +1,30 @@ +From 429849e276fd02dc7b212c87f8353446ad53d1bd Mon Sep 17 00:00:00 2001 +From: Leo Liu <leo.liu@amd.com> +Date: Mon, 24 Feb 2014 12:55:11 -0500 +Subject: [PATCH 44/60] drm/radeon: disable dynamic powering vce + +--- + drivers/gpu/drm/radeon/radeon_vce.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + 
+diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c +index eb11ac0..5bfb726 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -245,11 +245,11 @@ static void radeon_vce_idle_work_handler(struct work_struct *work) + + if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && + (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { +- if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { ++ /*if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, false); + } else { + radeon_set_vce_clocks(rdev, 0, 0); +- } ++ }*/ + } else { + schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0045-drm-radeon-add-Mullins-chip-family.patch b/common/recipes-kernel/linux/linux-amd/0045-drm-radeon-add-Mullins-chip-family.patch new file mode 100644 index 00000000..0be92260 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0045-drm-radeon-add-Mullins-chip-family.patch @@ -0,0 +1,54 @@ +From 46ca6a03591d1ed06ba2c2ff9376aa9d26d6b060 Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:44:20 -0400 +Subject: [PATCH 45/60] drm/radeon: add Mullins chip family + +Mullins is a new CI-based APU. 
+ +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon_asic.c | 1 + + drivers/gpu/drm/radeon/radeon_device.c | 1 + + drivers/gpu/drm/radeon/radeon_family.h | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c +index 19b2eea..4af1b94 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -2484,6 +2484,7 @@ int radeon_asic_init(struct radeon_device *rdev) + break; + case CHIP_KAVERI: + case CHIP_KABINI: ++ case CHIP_MULLINS: + rdev->asic = &kv_asic; + /* set num crtcs */ + if (rdev->family == CHIP_KAVERI) { +diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c +index 841d0e0..9359f89 100644 +--- a/drivers/gpu/drm/radeon/radeon_device.c ++++ b/drivers/gpu/drm/radeon/radeon_device.c +@@ -98,6 +98,7 @@ static const char radeon_family_name[][16] = { + "BONAIRE", + "KAVERI", + "KABINI", ++ "MULLINS", + "LAST", + }; + +diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h +index 3c82890..d082291 100644 +--- a/drivers/gpu/drm/radeon/radeon_family.h ++++ b/drivers/gpu/drm/radeon/radeon_family.h +@@ -96,6 +96,7 @@ enum radeon_family { + CHIP_BONAIRE, + CHIP_KAVERI, + CHIP_KABINI, ++ CHIP_MULLINS, + CHIP_LAST, + }; + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0046-drm-radeon-update-cik-init-for-Mullins.patch b/common/recipes-kernel/linux/linux-amd/0046-drm-radeon-update-cik-init-for-Mullins.patch new file mode 100644 index 00000000..7975edec --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0046-drm-radeon-update-cik-init-for-Mullins.patch @@ -0,0 +1,153 @@ +From 7c8facd79df23175e8288e86d10d3c5e1811cf6f Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:45:13 -0400 +Subject: [PATCH 46/60] drm/radeon: update cik init 
for Mullins. + +Also add golden registers, update firmware loading functions. + +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/cik.c | 71 +++++++++++++++++++++++++++++++++++ + drivers/gpu/drm/radeon/radeon_ucode.h | 1 + + 2 files changed, 72 insertions(+) + +diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c +index e759595..a35db40 100644 +--- a/drivers/gpu/drm/radeon/cik.c ++++ b/drivers/gpu/drm/radeon/cik.c +@@ -53,6 +53,12 @@ MODULE_FIRMWARE("radeon/KABINI_ce.bin"); + MODULE_FIRMWARE("radeon/KABINI_mec.bin"); + MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); + MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_pfp.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_me.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_ce.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_mec.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_rlc.bin"); ++MODULE_FIRMWARE("radeon/MULLINS_sdma.bin"); + + extern int r600_ih_ring_alloc(struct radeon_device *rdev); + extern void r600_ih_ring_fini(struct radeon_device *rdev); +@@ -1303,6 +1309,43 @@ static const u32 kalindi_mgcg_cgcg_init[] = + 0xd80c, 0xff000ff0, 0x00000100 + }; + ++static const u32 godavari_golden_registers[] = ++{ ++ 0x55e4, 0xff607fff, 0xfc000100, ++ 0x6ed8, 0x00010101, 0x00010000, ++ 0x9830, 0xffffffff, 0x00000000, ++ 0x98302, 0xf00fffff, 0x00000400, ++ 0x6130, 0xffffffff, 0x00010000, ++ 0x5bb0, 0x000000f0, 0x00000070, ++ 0x5bc0, 0xf0311fff, 0x80300000, ++ 0x98f8, 0x73773777, 0x12010001, ++ 0x98fc, 0xffffffff, 0x00000010, ++ 0x8030, 0x00001f0f, 0x0000100a, ++ 0x2f48, 0x73773777, 0x12010001, ++ 0x2408, 0x000fffff, 0x000c007f, ++ 0x8a14, 0xf000003f, 0x00000007, ++ 0x8b24, 0xffffffff, 0x00ff0fff, ++ 0x30a04, 0x0000ff0f, 0x00000000, ++ 0x28a4c, 0x07ffffff, 0x06000000, ++ 0x4d8, 0x00000fff, 0x00000100, ++ 0xd014, 0x00010000, 0x00810001, ++ 0xd814, 0x00010000, 0x00810001, ++ 0x3e78, 0x00000001, 0x00000002, ++ 0xc768, 0x00000008, 
0x00000008, ++ 0xc770, 0x00000f00, 0x00000800, ++ 0xc774, 0x00000f00, 0x00000800, ++ 0xc798, 0x00ffffff, 0x00ff7fbf, ++ 0xc79c, 0x00ffffff, 0x00ff7faf, ++ 0x8c00, 0x000000ff, 0x00000001, ++ 0x214f8, 0x01ff01ff, 0x00000002, ++ 0x21498, 0x007ff800, 0x00200000, ++ 0x2015c, 0xffffffff, 0x00000f40, ++ 0x88c4, 0x001f3ae3, 0x00000082, ++ 0x88d4, 0x0000001f, 0x00000010, ++ 0x30934, 0xffffffff, 0x00000000 ++}; ++ ++ + static void cik_init_golden_registers(struct radeon_device *rdev) + { + switch (rdev->family) { +@@ -1334,6 +1377,20 @@ static void cik_init_golden_registers(struct radeon_device *rdev) + kalindi_golden_spm_registers, + (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); + break; ++ case CHIP_MULLINS: ++ radeon_program_register_sequence(rdev, ++ kalindi_mgcg_cgcg_init, ++ (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init)); ++ radeon_program_register_sequence(rdev, ++ godavari_golden_registers, ++ (const u32)ARRAY_SIZE(godavari_golden_registers)); ++ radeon_program_register_sequence(rdev, ++ kalindi_golden_common_registers, ++ (const u32)ARRAY_SIZE(kalindi_golden_common_registers)); ++ radeon_program_register_sequence(rdev, ++ kalindi_golden_spm_registers, ++ (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); ++ break; + case CHIP_KAVERI: + radeon_program_register_sequence(rdev, + spectre_mgcg_cgcg_init, +@@ -1602,6 +1659,15 @@ static int cik_init_microcode(struct radeon_device *rdev) + rlc_req_size = KB_RLC_UCODE_SIZE * 4; + sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; + break; ++ case CHIP_MULLINS: ++ chip_name = "MULLINS"; ++ pfp_req_size = CIK_PFP_UCODE_SIZE * 4; ++ me_req_size = CIK_ME_UCODE_SIZE * 4; ++ ce_req_size = CIK_CE_UCODE_SIZE * 4; ++ mec_req_size = CIK_MEC_UCODE_SIZE * 4; ++ rlc_req_size = ML_RLC_UCODE_SIZE * 4; ++ sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; ++ break; + default: BUG(); + } + +@@ -2770,6 +2836,7 @@ static void cik_gpu_init(struct radeon_device *rdev) + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_KABINI: ++ case 
CHIP_MULLINS: + default: + rdev->config.cik.max_shader_engines = 1; + rdev->config.cik.max_tile_pipes = 2; +@@ -5044,6 +5111,9 @@ static int cik_rlc_resume(struct radeon_device *rdev) + case CHIP_KABINI: + size = KB_RLC_UCODE_SIZE; + break; ++ case CHIP_MULLINS: ++ size = ML_RLC_UCODE_SIZE; ++ break; + } + + cik_rlc_stop(rdev); +@@ -5791,6 +5861,7 @@ void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) + buffer[count++] = 0x00000000; + break; + case CHIP_KABINI: ++ case CHIP_MULLINS: + buffer[count++] = 0x00000000; /* XXX */ + buffer[count++] = 0x00000000; + break; +diff --git a/drivers/gpu/drm/radeon/radeon_ucode.h b/drivers/gpu/drm/radeon/radeon_ucode.h +index 7e48c35..aa32575 100644 +--- a/drivers/gpu/drm/radeon/radeon_ucode.h ++++ b/drivers/gpu/drm/radeon/radeon_ucode.h +@@ -52,6 +52,7 @@ + #define BONAIRE_RLC_UCODE_SIZE 2048 + #define KB_RLC_UCODE_SIZE 2560 + #define KV_RLC_UCODE_SIZE 2560 ++#define ML_RLC_UCODE_SIZE 2560 + + /* MC */ + #define BTC_MC_UCODE_SIZE 6024 +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0047-drm-radeon-add-Mullins-UVD-support.patch b/common/recipes-kernel/linux/linux-amd/0047-drm-radeon-add-Mullins-UVD-support.patch new file mode 100644 index 00000000..11b34449 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0047-drm-radeon-add-Mullins-UVD-support.patch @@ -0,0 +1,28 @@ +From 673db09585791c81880662e86615f682d355c3f8 Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:46:16 -0400 +Subject: [PATCH 47/60] drm/radeon/: add Mullins UVD support. + +Has same version of UVD as other CIK parts. 
+ +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/radeon_uvd.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c +index 839a0f9..77976b6 100644 +--- a/drivers/gpu/drm/radeon/radeon_uvd.c ++++ b/drivers/gpu/drm/radeon/radeon_uvd.c +@@ -99,6 +99,7 @@ int radeon_uvd_init(struct radeon_device *rdev) + case CHIP_BONAIRE: + case CHIP_KABINI: + case CHIP_KAVERI: ++ case CHIP_MULLINS: + fw_name = FIRMWARE_BONAIRE; + break; + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0048-drm-radeon-add-Mullins-dpm-support.patch b/common/recipes-kernel/linux/linux-amd/0048-drm-radeon-add-Mullins-dpm-support.patch new file mode 100644 index 00000000..d88379fc --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0048-drm-radeon-add-Mullins-dpm-support.patch @@ -0,0 +1,124 @@ +From fda91cd04f482af13757925b2778ad4a21d99685 Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:47:01 -0400 +Subject: [PATCH 48/60] drm/radeon: add Mullins dpm support. + +Generic dpm support similar to Kabini. Mullins specific features +will be worked on later. 
+ +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/kv_dpm.c | 20 ++++++++++---------- + drivers/gpu/drm/radeon/radeon_pm.c | 1 + + 2 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c +index a100b23..2f5979d 100644 +--- a/drivers/gpu/drm/radeon/kv_dpm.c ++++ b/drivers/gpu/drm/radeon/kv_dpm.c +@@ -639,7 +639,7 @@ static int kv_force_lowest_valid(struct radeon_device *rdev) + + static int kv_unforce_levels(struct radeon_device *rdev) + { +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel); + else + return kv_set_enabled_levels(rdev); +@@ -1625,7 +1625,7 @@ static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate) + if (pi->acp_power_gated == gate) + return; + +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + return; + + pi->acp_power_gated = gate; +@@ -1799,7 +1799,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) + } + } + +- if (rdev->family == CHIP_KABINI) { ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) { + if (pi->enable_dpm) { + kv_set_valid_clock_range(rdev, new_ps); + kv_update_dfs_bypass_settings(rdev, new_ps); +@@ -1880,7 +1880,7 @@ void kv_dpm_reset_asic(struct radeon_device *rdev) + { + struct kv_power_info *pi = kv_get_pi(rdev); + +- if (rdev->family == CHIP_KABINI) { ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) { + kv_force_lowest_valid(rdev); + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); +@@ -1959,7 +1959,7 @@ static int kv_force_dpm_highest(struct radeon_device *rdev) + break; + } + +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + return kv_send_msg_to_smc_with_parameter(rdev, 
PPSMC_MSG_DPM_ForceState, i); + else + return kv_set_enabled_level(rdev, i); +@@ -1979,7 +1979,7 @@ static int kv_force_dpm_lowest(struct radeon_device *rdev) + break; + } + +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + else + return kv_set_enabled_level(rdev, i); +@@ -2136,7 +2136,7 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + else + pi->battery_state = false; + +- if (rdev->family == CHIP_KABINI) { ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) { + ps->dpm0_pg_nb_ps_lo = 0x1; + ps->dpm0_pg_nb_ps_hi = 0x0; + ps->dpmx_nb_ps_lo = 0x1; +@@ -2197,7 +2197,7 @@ static int kv_calculate_nbps_level_settings(struct radeon_device *rdev) + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + +- if (rdev->family == CHIP_KABINI) { ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].GnbSlow = 1; + pi->graphics_level[i].ForceNbPs1 = 0; +@@ -2342,7 +2342,7 @@ static void kv_program_nbps_index_settings(struct radeon_device *rdev, + struct kv_power_info *pi = kv_get_pi(rdev); + u32 nbdpmconfig1; + +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + return; + + if (pi->sys_info.nb_dpm_enable) { +@@ -2649,7 +2649,7 @@ int kv_dpm_init(struct radeon_device *rdev) + + pi->sram_end = SMC_RAM_END; + +- if (rdev->family == CHIP_KABINI) ++ if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) + pi->high_voltage_t = 4001; + + pi->enable_nb_dpm = true; +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index 5f3a536..e971ec3 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -1300,6 +1300,7 @@ int radeon_pm_init(struct radeon_device *rdev) + case 
CHIP_BONAIRE: + case CHIP_KABINI: + case CHIP_KAVERI: ++ case CHIP_MULLINS: + /* DPM requires the RLC, RV770+ dGPU requires SMC */ + if (!rdev->rlc_fw) + rdev->pm.pm_method = PM_METHOD_PROFILE; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0049-drm-radeon-modesetting-updates-for-Mullins.patch b/common/recipes-kernel/linux/linux-amd/0049-drm-radeon-modesetting-updates-for-Mullins.patch new file mode 100644 index 00000000..0b571bee --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0049-drm-radeon-modesetting-updates-for-Mullins.patch @@ -0,0 +1,32 @@ +From 9d8e4f860ef39cf4eff5764a5a0468b5b40155ce Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:47:43 -0400 +Subject: [PATCH 49/60] drm/radeon: modesetting updates for Mullins. + +Uses the same code as Kabini. + +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/radeon/atombios_crtc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c +index 4be69b5..f50cd53 100644 +--- a/drivers/gpu/drm/radeon/atombios_crtc.c ++++ b/drivers/gpu/drm/radeon/atombios_crtc.c +@@ -1716,8 +1716,9 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) + } + /* otherwise, pick one of the plls */ + if ((rdev->family == CHIP_KAVERI) || +- (rdev->family == CHIP_KABINI)) { +- /* KB/KV has PPLL1 and PPLL2 */ ++ (rdev->family == CHIP_KABINI) || ++ (rdev->family == CHIP_MULLINS)) { ++ /* KB/KV/ML has PPLL1 and PPLL2 */ + pll_in_use = radeon_get_pll_use_mask(crtc); + if (!(pll_in_use & (1 << ATOM_PPLL2))) + return ATOM_PPLL2; +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0050-drm-radeon-add-pci-ids-for-Mullins.patch b/common/recipes-kernel/linux/linux-amd/0050-drm-radeon-add-pci-ids-for-Mullins.patch new file mode 100644 index 00000000..7b58a691 --- /dev/null +++ 
b/common/recipes-kernel/linux/linux-amd/0050-drm-radeon-add-pci-ids-for-Mullins.patch @@ -0,0 +1,41 @@ +From 4fd9aa991f37e6435a179bd17ffde2ccd49cf210 Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 16:48:26 -0400 +Subject: [PATCH 50/60] drm/radeon: add pci ids for Mullins + +Signed-off-by: Samuel Li <samuel.li@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + include/drm/drm_pciids.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h +index b521d1c..c6be0d8 100644 +--- a/include/drm/drm_pciids.h ++++ b/include/drm/drm_pciids.h +@@ -631,6 +631,22 @@ + {0x1002, 0x983d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x983e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x983f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9851, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9852, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9853, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9854, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9855, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9856, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9857, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x9859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ ++ {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0051-drm-radeon-add-Mulins-VCE-support.patch b/common/recipes-kernel/linux/linux-amd/0051-drm-radeon-add-Mulins-VCE-support.patch new file mode 100644 index 00000000..37eee7ca --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0051-drm-radeon-add-Mulins-VCE-support.patch @@ -0,0 +1,25 @@ +From 951b9d2da8816f2f3581f406b1180bc81c1a71bf Mon Sep 17 00:00:00 2001 +From: Leo Liu <leo.liu@amd.com> +Date: Mon, 28 Apr 2014 15:44:55 -0400 +Subject: [PATCH 51/60] drm/radeon: add Mulins VCE support + +Signed-off-by: Leo Liu <leo.liu@amd.com> +--- + drivers/gpu/drm/radeon/radeon_vce.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c 
+index 5bfb726..aec990e 100644 +--- a/drivers/gpu/drm/radeon/radeon_vce.c ++++ b/drivers/gpu/drm/radeon/radeon_vce.c +@@ -67,6 +67,7 @@ int radeon_vce_init(struct radeon_device *rdev) + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: ++ case CHIP_MULLINS: + fw_name = FIRMWARE_BONAIRE; + break; + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0052-i2c-piix4-Use-different-message-for-AMD-Auxiliary-SM.patch b/common/recipes-kernel/linux/linux-amd/0052-i2c-piix4-Use-different-message-for-AMD-Auxiliary-SM.patch new file mode 100644 index 00000000..11948066 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0052-i2c-piix4-Use-different-message-for-AMD-Auxiliary-SM.patch @@ -0,0 +1,33 @@ +From e40b4471e518e9478c97600befd29175a6358b2f Mon Sep 17 00:00:00 2001 +From: Shane Huang <shane.huang@amd.com> +Date: Wed, 22 Jan 2014 14:06:52 -0800 +Subject: [PATCH 52/60] i2c: piix4: Use different message for AMD Auxiliary + SMBus Controller + +Same messages for AMD main and auxiliary SMBus controllers lead to confusion, +this patch is to remove confusion and keep consistent with non-AMD products. 
+ +Signed-off-by: Shane Huang <shane.huang@amd.com> +Reviewed-by: Jean Delvare <khali@linux-fr.org> +Signed-off-by: Wolfram Sang <wsa@the-dreams.de> +--- + drivers/i2c/busses/i2c-piix4.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c +index f71b4d3..d82b8ab 100644 +--- a/drivers/i2c/busses/i2c-piix4.c ++++ b/drivers/i2c/busses/i2c-piix4.c +@@ -295,7 +295,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, + /* Aux SMBus does not support IRQ information */ + if (aux) { + dev_info(&PIIX4_dev->dev, +- "SMBus Host Controller at 0x%x\n", piix4_smba); ++ "Auxiliary SMBus Host Controller at 0x%x\n", ++ piix4_smba); + return piix4_smba; + } + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0053-ACPI-Set-acpi_enforce_resources-to-ENFORCE_RESOURCES.patch b/common/recipes-kernel/linux/linux-amd/0053-ACPI-Set-acpi_enforce_resources-to-ENFORCE_RESOURCES.patch new file mode 100644 index 00000000..025802a3 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0053-ACPI-Set-acpi_enforce_resources-to-ENFORCE_RESOURCES.patch @@ -0,0 +1,33 @@ +From 486e4ca33c3ea76848624063dd9426a0107dd06f Mon Sep 17 00:00:00 2001 +From: Drew Moseley <drew_moseley@mentor.com> +Date: Sat, 5 Jul 2014 17:10:52 -0400 +Subject: [PATCH 53/60] ACPI: Set acpi_enforce_resources to + ENFORCE_RESOURCES_LAX + +On some AMD platforms, the SMBus IO region 0xb00 - 0xb07 conflicts with the +corresponding ACPI SystemIO region. This prevents the SMBus host controller +driver to function correctly. We set acpi_enforce_resources to +ENFORCE_RESOURCES_LAX so that the SMBus driver can work normally, even +though a warning message notifying the conflict is printed in system logs. 
 + +Signed-off-by: Arindam Nath <arindam.nath@amd.com> +--- + drivers/acpi/osl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c +index e5f416c..cede198 100644 +--- a/drivers/acpi/osl.c ++++ b/drivers/acpi/osl.c +@@ -1533,7 +1533,7 @@ __setup("acpi_serialize", acpi_serialize_setup); + #define ENFORCE_RESOURCES_LAX 1 + #define ENFORCE_RESOURCES_NO 0 + +-static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_STRICT; ++static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_LAX; + + static int __init acpi_enforce_resources_setup(char *str) + { +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0059-yocto-amd-staging-add-support-to-enable-and-disable-.patch b/common/recipes-kernel/linux/linux-amd/0059-yocto-amd-staging-add-support-to-enable-and-disable-.patch new file mode 100644 index 00000000..4b35a82e --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0059-yocto-amd-staging-add-support-to-enable-and-disable-.patch @@ -0,0 +1,455 @@ +From 524e1b2703dd289d3301d62a67868866f7111a5d Mon Sep 17 00:00:00 2001 +From: Arindam Nath <arindam.nath@amd.com> +Date: Mon, 4 Aug 2014 19:16:53 +0530 +Subject: [PATCH 59/60] yocto: amd: staging: add support to enable and disable + IMC to fetch BIOS code + +The patch essentially adds support for two functions: +amd_imc_enter_scratch_ram() and amd_imc_exit_scratch_ram(). These +functions instruct IMC to stop and start fetching code from BIOS +ROM respectively. These functions are needed where IMC is trying +to fetch code on a shared bus when some other transaction is already +occurring. To prevent IMC from fetching incorrect data from ROM while it +is still being updated, we instruct IMC to temporarily stop fetching +code from BIOS, and then start fetching again when it is safe to do +so.
 + +Upstream Status: None + +Signed-off-by: Arindam Nath <arindam.nath@amd.com> +--- + drivers/staging/Kconfig | 2 + + drivers/staging/Makefile | 1 + + drivers/staging/amd_imc/Kconfig | 9 ++ + drivers/staging/amd_imc/Makefile | 1 + + drivers/staging/amd_imc/amd_imc.c | 298 ++++++++++++++++++++++++++++++++++++++ + include/linux/amd_imc.h | 68 +++++++++ + 6 files changed, 379 insertions(+) + create mode 100644 drivers/staging/amd_imc/Kconfig + create mode 100644 drivers/staging/amd_imc/Makefile + create mode 100644 drivers/staging/amd_imc/amd_imc.c + create mode 100644 include/linux/amd_imc.h + +diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig +index 3626dbc8..0a95d6d 100644 +--- a/drivers/staging/Kconfig ++++ b/drivers/staging/Kconfig +@@ -148,4 +148,6 @@ source "drivers/staging/dgnc/Kconfig" + + source "drivers/staging/dgap/Kconfig" + ++source "drivers/staging/amd_imc/Kconfig" ++ + endif # STAGING +diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile +index d1b4b80..2be3a91 100644 +--- a/drivers/staging/Makefile ++++ b/drivers/staging/Makefile +@@ -66,3 +66,4 @@ obj-$(CONFIG_USB_BTMTK) += btmtk_usb/ + obj-$(CONFIG_XILLYBUS) += xillybus/ + obj-$(CONFIG_DGNC) += dgnc/ + obj-$(CONFIG_DGAP) += dgap/ ++obj-$(CONFIG_AMD_IMC) += amd_imc/ +diff --git a/drivers/staging/amd_imc/Kconfig b/drivers/staging/amd_imc/Kconfig +new file mode 100644 +index 0000000..ca87061 +--- /dev/null ++++ b/drivers/staging/amd_imc/Kconfig +@@ -0,0 +1,9 @@ ++config AMD_IMC ++ bool "AMD Integrated Micro Controller support" ++ depends on PCI && X86_64 ++ default y ++ ---help--- ++ This driver supports AMD Integrated Micro Controller. ++ ++ This option is boolean: the driver can only be built into the ++ kernel and cannot be compiled as a module.
+diff --git a/drivers/staging/amd_imc/Makefile b/drivers/staging/amd_imc/Makefile +new file mode 100644 +index 0000000..c4837f8 +--- /dev/null ++++ b/drivers/staging/amd_imc/Makefile +@@ -0,0 +1 @@ ++obj-$(CONFIG_AMD_IMC) += amd_imc.o +diff --git a/drivers/staging/amd_imc/amd_imc.c b/drivers/staging/amd_imc/amd_imc.c +new file mode 100644 +index 0000000..c6c6074 +--- /dev/null ++++ b/drivers/staging/amd_imc/amd_imc.c +@@ -0,0 +1,298 @@ ++/***************************************************************************** ++* ++* Copyright (c) 2014, Advanced Micro Devices, Inc. ++* All rights reserved. ++* ++* Redistribution and use in source and binary forms, with or without ++* modification, are permitted provided that the following conditions are met: ++* * Redistributions of source code must retain the above copyright ++* notice, this list of conditions and the following disclaimer. ++* * Redistributions in binary form must reproduce the above copyright ++* notice, this list of conditions and the following disclaimer in the ++* documentation and/or other materials provided with the distribution. ++* * Neither the name of Advanced Micro Devices, Inc. nor the names of ++* its contributors may be used to endorse or promote products derived ++* from this software without specific prior written permission. ++* ++* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++* DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. 
BE LIABLE FOR ANY ++* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++* ++* ++***************************************************************************/ ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/pci.h> ++#include <linux/ioport.h> ++#include <linux/platform_device.h> ++#include <linux/uaccess.h> ++#include <linux/io.h> ++#include <linux/delay.h> ++#include <linux/amd_imc.h> ++ ++static int imc_enabled; ++static u16 imc_port_addr; ++static u8 msg_reg_base_hi; ++static u8 msg_reg_base_lo; ++static u16 msg_reg_base; ++ ++static struct pci_dev *amd_imc_pci; ++static struct platform_device *amd_imc_platform_device; ++ ++static DEFINE_PCI_DEVICE_TABLE(amd_lpc_pci_tbl) = { ++ {PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LPC_BRIDGE, PCI_ANY_ID, ++ PCI_ANY_ID,}, ++ {} ++}; ++ ++void amd_imc_enter_scratch_ram(void) ++{ ++ u8 byte; ++ ++ if (!imc_enabled) ++ return; ++ ++ /* Instruct IMC to enter scratch RAM */ ++ outb(AMD_MSG_REG0, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(0, msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ outb(AMD_MSG_REG1, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(AMD_IMC_ENTER_SCRATCH_RAM, msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ outb(AMD_MSG_SYS_TO_IMC, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(AMD_IMC_ROM_OWNERSHIP_SEM, msg_reg_base + ++ AMD_MSG_DATA_REG_OFFSET); ++ ++ /* As per the spec, the firmware may take up to 50ms */ ++ msleep(50); ++ ++ /* read message registger 0 to confirm function completion */ ++ outb(AMD_MSG_REG0, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ byte = 
inb(msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ if (byte == AMD_IMC_FUNC_NOT_SUPP) ++ pr_info("amd_imc: %s not supported\n", __func__); ++ else if (byte == AMD_IMC_FUNC_COMPLETED) ++ pr_info("amd_imc: %s completed\n", __func__); ++} ++EXPORT_SYMBOL_GPL(amd_imc_enter_scratch_ram); ++ ++void amd_imc_exit_scratch_ram(void) ++{ ++ u8 byte; ++ ++ if (!imc_enabled) ++ return; ++ ++ /* Instruct IMC to exit scratch RAM */ ++ outb(AMD_MSG_REG0, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(0, msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ outb(AMD_MSG_REG1, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(AMD_IMC_EXIT_SCRATCH_RAM, msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ outb(AMD_MSG_SYS_TO_IMC, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ outb(AMD_IMC_ROM_OWNERSHIP_SEM, msg_reg_base + ++ AMD_MSG_DATA_REG_OFFSET); ++ ++ /* As per the spec, the firmware may take up to 50ms */ ++ msleep(50); ++ ++ /* read message register 0 to confirm function completion */ ++ outb(AMD_MSG_REG0, msg_reg_base + AMD_MSG_INDEX_REG_OFFSET); ++ byte = inb(msg_reg_base + AMD_MSG_DATA_REG_OFFSET); ++ ++ if (byte == AMD_IMC_FUNC_NOT_SUPP) ++ pr_info("amd_imc: %s not supported\n", __func__); ++ else if (byte == AMD_IMC_FUNC_COMPLETED) ++ pr_info("amd_imc: %s completed\n", __func__); ++} ++EXPORT_SYMBOL_GPL(amd_imc_exit_scratch_ram); ++ ++/* ++* The PCI Device ID table below is used to identify the platform ++* the driver is supposed to work for. Since this is a platform ++* driver, we need a way for us to be able to find the correct ++* platform when the driver gets loaded, otherwise we should ++* bail out.
++*/ ++static DEFINE_PCI_DEVICE_TABLE(amd_imc_pci_tbl) = { ++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, PCI_ANY_ID, ++ PCI_ANY_ID, }, ++ { 0, }, ++}; ++ ++static int amd_imc_init(struct platform_device *pdev) ++{ ++ struct pci_dev *dev = NULL; ++ static u32 imc_strap_status_phys; ++ void __iomem *imcstrapstatus; ++ u32 val; ++ u8 *byte; ++ ++ /* Match the PCI device */ ++ for_each_pci_dev(dev) { ++ if (pci_match_id(amd_imc_pci_tbl, dev) != NULL) { ++ amd_imc_pci = dev; ++ break; ++ } ++ } ++ ++ if (!amd_imc_pci) ++ return -ENODEV; ++ ++ /* Locate ACPI MMIO Base Address. */ ++ byte = (u8 *)&val; ++ ++ outb(AMD_PM_ACPI_MMIO_BASE0, AMD_IO_PM_INDEX_REG); ++ byte[0] = inb(AMD_IO_PM_DATA_REG); ++ outb(AMD_PM_ACPI_MMIO_BASE1, AMD_IO_PM_INDEX_REG); ++ byte[1] = inb(AMD_IO_PM_DATA_REG); ++ outb(AMD_PM_ACPI_MMIO_BASE2, AMD_IO_PM_INDEX_REG); ++ byte[2] = inb(AMD_IO_PM_DATA_REG); ++ outb(AMD_PM_ACPI_MMIO_BASE3, AMD_IO_PM_INDEX_REG); ++ byte[3] = inb(AMD_IO_PM_DATA_REG); ++ ++ /* Bits 31:13 is the actual ACPI MMIO Base Address */ ++ val &= AMD_ACPI_MMIO_ADDR_MASK; ++ ++ /* IMCStrapStatus is located at ACPI MMIO Base Address + 0xE80 */ ++ if (!request_mem_region_exclusive(val + AMD_IMC_STRAP_STATUS_OFFSET, ++ AMD_IMC_STRAP_STATUS_SIZE, "IMC Strap Status")) { ++ pr_err("amd_imc: MMIO address 0x%04x already in use\n", ++ val + AMD_IMC_STRAP_STATUS_OFFSET); ++ goto exit; ++ } ++ ++ imc_strap_status_phys = val + AMD_IMC_STRAP_STATUS_OFFSET; ++ ++ imcstrapstatus = ioremap(imc_strap_status_phys, ++ AMD_IMC_STRAP_STATUS_SIZE); ++ if (!imcstrapstatus) { ++ pr_err("amd_imc: failed to get IMC Strap Status address\n"); ++ goto unreg_imc_region; ++ } ++ ++ /* Check if IMC is enabled */ ++ val = ioread32(imcstrapstatus); ++ if ((val & AMD_IMC_ENABLED) == AMD_IMC_ENABLED) { ++ struct pci_dev *pdev = NULL; ++ ++ pr_info("amd_imc: IMC is enabled\n"); ++ imc_enabled = 1; ++ ++ /* ++ * In case IMC is enabled, we need to find the IMC port address ++ * which will be used to send 
messages to the IMC. The IMC port ++ * address is stored in bits 1:15 of PCI device 20, function 3, ++ * offset 0xA4. PCI device 20, function 3 is actually the LPC ++ * ISA bridge. ++ */ ++ for_each_pci_dev(pdev) { ++ if (pci_match_id(amd_lpc_pci_tbl, pdev) != NULL) ++ break; ++ } ++ ++ /* Match found. Get the IMC port address */ ++ if (pdev) { ++ pci_read_config_word(pdev, AMD_PCI_IMC_PORT_ADDR_REG, ++ &imc_port_addr); ++ ++ /* The actual IMC port address has bit 0 masked out */ ++ imc_port_addr &= ~AMD_IMC_PORT_ACTIVE; ++ } ++ ++ /* Put device into configuration state */ ++ outb(AMD_DEVICE_ENTER_CONFIG_STATE, imc_port_addr + ++ AMD_IMC_INDEX_REG_OFFSET); ++ ++ /* Select logical device number 9 */ ++ outb(AMD_SET_LOGICAL_DEVICE, imc_port_addr + ++ AMD_IMC_INDEX_REG_OFFSET); ++ outb(AMD_SET_DEVICE_9, imc_port_addr + ++ AMD_IMC_DATA_REG_OFFSET); ++ ++ /* read high byte of message register base address */ ++ outb(AMD_MSG_REG_HIGH, imc_port_addr + ++ AMD_IMC_INDEX_REG_OFFSET); ++ msg_reg_base_hi = inb(imc_port_addr + AMD_IMC_DATA_REG_OFFSET); ++ ++ /* read low byte of message register base address */ ++ outb(AMD_MSG_REG_LOW, imc_port_addr + ++ AMD_IMC_INDEX_REG_OFFSET); ++ msg_reg_base_lo = inb(imc_port_addr + AMD_IMC_DATA_REG_OFFSET); ++ ++ msg_reg_base = msg_reg_base_lo | (msg_reg_base_hi << 8); ++ ++ /* Get device out of configuration state */ ++ outb(AMD_DEVICE_EXIT_CONFIG_STATE, imc_port_addr + ++ AMD_IMC_INDEX_REG_OFFSET); ++ } else { ++ pr_info("amd_imc: IMC is disabled\n"); ++ imc_enabled = 0; ++ } ++ ++ /* Release the region occupied by IMC Strap Status register */ ++ iounmap(imcstrapstatus); ++ release_mem_region(imc_strap_status_phys, AMD_IMC_STRAP_STATUS_SIZE); ++ ++ return 0; ++ ++unreg_imc_region: ++ release_mem_region(imc_strap_status_phys, AMD_IMC_STRAP_STATUS_SIZE); ++exit: ++ return -ENODEV; ++} ++ ++static struct platform_driver amd_imc_driver = { ++ .probe = amd_imc_init, ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = IMC_MODULE_NAME, ++ }, 
++}; ++ ++static int __init amd_imc_init_module(void) ++{ ++ int err; ++ ++ pr_info("AMD IMC Driver v%s\n", IMC_VERSION); ++ ++ err = platform_driver_register(&amd_imc_driver); ++ if (err) ++ return err; ++ ++ amd_imc_platform_device = platform_device_register_simple( ++ IMC_MODULE_NAME, -1, NULL, 0); ++ if (IS_ERR(amd_imc_platform_device)) { ++ err = PTR_ERR(amd_imc_platform_device); ++ goto unreg_platform_driver; ++ } ++ ++ return 0; ++ ++unreg_platform_driver: ++ platform_driver_unregister(&amd_imc_driver); ++ return err; ++} ++ ++static void __exit amd_imc_cleanup_module(void) ++{ ++ platform_device_unregister(amd_imc_platform_device); ++ platform_driver_unregister(&amd_imc_driver); ++ pr_info("AMD IMC Module Unloaded\n"); ++} ++ ++module_init(amd_imc_init_module); ++module_exit(amd_imc_cleanup_module); ++ ++MODULE_AUTHOR("Arindam Nath <arindam.nath@amd.com>"); ++MODULE_DESCRIPTION("AMD IMC driver"); ++MODULE_LICENSE("Dual BSD/GPL"); +diff --git a/include/linux/amd_imc.h b/include/linux/amd_imc.h +new file mode 100644 +index 0000000..4b4b7b8 +--- /dev/null ++++ b/include/linux/amd_imc.h +@@ -0,0 +1,68 @@ ++#ifndef _AMD_IMC_H_ ++#define _AMD_IMC_H_ ++ ++/* Module and version information */ ++#define IMC_VERSION "0.1" ++#define IMC_MODULE_NAME "AMD IMC" ++#define IMC_DRIVER_NAME IMC_MODULE_NAME ", v" IMC_VERSION ++ ++#define DRV_NAME "amd_imc" ++ ++/* IO port address for indirect access using the ACPI PM registers */ ++#define AMD_IO_PM_INDEX_REG 0xCD6 ++#define AMD_IO_PM_DATA_REG 0xCD7 ++ ++#define AMD_PM_ACPI_MMIO_BASE0 0x24 ++#define AMD_PM_ACPI_MMIO_BASE1 0x25 ++#define AMD_PM_ACPI_MMIO_BASE2 0x26 ++#define AMD_PM_ACPI_MMIO_BASE3 0x27 ++ ++#define AMD_ACPI_MMIO_ADDR_MASK ~0x1FFF ++ ++/* Offset of IMC Strap Status register in the ACPI MMIO region */ ++#define AMD_IMC_STRAP_STATUS_OFFSET 0xE80 ++ #define AMD_IMC_ENABLED 0x4 ++#define AMD_IMC_STRAP_STATUS_SIZE 4 ++ ++#define PCI_DEVICE_ID_AMD_LPC_BRIDGE 0x780E ++ #define AMD_PCI_IMC_PORT_ADDR_REG 0xA4 ++ #define 
AMD_IMC_PORT_ACTIVE 0x0001 ++ ++/* Device configuration state fields */ ++#define AMD_DEVICE_ENTER_CONFIG_STATE 0x5A ++#define AMD_DEVICE_EXIT_CONFIG_STATE 0xA5 ++ ++/* Global configuration registers */ ++#define AMD_SET_LOGICAL_DEVICE 0x07 ++ #define AMD_SET_DEVICE_9 0x09 ++#define AMD_MSG_REG_HIGH 0x60 ++#define AMD_MSG_REG_LOW 0x61 ++ ++/* IMC index and data port offsets for indirect access */ ++#define AMD_IMC_INDEX_REG_OFFSET 0x00 ++#define AMD_IMC_DATA_REG_OFFSET 0x01 ++ ++/* Message register index and data port offsets for indirect access */ ++#define AMD_MSG_INDEX_REG_OFFSET 0x00 ++#define AMD_MSG_DATA_REG_OFFSET 0x01 ++ ++/* IMC message registers */ ++#define AMD_MSG_SYS_TO_IMC 0x80 ++ #define AMD_IMC_ROM_OWNERSHIP_SEM 0x96 ++#define AMD_MSG_REG0 0x82 ++ #define AMD_IMC_FUNC_NOT_SUPP 0x00 ++ #define AMD_IMC_FUNC_COMPLETED 0xFA ++#define AMD_MSG_REG1 0x83 ++ #define AMD_IMC_ENTER_SCRATCH_RAM 0xB4 ++ #define AMD_IMC_EXIT_SCRATCH_RAM 0xB5 ++ ++/* Real functions when the driver is built; no-op inline stubs otherwise */ ++#ifdef CONFIG_AMD_IMC ++extern void amd_imc_enter_scratch_ram(void); ++extern void amd_imc_exit_scratch_ram(void); ++#else ++static inline void amd_imc_enter_scratch_ram(void) {} ++static inline void amd_imc_exit_scratch_ram(void) {} ++#endif ++ ++#endif /* _AMD_IMC_H_ */ +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-amd/0060-yocto-amd-i2c-dev-add-calls-to-enable-and-disable-IM.patch b/common/recipes-kernel/linux/linux-amd/0060-yocto-amd-i2c-dev-add-calls-to-enable-and-disable-IM.patch new file mode 100644 index 00000000..3fa9c407 --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd/0060-yocto-amd-i2c-dev-add-calls-to-enable-and-disable-IM.patch @@ -0,0 +1,51 @@ +From ae1353dc647641a2a9f2614eeb7f761e0c63586c Mon Sep 17 00:00:00 2001 +From: Arindam Nath <arindam.nath@amd.com> +Date: Mon, 4 Aug 2014 19:21:44 +0530 +Subject: [PATCH 60/60] yocto: amd: i2c-dev: add calls to enable and disable + IMC from fetching BIOS code + +The patch adds support to disable IMC from fetching BIOS code when +we first open the
SMBus device. We can perform SMBus transaction +safely once IMC is disabled. Then when we close the device after +the operation, we enable IMC to start fetching from BIOS ROM again. + +Upstream Status: None + +Signed-off-by: Arindam Nath <arindam.nath@amd.com> +--- + drivers/i2c/i2c-dev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c +index c3ccdea..c92ec4c 100644 +--- a/drivers/i2c/i2c-dev.c ++++ b/drivers/i2c/i2c-dev.c +@@ -39,6 +39,7 @@ + #include <linux/i2c-dev.h> + #include <linux/jiffies.h> + #include <linux/uaccess.h> ++#include <linux/amd_imc.h> + + /* + * An i2c_dev represents an i2c_adapter ... an I2C or SMBus master, not a +@@ -512,6 +513,8 @@ static int i2cdev_open(struct inode *inode, struct file *file) + client->adapter = adap; + file->private_data = client; + ++ amd_imc_enter_scratch_ram(); ++ + return 0; + } + +@@ -523,6 +526,8 @@ static int i2cdev_release(struct inode *inode, struct file *file) + kfree(client); + file->private_data = NULL; + ++ amd_imc_exit_scratch_ram(); ++ + return 0; + } + +-- +1.9.1 + diff --git a/common/recipes-kernel/linux/linux-yocto/bluetooth.cfg b/common/recipes-kernel/linux/linux-amd/bluetooth.cfg index 581830f0..581830f0 100644 --- a/common/recipes-kernel/linux/linux-yocto/bluetooth.cfg +++ b/common/recipes-kernel/linux/linux-amd/bluetooth.cfg diff --git a/common/recipes-kernel/linux/linux-yocto/console.cfg b/common/recipes-kernel/linux/linux-amd/console.cfg index 31c98bb1..31c98bb1 100644 --- a/common/recipes-kernel/linux/linux-yocto/console.cfg +++ b/common/recipes-kernel/linux/linux-amd/console.cfg diff --git a/common/recipes-kernel/linux/linux-yocto/disable-debug-preempt.cfg b/common/recipes-kernel/linux/linux-amd/disable-debug-preempt.cfg index 6db1c66f..6db1c66f 100644 --- a/common/recipes-kernel/linux/linux-yocto/disable-debug-preempt.cfg +++ b/common/recipes-kernel/linux/linux-amd/disable-debug-preempt.cfg diff --git 
a/common/recipes-kernel/linux/linux-yocto/efi-partition.cfg b/common/recipes-kernel/linux/linux-amd/efi-partition.cfg index a7a89935..a7a89935 100644 --- a/common/recipes-kernel/linux/linux-yocto/efi-partition.cfg +++ b/common/recipes-kernel/linux/linux-amd/efi-partition.cfg diff --git a/common/recipes-kernel/linux/linux-yocto/logitech.cfg b/common/recipes-kernel/linux/linux-amd/logitech.cfg index 12b0fe98..12b0fe98 100644 --- a/common/recipes-kernel/linux/linux-yocto/logitech.cfg +++ b/common/recipes-kernel/linux/linux-amd/logitech.cfg diff --git a/common/recipes-kernel/linux/linux-yocto/logo.cfg b/common/recipes-kernel/linux/linux-amd/logo.cfg index 9772c12e..9772c12e 100644 --- a/common/recipes-kernel/linux/linux-yocto/logo.cfg +++ b/common/recipes-kernel/linux/linux-amd/logo.cfg diff --git a/common/recipes-kernel/linux/linux-yocto/sound.cfg b/common/recipes-kernel/linux/linux-amd/sound.cfg index 21ee7ae1..21ee7ae1 100644 --- a/common/recipes-kernel/linux/linux-yocto/sound.cfg +++ b/common/recipes-kernel/linux/linux-amd/sound.cfg diff --git a/common/recipes-kernel/linux/linux-amd_3.12.34.bb b/common/recipes-kernel/linux/linux-amd_3.12.34.bb new file mode 100644 index 00000000..4cf9c1fd --- /dev/null +++ b/common/recipes-kernel/linux/linux-amd_3.12.34.bb @@ -0,0 +1,77 @@ +DESCRIPTION = "Linux Kernel v3.12.34" +SECTION = "kernel" +LICENSE = "GPLv2" + +LIC_FILES_CHKSUM = "file://COPYING;md5=d7810fab7487fb0aad327b76f1be7cd7" + +inherit kernel cml1-config + +SRC_URI = "https://www.kernel.org/pub/linux/kernel/v3.x/linux-${PV}.tar.xz;name=kernel \ + file://0001-drm-radeon-add-vm_set_page-tracepoint.patch;striplevel=1 \ + file://0002-drm-radeon-cleanup-flushing-on-CIK-v3.patch;striplevel=1 \ + file://0003-drm-radeon-cleanup-DMA-HDP-flush-on-CIK-v2.patch;striplevel=1 \ + file://0004-drm-radeon-allow-semaphore-emission-to-fail.patch;striplevel=1 \ + file://0005-drm-radeon-improve-ring-debugfs-a-bit.patch;striplevel=1 \ + 
file://0006-drm-radeon-report-the-real-offset-in-radeon_sa_bo_du.patch;striplevel=1 \ + file://0007-drm-radeon-update-fence-values-in-before-reporting-t.patch;striplevel=1 \ + file://0008-drm-radeon-cleanup-radeon_ttm-debugfs-handling.patch;striplevel=1 \ + file://0009-drm-radeon-add-VRAM-debugfs-access-v3.patch;striplevel=1 \ + file://0010-drm-radeon-add-GART-debugfs-access-v3.patch;striplevel=1 \ + file://0011-drm-radeon-fix-VMID-use-tracking.patch;striplevel=1 \ + file://0012-drm-radeon-add-missing-trace-point.patch;striplevel=1 \ + file://0013-drm-radeon-add-semaphore-trace-point.patch;striplevel=1 \ + file://0014-drm-radeon-add-VMID-allocation-trace-point.patch;striplevel=1 \ + file://0015-drm-radeon-add-uvd-debugfs-support.patch;striplevel=1 \ + file://0016-drm-radeon-add-radeon_vm_bo_update-trace-point.patch;striplevel=1 \ + file://0017-drm-radeon-drop-CP-page-table-updates-cleanup-v2.patch;striplevel=1 \ + file://0018-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch;striplevel=1 \ + file://0019-drm-radeon-add-proper-support-for-RADEON_VM_BLOCK_SI.patch;striplevel=1 \ + file://0020-drm-radeon-WIP-add-copy-trace-point.patch;striplevel=1 \ + file://0021-drm-radeon-cik-Return-backend-map-information-to-use.patch;striplevel=1 \ + file://0022-drm-radeon-cik-Add-macrotile-mode-array-query.patch;striplevel=1 \ + file://0023-drm-radeon-set-correct-number-of-banks-for-CIK-chips.patch;striplevel=1 \ + file://0024-drm-radeon-don-t-power-gate-paused-UVD-streams.patch;striplevel=1 \ + file://0025-drm-radeon-dpm-retain-user-selected-performance-leve.patch;striplevel=1 \ + file://0026-drm-radeon-remove-generic-rptr-wptr-functions-v2.patch;striplevel=1 \ + file://0027-drm-radeon-initial-VCE-support-v4.patch;striplevel=1 \ + file://0028-drm-radeon-add-VCE-ring-query.patch;striplevel=1 \ + file://0029-drm-radeon-add-VCE-version-parsing-and-checking.patch;striplevel=1 \ + file://0030-drm-radeon-add-callback-for-setting-vce-clocks.patch;striplevel=1 \ + 
file://0031-drm-radeon-dpm-move-platform-caps-fetching-to-a-sepa.patch;striplevel=1 \ + file://0032-drm-radeon-dpm-fill-in-some-initial-vce-infrastructu.patch;striplevel=1 \ + file://0033-drm-radeon-dpm-fetch-vce-states-from-the-vbios.patch;striplevel=1 \ + file://0034-drm-radeon-fill-in-set_vce_clocks-for-CIK-asics.patch;striplevel=1 \ + file://0035-drm-radeon-add-vce-dpm-support-for-CI.patch;striplevel=1 \ + file://0036-drm-radeon-enable-vce-dpm-on-CI.patch;striplevel=1 \ + file://0037-drm-radeon-add-vce-dpm-support-for-KV-KB.patch;striplevel=1 \ + file://0038-drm-radeon-dpm-enable-dynamic-vce-state-switching-v2.patch;striplevel=1 \ + file://0039-drm-radeon-dpm-properly-enable-disable-vce-when-vce-.patch;striplevel=1 \ + file://0040-drm-radeon-add-vce-debugfs-support.patch;striplevel=1 \ + file://0041-drm-radeon-add-support-for-vce-2.0-clock-gating.patch;striplevel=1 \ + file://0042-drm-radeon-cik-enable-disable-vce-cg-when-encoding.patch;striplevel=1 \ + file://0043-drm-radeon-fix-CP-semaphores-on-CIK.patch;striplevel=1 \ + file://0044-drm-radeon-disable-dynamic-powering-vce.patch;striplevel=1 \ + file://0045-drm-radeon-add-Mullins-chip-family.patch;striplevel=1 \ + file://0046-drm-radeon-update-cik-init-for-Mullins.patch;striplevel=1 \ + file://0047-drm-radeon-add-Mullins-UVD-support.patch;striplevel=1 \ + file://0048-drm-radeon-add-Mullins-dpm-support.patch;striplevel=1 \ + file://0049-drm-radeon-modesetting-updates-for-Mullins.patch;striplevel=1 \ + file://0050-drm-radeon-add-pci-ids-for-Mullins.patch;striplevel=1 \ + file://0051-drm-radeon-add-Mulins-VCE-support.patch;striplevel=1 \ + file://0052-i2c-piix4-Use-different-message-for-AMD-Auxiliary-SM.patch;striplevel=1 \ + file://0053-ACPI-Set-acpi_enforce_resources-to-ENFORCE_RESOURCES.patch;striplevel=1 \ + file://0059-yocto-amd-staging-add-support-to-enable-and-disable-.patch;striplevel=1 \ + file://0060-yocto-amd-i2c-dev-add-calls-to-enable-and-disable-IM.patch;striplevel=1 \ + file://logo.cfg \ + 
file://console.cfg \ + file://logitech.cfg \ + file://efi-partition.cfg \ + file://sound.cfg \ + ${@base_contains("DISTRO_FEATURES", "bluetooth", "file://bluetooth.cfg", "", d)} \ + file://disable-debug-preempt.cfg \ +" + +S = "${WORKDIR}/linux-${PV}" + +SRC_URI[kernel.md5sum] = "0cadb5280ca8948fedd44734d3d6275f" +SRC_URI[kernel.sha256sum] = "f067eb4447c36358c7b2ee392e0a2470a232818998287acd98ec6295f1b1ed0c" diff --git a/common/recipes-kernel/linux/linux-yocto/0001-xhci-Enable-XHCI_TRUST_TX_LENGTH-quirk-for-AMD-devic.patch b/common/recipes-kernel/linux/linux-yocto/0001-xhci-Enable-XHCI_TRUST_TX_LENGTH-quirk-for-AMD-devic.patch deleted file mode 100644 index 81d2bddb..00000000 --- a/common/recipes-kernel/linux/linux-yocto/0001-xhci-Enable-XHCI_TRUST_TX_LENGTH-quirk-for-AMD-devic.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 2ee96475fe61bbc8a22175e774cc3b9d3627c4a9 Mon Sep 17 00:00:00 2001 -From: Drew Moseley <drew_moseley@mentor.com> -Date: Fri, 25 Jul 2014 18:28:09 -0400 -Subject: [PATCH] xhci: Enable XHCI_TRUST_TX_LENGTH quirk for AMD devices. 
- -Signed-off-by: Drew Moseley <drew_moseley@mentor.com> ---- - drivers/usb/host/xhci-pci.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c -index f34b42e..ff2d876 100644 ---- a/drivers/usb/host/xhci-pci.c -+++ b/drivers/usb/host/xhci-pci.c -@@ -38,6 +38,9 @@ - #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 - #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 - -+#define PCI_VENDOR_ID_AMD 0x1022 -+#define PCI_DEVICE_ID_AMD_USB_HCI 0x7814 -+ - static const char hcd_name[] = "xhci_hcd"; - - /* called after powerup, by probe or system-pm "wakeup" */ -@@ -90,6 +93,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) - "has broken MSI implementation", - pdev->revision); - xhci->quirks |= XHCI_TRUST_TX_LENGTH; -+ } else if (pdev->vendor == PCI_VENDOR_ID_AMD && -+ pdev->device == PCI_DEVICE_ID_AMD_USB_HCI) { -+ xhci->quirks |= XHCI_TRUST_TX_LENGTH; -+ xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, -+ "QUIRK: AMD revision %u " -+ "needs quirk XHCI_TRUST_TX_LENGTH", -+ pdev->revision); - } - - if (pdev->vendor == PCI_VENDOR_ID_NEC) --- -1.9.1 - |