diff options
Diffstat (limited to 'common/recipes-kernel/linux/files/0335-drm-amdgpu-fix-VM-faults-caused-by-vm_grab_id-v4.patch')
-rw-r--r-- | common/recipes-kernel/linux/files/0335-drm-amdgpu-fix-VM-faults-caused-by-vm_grab_id-v4.patch | 413 |
1 file changed, 413 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0335-drm-amdgpu-fix-VM-faults-caused-by-vm_grab_id-v4.patch b/common/recipes-kernel/linux/files/0335-drm-amdgpu-fix-VM-faults-caused-by-vm_grab_id-v4.patch new file mode 100644 index 00000000..83902103 --- /dev/null +++ b/common/recipes-kernel/linux/files/0335-drm-amdgpu-fix-VM-faults-caused-by-vm_grab_id-v4.patch @@ -0,0 +1,413 @@ +From eaa21d10fdc0f6004c251edb39a17ac9dece6707 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 26 Feb 2016 16:18:26 +0100 +Subject: [PATCH 0335/1110] drm/amdgpu: fix VM faults caused by vm_grab_id() v4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The owner must be per ring as long as we don't +support sharing VMIDs per process. Also move the +assigned VMID and page directory address into the +IB structure. + +v3: assign the VMID to all IBs, not just the first one. +v4: use correct pointer for owner + +Signed-off-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Chunming Zhou <david1.zhou@amd.com> +Acked-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 +++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 115 +++++++++++++++++--------------- + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- + 9 files changed, 91 insertions(+), 80 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index af4f128..ca1223f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ 
-729,8 +729,9 @@ struct amdgpu_ib { + uint32_t *ptr; + struct amdgpu_fence *fence; + struct amdgpu_user_fence *user; +- bool grabbed_vmid; + struct amdgpu_vm *vm; ++ unsigned vm_id; ++ uint64_t vm_pd_addr; + struct amdgpu_ctx *ctx; + uint32_t gds_base, gds_size; + uint32_t gws_base, gws_size; +@@ -836,10 +837,10 @@ struct amdgpu_vm_pt { + }; + + struct amdgpu_vm_id { +- unsigned id; +- uint64_t pd_gpu_addr; ++ struct amdgpu_vm_manager_id *mgr_id; ++ uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ +- struct fence *flushed_updates; ++ struct fence *flushed_updates; + }; + + struct amdgpu_vm { +@@ -920,10 +921,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates); + void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, +- struct amdgpu_sync *sync, struct fence *fence); ++ struct amdgpu_sync *sync, struct fence *fence, ++ unsigned *vm_id, uint64_t *vm_pd_addr); + void amdgpu_vm_flush(struct amdgpu_ring *ring, +- struct amdgpu_vm *vm, +- struct fence *updates); ++ unsigned vmid, ++ uint64_t pd_addr); + uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); + int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +index 9040b3e..6696fc1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -75,6 +75,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + } + + ib->vm = vm; ++ ib->vm_id = 0; + + return 0; + } +@@ -138,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + return -EINVAL; + } + +- if (vm && !ibs->grabbed_vmid) { ++ if (vm && !ibs->vm_id) { + dev_err(adev->dev, "VM IB without ID\n"); + return -EINVAL; + } +@@ -151,10 +152,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
unsigned num_ibs, + + if (vm) { + /* do context switch */ +- amdgpu_vm_flush(ring, vm, last_vm_update); ++ amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr); + + if (ring->funcs->emit_gds_switch) +- amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, ++ amdgpu_ring_emit_gds_switch(ring, ib->vm_id, + ib->gds_base, ib->gds_size, + ib->gws_base, ib->gws_size, + ib->oa_base, ib->oa_size); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +index 10ff227..af9bae6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -105,16 +105,23 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) + + struct fence *fence = amdgpu_sync_get_fence(&job->sync); + +- if (fence == NULL && vm && !job->ibs->grabbed_vmid) { ++ if (fence == NULL && vm && !job->ibs->vm_id) { + struct amdgpu_ring *ring = job->ring; ++ unsigned i, vm_id; ++ uint64_t vm_pd_addr; + int r; + + r = amdgpu_vm_grab_id(vm, ring, &job->sync, +- &job->base.s_fence->base); ++ &job->base.s_fence->base, ++ &vm_id, &vm_pd_addr); + if (r) + DRM_ERROR("Error getting VM ID (%d)\n", r); +- else +- job->ibs->grabbed_vmid = true; ++ else { ++ for (i = 0; i < job->num_ibs; ++i) { ++ job->ibs[i].vm_id = vm_id; ++ job->ibs[i].vm_pd_addr = vm_pd_addr; ++ } ++ } + + fence = amdgpu_sync_get_fence(&job->sync); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 8e6786c..d464bde 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -50,6 +50,9 @@ + * SI supports 16. + */ + ++/* Special value that no flush is necessary */ ++#define AMDGPU_VM_NO_FLUSH (~0ll) ++ + /** + * amdgpu_vm_num_pde - return the number of page directory entries + * +@@ -164,50 +167,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, + * Allocate an id for the vm, adding fences to the sync obj as necessary. 
+ */ + int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, +- struct amdgpu_sync *sync, struct fence *fence) ++ struct amdgpu_sync *sync, struct fence *fence, ++ unsigned *vm_id, uint64_t *vm_pd_addr) + { +- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; ++ uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); + struct amdgpu_device *adev = ring->adev; +- struct amdgpu_vm_manager_id *id; ++ struct amdgpu_vm_id *id = &vm->ids[ring->idx]; ++ struct fence *updates = sync->last_vm_update; + int r; + + mutex_lock(&adev->vm_manager.lock); + + /* check if the id is still valid */ +- if (vm_id->id) { ++ if (id->mgr_id) { ++ struct fence *flushed = id->flushed_updates; ++ bool is_later; + long owner; + +- id = &adev->vm_manager.ids[vm_id->id]; +- owner = atomic_long_read(&id->owner); +- if (owner == (long)vm) { +- list_move_tail(&id->list, &adev->vm_manager.ids_lru); +- trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); ++ if (!flushed) ++ is_later = true; ++ else if (!updates) ++ is_later = false; ++ else ++ is_later = fence_is_later(updates, flushed); ++ ++ owner = atomic_long_read(&id->mgr_id->owner); ++ if (!is_later && owner == (long)id && ++ pd_addr == id->pd_gpu_addr) { ++ ++ fence_put(id->mgr_id->active); ++ id->mgr_id->active = fence_get(fence); ++ ++ list_move_tail(&id->mgr_id->list, ++ &adev->vm_manager.ids_lru); + +- fence_put(id->active); +- id->active = fence_get(fence); ++ *vm_id = id->mgr_id - adev->vm_manager.ids; ++ *vm_pd_addr = AMDGPU_VM_NO_FLUSH; ++ trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx); + + mutex_unlock(&adev->vm_manager.lock); + return 0; + } + } + +- /* we definately need to flush */ +- vm_id->pd_gpu_addr = ~0ll; ++ id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, ++ struct amdgpu_vm_manager_id, ++ list); + +- id = list_first_entry(&adev->vm_manager.ids_lru, +- struct amdgpu_vm_manager_id, +- list); +- list_move_tail(&id->list, &adev->vm_manager.ids_lru); +- atomic_long_set(&id->owner, (long)vm); ++ r = 
amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active); ++ if (!r) { ++ fence_put(id->mgr_id->active); ++ id->mgr_id->active = fence_get(fence); + +- vm_id->id = id - adev->vm_manager.ids; +- trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); ++ fence_put(id->flushed_updates); ++ id->flushed_updates = fence_get(updates); + +- r = amdgpu_sync_fence(ring->adev, sync, id->active); ++ id->pd_gpu_addr = pd_addr; + +- if (!r) { +- fence_put(id->active); +- id->active = fence_get(fence); ++ list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); ++ atomic_long_set(&id->mgr_id->owner, (long)id); ++ ++ *vm_id = id->mgr_id - adev->vm_manager.ids; ++ *vm_pd_addr = pd_addr; ++ trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx); + } + + mutex_unlock(&adev->vm_manager.lock); +@@ -218,35 +240,18 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + * amdgpu_vm_flush - hardware flush the vm + * + * @ring: ring to use for flush +- * @vm: vm we want to flush +- * @updates: last vm update that we waited for ++ * @vmid: vmid number to use ++ * @pd_addr: address of the page directory + * +- * Flush the vm. ++ * Emit a VM flush when it is necessary. 
+ */ + void amdgpu_vm_flush(struct amdgpu_ring *ring, +- struct amdgpu_vm *vm, +- struct fence *updates) ++ unsigned vmid, ++ uint64_t pd_addr) + { +- uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); +- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; +- struct fence *flushed_updates = vm_id->flushed_updates; +- bool is_later; +- +- if (!flushed_updates) +- is_later = true; +- else if (!updates) +- is_later = false; +- else +- is_later = fence_is_later(updates, flushed_updates); +- +- if (pd_addr != vm_id->pd_gpu_addr || is_later) { +- trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); +- if (is_later) { +- vm_id->flushed_updates = fence_get(updates); +- fence_put(flushed_updates); +- } +- vm_id->pd_gpu_addr = pd_addr; +- amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); ++ if (pd_addr != AMDGPU_VM_NO_FLUSH) { ++ trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid); ++ amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr); + } + } + +@@ -1291,7 +1296,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) + int i, r; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +- vm->ids[i].id = 0; ++ vm->ids[i].mgr_id = NULL; + vm->ids[i].flushed_updates = NULL; + } + vm->va = RB_ROOT; +@@ -1385,13 +1390,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + amdgpu_bo_unref(&vm->page_directory); + fence_put(vm->page_directory_fence); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +- unsigned id = vm->ids[i].id; ++ struct amdgpu_vm_id *id = &vm->ids[i]; + +- atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner, +- (long)vm, 0); +- fence_put(vm->ids[i].flushed_updates); ++ if (id->mgr_id) ++ atomic_long_cmpxchg(&id->mgr_id->owner, ++ (long)id, 0); ++ fence_put(id->flushed_updates); + } +- + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +index 2bf993c..db10010 100644 +--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c ++++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +@@ -212,7 +212,7 
@@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) + { +- u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; ++ u32 extra_bits = ib->vm_id & 0xf; + u32 next_rptr = ring->wptr + 5; + + while ((next_rptr & 7) != 4) +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index c452048..d0cb200 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -2043,8 +2043,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | +- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); ++ control |= ib->length_dw | (ib->vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +@@ -2072,8 +2071,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | +- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); ++ control |= ib->length_dw | (ib->vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 48bc114..fadbfd8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -4619,8 +4619,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | +- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); ++ control |= ib->length_dw | (ib->vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +@@ -4649,8 +4648,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + +- control |= ib->length_dw | +- (ib->vm ? 
(ib->vm->ids[ring->idx].id << 24) : 0); ++ control |= ib->length_dw | (ib->vm_id << 24); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +index 1f70d83..1562291 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +@@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) + { +- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; ++ u32 vmid = ib->vm_id & 0xf; + u32 next_rptr = ring->wptr + 5; + + while ((next_rptr & 7) != 2) +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +index 2389bdb..d748a3a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +@@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) + { +- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; ++ u32 vmid = ib->vm_id & 0xf; + u32 next_rptr = ring->wptr + 5; + + while ((next_rptr & 7) != 2) +-- +2.7.4 + |