diff options
Diffstat (limited to 'meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch')
-rw-r--r-- | meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch new file mode 100644 index 00000000..07e61477 --- /dev/null +++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch @@ -0,0 +1,293 @@ +From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Tue, 7 Jul 2015 17:24:49 +0200 +Subject: [PATCH 0353/1050] drm/amdgpu: add user fence context map v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is a prerequisite for the GPU scheduler to make the order +of submission independent from the order of execution. + +v2: properly implement the locking + +Signed-off-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com> +Reviewed-by: Chunming Zhou <david1.zhou@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 ++++++++++++++++++--------------- + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++- + 4 files changed, 110 insertions(+), 30 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 70e783a..0220d98 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -415,6 +415,8 @@ struct amdgpu_user_fence { + struct amdgpu_bo *bo; + /* write-back address offset to bo start */ + uint32_t offset; ++ /* resulting sequence number */ ++ uint64_t sequence; + }; + + int amdgpu_fence_driver_init(struct amdgpu_device *adev); +@@ -985,9 +987,18 @@ struct amdgpu_vm_manager { + * context related structures + */ + ++#define AMDGPU_CTX_MAX_CS_PENDING 16 ++ ++struct amdgpu_ctx_ring { ++ uint64_t sequence; ++ struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; ++}; ++ + struct amdgpu_ctx { + struct kref refcount; + unsigned reset_counter; ++ spinlock_t ring_lock; ++ struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; + }; + + struct amdgpu_ctx_mgr { +@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); + struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); + int amdgpu_ctx_put(struct amdgpu_ctx *ctx); + ++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct fence *fence); ++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, ++ struct amdgpu_ring *ring, uint64_t seq); ++ + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index 53e6a10f..cef8360 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (j = 0; j < num_deps; ++j) { +- struct amdgpu_fence *fence; + struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx; ++ struct fence *fence; + + r = amdgpu_cs_get_ring(adev, deps[j].ip_type, + deps[j].ip_instance, +@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + if (ctx == NULL) + return -EINVAL; + +- r = amdgpu_fence_recreate(ring, p->filp, +- deps[j].handle, +- &fence); +- if (r) { ++ fence = amdgpu_ctx_get_fence(ctx, ring, ++ deps[j].handle); ++ if (IS_ERR(fence)) { ++ r = PTR_ERR(fence); + amdgpu_ctx_put(ctx); + return r; +- } +- +- r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); +- amdgpu_fence_unref(&fence); +- amdgpu_ctx_put(ctx); + +- if (r) +- return r; ++ } else if (fence) { ++ r = amdgpu_sync_fence(adev, &ib->sync, fence); ++ fence_put(fence); ++ amdgpu_ctx_put(ctx); ++ if (r) ++ return r; ++ } + } + } + +@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + r = amdgpu_cs_ib_fill(adev, &parser); + } + +- if (!r) ++ if (!r) { + r = amdgpu_cs_dependencies(adev, &parser); ++ if (r) ++ DRM_ERROR("Failed in the dependencies handling %d!\n", r); ++ } + + if (r) { + amdgpu_cs_parser_fini(&parser, r, reserved_buffers); +@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) + goto out; + } + +- cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; ++ cs->out.handle = parser.uf.sequence; + out: + amdgpu_cs_parser_fini(&parser, r, true); + up_read(&adev->exclusive_lock); +@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, + union drm_amdgpu_wait_cs *wait = data; + struct amdgpu_device *adev = dev->dev_private; + unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); +- struct amdgpu_fence *fence = NULL; + struct amdgpu_ring *ring = NULL; + struct amdgpu_ctx *ctx; ++ struct fence *fence; + long r; + ++ r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, ++ wait->in.ring, &ring); ++ if (r) ++ return r; ++ + ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); + if (ctx == NULL) + return -EINVAL; + +- r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, +- wait->in.ring, &ring); +- if (r) { +- amdgpu_ctx_put(ctx); +- return r; +- } ++ fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); ++ if (IS_ERR(fence)) ++ r = PTR_ERR(fence); + +- r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); +- if (r) { +- amdgpu_ctx_put(ctx); +- return r; +- } ++ else if (fence) { ++ r = fence_wait_timeout(fence, true, timeout); ++ fence_put(fence); ++ ++ } else ++ r = 1; + +- r = fence_wait_timeout(&fence->base, true, timeout); +- amdgpu_fence_unref(&fence); + amdgpu_ctx_put(ctx); + if (r < 0) + return r; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +index e63cfb7..c23bfd8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +@@ -28,17 +28,22 @@ + static void amdgpu_ctx_do_release(struct kref *ref) + { + struct amdgpu_ctx *ctx; ++ unsigned i, j; + + ctx = container_of(ref, struct amdgpu_ctx, refcount); ++ ++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) ++ for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) ++ fence_put(ctx->rings[i].fences[j]); + kfree(ctx); + } + + int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + uint32_t *id) + { +- int r; + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; ++ int i, r; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) +@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + + memset(ctx, 0, sizeof(*ctx)); + kref_init(&ctx->refcount); ++ spin_lock_init(&ctx->ring_lock); ++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) ++ ctx->rings[i].sequence = 1; + mutex_unlock(&mgr->lock); + + return 0; +@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) + kref_put(&ctx->refcount, amdgpu_ctx_do_release); + return 0; + } ++ ++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct fence *fence) ++{ ++ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; ++ uint64_t seq = cring->sequence; ++ unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; ++ struct fence *other = cring->fences[idx]; ++ ++ if (other) { ++ signed long r; ++ r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); ++ if (r < 0) ++ DRM_ERROR("Error (%ld) waiting for fence!\n", r); ++ } ++ ++ fence_get(fence); ++ ++ spin_lock(&ctx->ring_lock); ++ cring->fences[idx] = fence; ++ cring->sequence++; ++ spin_unlock(&ctx->ring_lock); ++ ++ fence_put(other); ++ ++ return seq; ++} ++ ++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, ++ struct amdgpu_ring *ring, uint64_t seq) ++{ ++ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; ++ struct fence *fence; ++ ++ spin_lock(&ctx->ring_lock); ++ if (seq >= cring->sequence) { ++ spin_unlock(&ctx->ring_lock); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { ++ spin_unlock(&ctx->ring_lock); ++ return NULL; ++ } ++ ++ fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); ++ spin_unlock(&ctx->ring_lock); ++ ++ return fence; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +index 2722815..95d5334 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, + /* wrap the last IB with fence */ + if (ib->user) { + uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); ++ ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, ++ &ib->fence->base); + addr += ib->user->offset; +- amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, ++ amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, + AMDGPU_FENCE_FLAG_64BIT); + } + +-- +1.9.1 + |