Diffstat (limited to 'meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch')
-rw-r--r--  meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch  |  293
1 file changed, 293 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch
new file mode 100644
index 00000000..07e61477
--- /dev/null
+++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch
@@ -0,0 +1,293 @@
+From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Tue, 7 Jul 2015 17:24:49 +0200
+Subject: [PATCH 0353/1050] drm/amdgpu: add user fence context map v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This is a prerequisite for the GPU scheduler to make the order
+of submission independent from the order of execution.
+
+v2: properly implement the locking
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
+Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 ++++++++++++++++++---------------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++-
+ 4 files changed, 110 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 70e783a..0220d98 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -415,6 +415,8 @@ struct amdgpu_user_fence {
+ struct amdgpu_bo *bo;
+ /* write-back address offset to bo start */
+ uint32_t offset;
++ /* resulting sequence number */
++ uint64_t sequence;
+ };
+
+ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
+@@ -985,9 +987,18 @@ struct amdgpu_vm_manager {
+ * context related structures
+ */
+
++#define AMDGPU_CTX_MAX_CS_PENDING 16
++
++struct amdgpu_ctx_ring {
++ uint64_t sequence;
++ struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING];
++};
++
+ struct amdgpu_ctx {
+ struct kref refcount;
+ unsigned reset_counter;
++ spinlock_t ring_lock;
++ struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
+ };
+
+ struct amdgpu_ctx_mgr {
+@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
+ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++ struct fence *fence);
++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
++ struct amdgpu_ring *ring, uint64_t seq);
++
+ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index 53e6a10f..cef8360 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (j = 0; j < num_deps; ++j) {
+- struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring;
+ struct amdgpu_ctx *ctx;
++ struct fence *fence;
+
+ r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
+ deps[j].ip_instance,
+@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ if (ctx == NULL)
+ return -EINVAL;
+
+- r = amdgpu_fence_recreate(ring, p->filp,
+- deps[j].handle,
+- &fence);
+- if (r) {
++ fence = amdgpu_ctx_get_fence(ctx, ring,
++ deps[j].handle);
++ if (IS_ERR(fence)) {
++ r = PTR_ERR(fence);
+ amdgpu_ctx_put(ctx);
+ return r;
+- }
+-
+- r = amdgpu_sync_fence(adev, &ib->sync, &fence->base);
+- amdgpu_fence_unref(&fence);
+- amdgpu_ctx_put(ctx);
+
+- if (r)
+- return r;
++ } else if (fence) {
++ r = amdgpu_sync_fence(adev, &ib->sync, fence);
++ fence_put(fence);
++ amdgpu_ctx_put(ctx);
++ if (r)
++ return r;
++ }
+ }
+ }
+
+@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ r = amdgpu_cs_ib_fill(adev, &parser);
+ }
+
+- if (!r)
++ if (!r) {
+ r = amdgpu_cs_dependencies(adev, &parser);
++ if (r)
++ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
++ }
+
+ if (r) {
+ amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ goto out;
+ }
+
+- cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
++ cs->out.handle = parser.uf.sequence;
+ out:
+ amdgpu_cs_parser_fini(&parser, r, true);
+ up_read(&adev->exclusive_lock);
+@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
+ union drm_amdgpu_wait_cs *wait = data;
+ struct amdgpu_device *adev = dev->dev_private;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+- struct amdgpu_fence *fence = NULL;
+ struct amdgpu_ring *ring = NULL;
+ struct amdgpu_ctx *ctx;
++ struct fence *fence;
+ long r;
+
++ r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
++ wait->in.ring, &ring);
++ if (r)
++ return r;
++
+ ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+- r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
+- wait->in.ring, &ring);
+- if (r) {
+- amdgpu_ctx_put(ctx);
+- return r;
+- }
++ fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
++ if (IS_ERR(fence))
++ r = PTR_ERR(fence);
+
+- r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
+- if (r) {
+- amdgpu_ctx_put(ctx);
+- return r;
+- }
++ else if (fence) {
++ r = fence_wait_timeout(fence, true, timeout);
++ fence_put(fence);
++
++ } else
++ r = 1;
+
+- r = fence_wait_timeout(&fence->base, true, timeout);
+- amdgpu_fence_unref(&fence);
+ amdgpu_ctx_put(ctx);
+ if (r < 0)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+index e63cfb7..c23bfd8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -28,17 +28,22 @@
+ static void amdgpu_ctx_do_release(struct kref *ref)
+ {
+ struct amdgpu_ctx *ctx;
++ unsigned i, j;
+
+ ctx = container_of(ref, struct amdgpu_ctx, refcount);
++
++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
++ for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
++ fence_put(ctx->rings[i].fences[j]);
+ kfree(ctx);
+ }
+
+ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
+ uint32_t *id)
+ {
+- int r;
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
++ int i, r;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
+
+ memset(ctx, 0, sizeof(*ctx));
+ kref_init(&ctx->refcount);
++ spin_lock_init(&ctx->ring_lock);
++ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
++ ctx->rings[i].sequence = 1;
+ mutex_unlock(&mgr->lock);
+
+ return 0;
+@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
+ kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ return 0;
+ }
++
++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++ struct fence *fence)
++{
++ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
++ uint64_t seq = cring->sequence;
++ unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
++ struct fence *other = cring->fences[idx];
++
++ if (other) {
++ signed long r;
++ r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
++ if (r < 0)
++ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
++ }
++
++ fence_get(fence);
++
++ spin_lock(&ctx->ring_lock);
++ cring->fences[idx] = fence;
++ cring->sequence++;
++ spin_unlock(&ctx->ring_lock);
++
++ fence_put(other);
++
++ return seq;
++}
++
++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
++ struct amdgpu_ring *ring, uint64_t seq)
++{
++ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
++ struct fence *fence;
++
++ spin_lock(&ctx->ring_lock);
++ if (seq >= cring->sequence) {
++ spin_unlock(&ctx->ring_lock);
++ return ERR_PTR(-EINVAL);
++ }
++
++ if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) {
++ spin_unlock(&ctx->ring_lock);
++ return NULL;
++ }
++
++ fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
++ spin_unlock(&ctx->ring_lock);
++
++ return fence;
++}
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+index 2722815..95d5334 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
+ /* wrap the last IB with fence */
+ if (ib->user) {
+ uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
++ ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
++ &ib->fence->base);
+ addr += ib->user->offset;
+- amdgpu_ring_emit_fence(ring, addr, ib->fence->seq,
++ amdgpu_ring_emit_fence(ring, addr, ib->user->sequence,
+ AMDGPU_FENCE_FLAG_64BIT);
+ }
+
+--
+1.9.1
+
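
For readers skimming the patch above: the core idea is a per-ring circular window of AMDGPU_CTX_MAX_CS_PENDING fences, indexed by seq % AMDGPU_CTX_MAX_CS_PENDING, so the sequence number handed back to user space stays valid even when the GPU scheduler reorders execution. The following stand-alone C sketch models that bookkeeping under simplified assumptions: toy_fence, toy_ring, ring_add_fence and ring_get_fence are illustrative stand-ins, not amdgpu API; plain refcounting stands in for the kernel's fence_get()/fence_put(), and ctx->ring_lock is omitted because the sketch is single-threaded.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_CS_PENDING 16   /* mirrors AMDGPU_CTX_MAX_CS_PENDING */

    struct toy_fence {
            int refcount;
            uint64_t seq;       /* sequence number returned to user space */
    };

    struct toy_ring {
            uint64_t sequence;  /* next sequence number, starts at 1 */
            struct toy_fence *fences[MAX_CS_PENDING];
    };

    /* Store a fence and return the sequence number that identifies it. */
    static uint64_t ring_add_fence(struct toy_ring *ring, struct toy_fence *f)
    {
            uint64_t seq = ring->sequence++;
            unsigned idx = seq % MAX_CS_PENDING;
            struct toy_fence *old = ring->fences[idx];

            /*
             * The patch waits (fence_wait_timeout()) for the fence being
             * evicted from the window, so at most MAX_CS_PENDING
             * submissions are in flight per ring; the toy model just
             * drops its reference.
             */
            if (old && --old->refcount == 0)
                    free(old);

            f->refcount++;
            f->seq = seq;
            ring->fences[idx] = f;
            return seq;
    }

    /*
     * Look a fence up by sequence number, with the same three outcomes
     * as amdgpu_ctx_get_fence(): not submitted yet (error), aged out of
     * the window (NULL, i.e. long since signaled), or still tracked.
     */
    static struct toy_fence *ring_get_fence(struct toy_ring *ring,
                                            uint64_t seq, int *err)
    {
            struct toy_fence *f;

            *err = 0;

            /* Sequence numbers the ring has not issued yet are invalid. */
            if (seq >= ring->sequence) {
                    *err = -1;
                    return NULL;
            }

            /*
             * Entries older than the window have been overwritten.  (The
             * extra "> MAX_CS_PENDING" guard avoids unsigned wrap-around
             * while the window is still filling; it is an addition of
             * this sketch, not part of the patch.)
             */
            if (ring->sequence > MAX_CS_PENDING &&
                seq < ring->sequence - MAX_CS_PENDING)
                    return NULL;

            f = ring->fences[seq % MAX_CS_PENDING];
            f->refcount++;
            return f;
    }

    int main(void)
    {
            struct toy_ring ring = { .sequence = 1 };
            struct toy_fence *f = calloc(1, sizeof(*f));
            uint64_t handle;
            int err;

            handle = ring_add_fence(&ring, f);
            if (ring_get_fence(&ring, handle, &err))
                    printf("seq %llu is still tracked\n",
                           (unsigned long long)handle);
            if (!ring_get_fence(&ring, handle + 1, &err) && err)
                    printf("seq %llu was never submitted\n",
                           (unsigned long long)(handle + 1));
            return 0;
    }

The design trade-off is visible in amdgpu_ctx_add_fence() above: overwriting a slot first waits for the fence already stored there, which throttles each context to at most 16 outstanding submissions per ring while keeping both the store and the handle lookup O(1).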