1 files changed, 293 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch
new file mode 100644
index 00000000..07e61477
--- /dev/null
+++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0353-drm-amdgpu-add-user-fence-context-map-v2.patch
@@ -0,0 +1,293 @@
+From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Tue, 7 Jul 2015 17:24:49 +0200
+Subject: [PATCH 0353/1050] drm/amdgpu: add user fence context map v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This is a prerequisite for the GPU scheduler to make the order
+of submission independent from the order of execution.
+
+v2: properly implement the locking
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
+Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 16 +++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 60 ++++++++++++++++++---------------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  4 ++-
+ 4 files changed, 110 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 70e783a..0220d98 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -415,6 +415,8 @@ struct amdgpu_user_fence {
+ 	struct amdgpu_bo 	*bo;
+ 	/* write-back address offset to bo start */
+ 	uint32_t                offset;
++	/* resulting sequence number */
++	uint64_t                sequence;
+ };
+ 
+ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
+@@ -985,9 +987,18 @@ struct amdgpu_vm_manager {
+  * context related structures
+  */
+ 
++#define AMDGPU_CTX_MAX_CS_PENDING	16
++
++struct amdgpu_ctx_ring {
++	uint64_t	sequence;	/* number the next added fence gets */
++	struct fence	*fences[AMDGPU_CTX_MAX_CS_PENDING]; /* slot: seq % size */
++};
++
+ struct amdgpu_ctx {
+ 	struct kref		refcount;
+ 	unsigned		reset_counter;
++	spinlock_t		ring_lock;
++	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+ };
+ 
+ struct amdgpu_ctx_mgr {
+@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
+ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+ 
++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++			      struct fence *fence);
++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
++				   struct amdgpu_ring *ring, uint64_t seq);
++
+ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ 		     struct drm_file *filp);
+ 
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index 53e6a10f..cef8360 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ 			sizeof(struct drm_amdgpu_cs_chunk_dep);
+ 
+ 		for (j = 0; j < num_deps; ++j) {
+-			struct amdgpu_fence *fence;
+ 			struct amdgpu_ring *ring;
+ 			struct amdgpu_ctx *ctx;
++			struct fence *fence;
+ 
+ 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
+ 					       deps[j].ip_instance,
+@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ 			if (ctx == NULL)
+ 				return -EINVAL;
+ 
+-			r = amdgpu_fence_recreate(ring, p->filp,
+-						  deps[j].handle,
+-						  &fence);
+-			if (r) {
++			fence = amdgpu_ctx_get_fence(ctx, ring,
++						     deps[j].handle);
++			if (IS_ERR(fence)) {
++				r = PTR_ERR(fence);
+ 				amdgpu_ctx_put(ctx);
+ 				return r;
+-			}
+-
+-			r = amdgpu_sync_fence(adev, &ib->sync, &fence->base);
+-			amdgpu_fence_unref(&fence);
+-			amdgpu_ctx_put(ctx);
+ 
+-			if (r)
+-				return r;
++			} else if (fence) {
++				r = amdgpu_sync_fence(adev, &ib->sync, fence);
++				fence_put(fence);
++				amdgpu_ctx_put(ctx);
++				if (r)
++					return r;
++			}
+ 		}
+ 	}
+ 
+@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ 		r = amdgpu_cs_ib_fill(adev, &parser);
+ 	}
+ 
+-	if (!r)
++	if (!r) {
+ 		r = amdgpu_cs_dependencies(adev, &parser);
++		if (r)
++			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
++	}
+ 
+ 	if (r) {
+ 		amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ 		goto out;
+ 	}
+ 
+-	cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
++	cs->out.handle = parser.uf.sequence;
+ out:
+ 	amdgpu_cs_parser_fini(&parser, r, true);
+ 	up_read(&adev->exclusive_lock);
+@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
+ 	union drm_amdgpu_wait_cs *wait = data;
+ 	struct amdgpu_device *adev = dev->dev_private;
+ 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+-	struct amdgpu_fence *fence = NULL;
+ 	struct amdgpu_ring *ring = NULL;
+ 	struct amdgpu_ctx *ctx;
++	struct fence *fence;
+ 	long r;
+ 
++	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
++			       wait->in.ring, &ring);
++	if (r)
++		return r;
++
+ 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+ 	if (ctx == NULL)
+ 		return -EINVAL;
+ 
+-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
+-			       wait->in.ring, &ring);
+-	if (r) {
+-		amdgpu_ctx_put(ctx);
+-		return r;
+-	}
++	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
++	if (IS_ERR(fence))
++		r = PTR_ERR(fence);
+ 
+-	r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
+-	if (r) {
+-		amdgpu_ctx_put(ctx);
+-		return r;
+-	}
++	else if (fence) {
++		r = fence_wait_timeout(fence, true, timeout);
++		fence_put(fence);
++
++	} else
++		r = 1;
+ 
+-	r = fence_wait_timeout(&fence->base, true, timeout);
+-	amdgpu_fence_unref(&fence);
+ 	amdgpu_ctx_put(ctx);
+ 	if (r < 0)
+ 		return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+index e63cfb7..c23bfd8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -28,17 +28,22 @@
+ static void amdgpu_ctx_do_release(struct kref *ref)
+ {
+ 	struct amdgpu_ctx *ctx;
++	unsigned i, j;
+ 
+ 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
++
++	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
++		for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
++			fence_put(ctx->rings[i].fences[j]);
+ 	kfree(ctx);
+ }
+ 
+ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
+ 		     uint32_t *id)
+ {
+-	int r;
+ 	struct amdgpu_ctx *ctx;
+ 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
++	int i, r;
+ 
+ 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ 	if (!ctx)
+@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
+ 
+ 	memset(ctx, 0, sizeof(*ctx));
+ 	kref_init(&ctx->refcount);
++	spin_lock_init(&ctx->ring_lock);
++	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
++		ctx->rings[i].sequence = 1;
+ 	mutex_unlock(&mgr->lock);
+ 
+ 	return 0;
+@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
+ 	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ 	return 0;
+ }
++
++/* Store @fence in the slot of the next sequence number of @ring and return
++ * that number; waits first for the fence still occupying that slot. */
++uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++			      struct fence *fence)
++{
++	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
++	uint64_t seq = cring->sequence;
++	unsigned slot = seq % AMDGPU_CTX_MAX_CS_PENDING;
++	struct fence *prev = cring->fences[slot];
++	signed long ret = 0;
++
++	if (prev)
++		ret = fence_wait_timeout(prev, false, MAX_SCHEDULE_TIMEOUT);
++	if (ret < 0)
++		DRM_ERROR("Error (%ld) waiting for fence!\n", ret);
++
++	fence_get(fence);	/* reference kept for as long as the slot holds it */
++	spin_lock(&ctx->ring_lock);
++	cring->fences[slot] = fence;
++	cring->sequence++;
++	spin_unlock(&ctx->ring_lock);
++
++	fence_put(prev);	/* the slot no longer points at the old fence */
++
++	return seq;
++}
++
++/*
++ * Return a new reference to the fence stored for @seq on @ring of @ctx.
++ * ERR_PTR(-EINVAL): @seq was not handed out yet. NULL: no fence is kept
++ * for @seq (any more); the callers treat that as already signaled.
++ */
++struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
++				   struct amdgpu_ring *ring, uint64_t seq)
++{
++	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
++	struct fence *fence = NULL;
++
++	spin_lock(&ctx->ring_lock);
++	if (seq >= cring->sequence)
++		fence = ERR_PTR(-EINVAL);
++	/* seq < sequence here, so the difference cannot wrap the way
++	 * "sequence - AMDGPU_CTX_MAX_CS_PENDING" did while sequence < 16. */
++	else if (cring->sequence - seq <= AMDGPU_CTX_MAX_CS_PENDING)
++		fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
++	spin_unlock(&ctx->ring_lock);
++
++	return fence;
++}
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+index 2722815..95d5334 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
+ 	/* wrap the last IB with fence */
+ 	if (ib->user) {
+ 		uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
++		ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
++							  &ib->fence->base);
+ 		addr += ib->user->offset;
+-		amdgpu_ring_emit_fence(ring, addr, ib->fence->seq,
++		amdgpu_ring_emit_fence(ring, addr, ib->user->sequence,
+ 				       AMDGPU_FENCE_FLAG_64BIT);
+ 	}
+ 
+-- 
+1.9.1
+