common/recipes-kernel/linux/linux-yocto-4.14.71/2000-drm-amdgpu-Move-old-fence-waiting-before-reservation.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

From e814a90845b6824221c26465f71ef475c45e3a9e Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Tue, 10 Oct 2017 16:50:17 -0400
Subject: [PATCH 2000/4131] drm/amdgpu: Move old fence waiting before
 reservation lock is aquired v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Helps avoiding deadlock during GPU reset.
Added mutex to amdgpu_ctx to preserve order of fences on a ring.

v2:
Put waiting logic in a function in a seperate function in amdgpu_ctx.c

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>

 Conflicts:
        drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

Change-Id: I56f1b6d60eefd53417a7123e017184f45689cbd2
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 606cf9e..da8003e 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -762,6 +762,7 @@ struct amdgpu_ctx {
 	bool 			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
+	struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -784,9 +785,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5133b7a..8a936a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -89,6 +89,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		ret = -EINVAL;
 		goto free_chunk;
 	}
+	
+	mutex_lock(&p->ctx->lock);
 
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
@@ -774,8 +776,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -936,9 +940,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			r = amdgpu_ring_parse_cs(ring, p, j);
 			if (r)
 				return r;
-
 		}
-
 		j++;
 	}
 
@@ -1026,7 +1028,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return 0;
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0ec5985..064247b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -143,6 +145,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -313,12 +317,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			return r;
-	}
+		if (other)
+			BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -389,6 +389,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	}
 }
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = kcl_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
 	mutex_init(&mgr->lock);
-- 
2.7.4