aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2055-drm-amdgpu-cleanup-job-reset-routine-v2.patch
blob: dbe748ce9b4915437f59fa72fbf5ea4782e40b71 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
From 22c6aecb8e3ec8a1b6c1fb43076e86a7de4c3780 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 16 Oct 2017 19:46:43 +0800
Subject: [PATCH 2055/4131] drm/amdgpu:cleanup job reset routine(v2)

merge the setting guilty on context into this function
to avoid implement extra routine.

v2:
go through entity list and compare the fence_ctx
before operate on the entity, otherwise the entity
may be just a wild pointer

Change-Id: I7a0063464fdc85d5ac9080046380e745565ff540
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Chunming Zhou <David1.Zhou@amd.com>

 Conflicts:
        drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  4 ++--
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 31 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +-
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d0aaed4..29349bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2896,7 +2896,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
 			amd_sched_job_kickout(&job->base);
 
 		/* only do job_reset on the hang ring if @job not NULL */
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
@@ -3017,7 +3017,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 		if (!ring || !ring->sched.thread)
 			continue;
 		kthread_park(ring->sched.thread);
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 041510e..2634846 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -443,9 +443,18 @@ static void amd_sched_job_timedout(struct work_struct *work)
 	job->sched->ops->timedout_job(job);
 }
 
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+static void amd_sched_set_guilty(struct amd_sched_job *s_job)
+{
+	if (atomic_inc_return(&s_job->karma) > s_job->sched->hang_limit)
+		if (s_job->s_entity->guilty)
+			atomic_set(s_job->s_entity->guilty, 1);
+}
+
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad)
 {
 	struct amd_sched_job *s_job;
+	struct amd_sched_entity *entity, *tmp;
+	int i;;
 
 	spin_lock(&sched->job_list_lock);
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
@@ -458,6 +467,26 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 		}
 	}
 	spin_unlock(&sched->job_list_lock);
+
+	if (bad) {
+		bool found = false;
+
+		for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++ ) {
+			struct amd_sched_rq *rq = &sched->sched_rq[i];
+
+			spin_lock(&rq->lock);
+			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+				if (bad->s_fence->scheduled.context == entity->fence_context) {
+					found = true;
+					amd_sched_set_guilty(bad);
+					break;
+				}
+			}
+			spin_unlock(&rq->lock);
+			if (found)
+				break;
+		}
+	}
 }
 
 void amd_sched_job_kickout(struct amd_sched_job *s_job)
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 7342763..64c887f 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -173,7 +173,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       struct amd_gpu_scheduler *sched,
 		       struct amd_sched_entity *entity,
 		       void *owner);
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 bool amd_sched_dependency_optimized(struct dma_fence* fence,
 				    struct amd_sched_entity *entity);
-- 
2.7.4