aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/3623-drm-amdgpu-stop-all-rings-before-doing-gpu-recover.patch
blob: 0122fcca34f3c5ae95af9cf68f21da9e8209bc43 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
From 69b616387d8353287a6363622dd5b9406d1296e7 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 25 Dec 2017 15:14:58 +0800
Subject: [PATCH 3623/4131] drm/amdgpu: stop all rings before doing gpu recover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Found that recover_vram_from_shadow sometimes gets executed
in parallel with the SDMA scheduler; we should stop all
schedulers before doing gpu reset/recover.

Change-Id: Ibaef3e3c015f3cf88f84b2eaf95cda95ae1a64e3
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Conflicts:
      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 +++++++++++-------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1619263..5a83045 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2687,18 +2687,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         if (amdgpu_device_has_dc_support(adev))
                 state = drm_atomic_helper_suspend(adev->ddev);
 
-        /* block scheduler */
+	/* block all schedulers and reset given job's ring */
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                 struct amdgpu_ring *ring = adev->rings[i];
 
                 if (!ring || !ring->sched.thread)
                         continue;
 
-                /* only focus on the ring hit timeout if &job not NULL */
+		kthread_park(ring->sched.thread);
+
                 if (job && job->ring->idx != i)
                         continue;
 
-                kthread_park(ring->sched.thread);
 		drm_sched_hw_job_reset(&ring->sched, &job->base);
 
                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2741,33 +2741,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                         }
                         dma_fence_put(fence);
                 }
+	}
 
-                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                        struct amdgpu_ring *ring = adev->rings[i];
-
-                        if (!ring || !ring->sched.thread)
-                                continue;
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
 
-                        /* only focus on the ring hit timeout if &job not NULL */
-                        if (job && job->ring->idx != i)
-                                continue;
+			if (!ring || !ring->sched.thread)
+			continue;
 
+		/* only need recovery sched of the given job's ring
+		* or all rings (in the case @job is NULL)
+		* after above amdgpu_reset accomplished
+		*/
+		if ((!job || job->ring->idx == i) && !r)
 			drm_sched_job_recovery(&ring->sched);
-                        kthread_unpark(ring->sched.thread);
-                }
-        } else {
-                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                        struct amdgpu_ring *ring = adev->rings[i];
-
-                        if (!ring || !ring->sched.thread)
-                                continue;
-
-                        /* only focus on the ring hit timeout if &job not NULL */
-                        if (job && job->ring->idx != i)
-                                continue;
 
                         kthread_unpark(adev->rings[i]->sched.thread);
-                }
         }
 
         if (amdgpu_device_has_dc_support(adev)) {
-- 
2.7.4