1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
From e2a8565998cbdc24d717bed98cb69beaa9aad4f0 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Wed, 19 Apr 2017 13:11:30 -0500
Subject: [PATCH 1660/4131] drm/amdkfd: Fix MQD modification race during eviction
The eviction path invokes update_mqd before preempting the scheduler,
which may be using the MQD concurrently. This leads to a race in which
the scheduler sees partially updated MQD register values when mapping
them to the HQD.
Do not modify the MQD during eviction (or restore). Instead, just set
q->properties.is_active directly: false on eviction, true on restore.
Fixes CP hangs under stress testing.
Change-Id: I6a2395cf8ec307048894e32d3a14851f136ed510
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index dcdc380..24ef621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -629,10 +629,11 @@ int process_evict_queues(struct device_queue_manager *dqm,
continue;
}
/* if the queue is not active anyway, it is not evicted */
- if (q->properties.is_active == true)
+ if (q->properties.is_active == true) {
q->properties.is_evicted = true;
+ q->properties.is_active = false;
+ }
- retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
if (is_queue_nocpsch(dqm, q) &&
q->properties.is_evicted)
retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -700,7 +701,8 @@ int process_restore_queues(struct device_queue_manager *dqm,
}
if (q->properties.is_evicted) {
q->properties.is_evicted = false;
- retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
+ q->properties.is_active = true;
+
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA))
--
2.7.4
|