1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
From 1b4fe6d0f39f9500f04eb102aee802917541dafb Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Tue, 28 Mar 2017 16:56:41 -0400
Subject: [PATCH 1254/4131] drm/amdkfd: Avoid KFD process starvation due to
 evictions

Delay eviction so that a process stays active for at least
PROCESS_ACTIVE_TIME_MS after its last restore before it can be evicted again.
Change-Id: Iac3ef0f54edf860dd023a6cb5d7c0f7edd9d1893
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 29 ++++++++++++++++++++++++++---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++++++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 +
3 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9333433..93ac064 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -865,6 +865,17 @@ void kfd_restore_bo_worker(struct work_struct *work)
pr_info("Started restoring process of pasid %d\n", p->pasid);
+ /* Set last_restore_timestamp before the restore is known to succeed.
+ * Otherwise it would have to be set by KGD (restore_process_bos)
+ * before the KFD BOs are unreserved; if it were set any later, the
+ * process could be evicted again before the timestamp is updated.
+ * If the restore fails, the timestamp is set again on the next
+ * attempt. As a result, the minimum GPU quantum is
+ * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
+ * functions)
+ */
+
+ p->last_restore_timestamp = get_jiffies_64();
ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info);
if (ret) {
pr_info("Restore failed, try again after %d ms\n",
@@ -894,6 +905,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct fence *fence)
{
struct kfd_process *p;
+ unsigned long active_time;
+ unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
if (!fence)
return -EINVAL;
@@ -919,11 +932,21 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
}
}
- /* During process initialization eviction_work.work is initialized
+ p->eviction_work.eviction_fence = fence_get(fence);
+
+ /* Avoid KFD process starvation: do not evict the process again
+ * until PROCESS_ACTIVE_TIME_MS has passed since its last restore.
+ */
+ active_time = get_jiffies_64() - p->last_restore_timestamp;
+ if (delay_jiffies > active_time)
+ delay_jiffies -= active_time;
+ else
+ delay_jiffies = 0;
+
+ /* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
- p->eviction_work.eviction_fence = fence_get(fence);
- schedule_delayed_work(&p->eviction_work.dwork, 0);
+ schedule_delayed_work(&p->eviction_work.dwork, delay_jiffies);
out:
kfd_unref_process(p);
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 48e6641..f2a9030 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -583,6 +583,8 @@ struct kfd_eviction_work {
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
+/* Approx. minimum time a process stays active before it is evicted again */
+#define PROCESS_ACTIVE_TIME_MS 10
void kfd_evict_bo_worker(struct work_struct *work);
void kfd_restore_bo_worker(struct work_struct *work);
@@ -722,6 +724,10 @@ struct kfd_process {
/* Work items for evicting and restoring BOs */
struct kfd_eviction_work eviction_work;
struct delayed_work restore_work;
+ /* Approx. timestamp (in jiffies) of the last restore after an
+ * eviction; initialized when the process is created
+ */
+ unsigned long last_restore_timestamp;
};
/**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index affa4184..562f061 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -599,6 +599,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
INIT_DELAYED_WORK(&process->eviction_work.dwork, kfd_evict_bo_worker);
INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker);
+ process->last_restore_timestamp = get_jiffies_64();
/* If PeerDirect interface was not detected try to detect it again
* in case if network driver was loaded later.
--
2.7.4
|