aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1254-drm-amdkfd-Avoid-KFD-process-starvation-due-to-evict.patch
blob: c8b752472703c2b1815db0f471375d34fc2b92ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
From 1b4fe6d0f39f9500f04eb102aee802917541dafb Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Tue, 28 Mar 2017 16:56:41 -0400
Subject: [PATCH 1254/4131] drm/amdkfd: Avoid KFD process starvation due to
 evictions

Insert a timeout before the same process can be evicted again.

Change-Id: Iac3ef0f54edf860dd023a6cb5d7c0f7edd9d1893
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 29 ++++++++++++++++++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  1 +
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9333433..93ac064 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -865,6 +865,17 @@ void kfd_restore_bo_worker(struct work_struct *work)
 
 	pr_info("Started restoring process of pasid %d\n", p->pasid);
 
+	/* Setting last_restore_timestamp before successful restoration.
+	 * Otherwise this would have to be set by KGD (restore_process_bos)
+	 * before KFD BOs are unreserved. If not, the process can be evicted
+	 * again before the timestamp is set.
+	 * If restore fails, the timestamp will be set again in the next
+	 * attempt. This would mean that the minimum GPU quanta would be
+	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
+	 * functions)
+	 */
+
+	p->last_restore_timestamp = get_jiffies_64();
 	ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info);
 	if (ret) {
 		pr_info("Restore failed, try again after %d ms\n",
@@ -894,6 +905,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct fence *fence)
 {
 	struct kfd_process *p;
+	unsigned long active_time;
+	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
 
 	if (!fence)
 		return -EINVAL;
@@ -919,11 +932,21 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 		}
 	}
 
-	/* During process initialization eviction_work.work is initialized
+	p->eviction_work.eviction_fence = fence_get(fence);
+
+	/* Avoid KFD process starvation. Wait for at least
+	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
+	 */
+	active_time = get_jiffies_64() - p->last_restore_timestamp;
+	if (delay_jiffies > active_time)
+		delay_jiffies -= active_time;
+	else
+		delay_jiffies = 0;
+
+	/* During process initialization eviction_work.dwork is initialized
 	 * to kfd_evict_bo_worker
 	 */
-	p->eviction_work.eviction_fence = fence_get(fence);
-	schedule_delayed_work(&p->eviction_work.dwork, 0);
+	schedule_delayed_work(&p->eviction_work.dwork, delay_jiffies);
 out:
 	kfd_unref_process(p);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 48e6641..f2a9030 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -583,6 +583,8 @@ struct kfd_eviction_work {
 #define PROCESS_RESTORE_TIME_MS 100
 /* Approx. back off time if restore fails due to lack of memory */
 #define PROCESS_BACK_OFF_TIME_MS 100
+/* Approx. time before evicting the process again */
+#define PROCESS_ACTIVE_TIME_MS 10
 
 void kfd_evict_bo_worker(struct work_struct *work);
 void kfd_restore_bo_worker(struct work_struct *work);
@@ -722,6 +724,10 @@ struct kfd_process {
 	/* Work items for evicting and restoring BOs */
 	struct kfd_eviction_work eviction_work;
 	struct delayed_work restore_work;
+	/* Approx. the last timestamp (in jiffies) when the process was
+	 * restored after an eviction
+	 */
+	unsigned long last_restore_timestamp;
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index affa4184..562f061 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -599,6 +599,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 
 	INIT_DELAYED_WORK(&process->eviction_work.dwork, kfd_evict_bo_worker);
 	INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker);
+	process->last_restore_timestamp = get_jiffies_64();
 
 	/* If PeerDirect interface was not detected try to detect it again
 	* in case if network driver was loaded later.
-- 
2.7.4