1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
From 2a8a7db7857985c15f0fcd0949fb1e3a4c1820bb Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Wed, 8 Nov 2017 15:36:47 -0500
Subject: [PATCH 1350/4131] drm/amdgpu: Share eviction fence with KFD
Currently, when TTM wants to evict a process BO,
dma_fence_ops.enable_signaling() calls KFD call back function
evict_and_restore_process(fence *eviction_fence) to prepare for
eviction. This function starts a worker thread that handles suspending
the process and signaling the eviction fence.
However, during suspend / resume, KFD cannot reliably depend on this
eviction sequence. This change is necessary to handle suspend / resume
sequence by KFD.
Change-Id: I342e64a4986225246d8fff21b9c96f2c844648ad
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 ++++--
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 6f5f93c..232e713 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -945,7 +945,7 @@ void kfd_restore_bo_worker(struct work_struct *work)
*/
p->last_restore_timestamp = get_jiffies_64();
- ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info);
+ ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info, &p->ef);
if (ret) {
pr_info("Restore failed, try again after %d ms\n",
PROCESS_BACK_OFF_TIME_MS);
@@ -1049,6 +1049,15 @@ void kfd_evict_bo_worker(struct work_struct *work)
ret = quiesce_process_mm(p);
if (!ret) {
dma_fence_signal(eviction_work->quiesce_fence);
+ WARN_ONCE(eviction_work->quiesce_fence != p->ef,
+ "Eviction fence mismatch\n");
+ dma_fence_put(p->ef);
+ /* TODO: quiesce_fence is same as kfd_process->ef. But
+ * quiesce_fence is also used to avoid starting multiple
+ * eviction work items. This might not be necessary and
+ * one of the variables could be removed
+ */
+ p->ef = NULL;
schedule_delayed_work(&p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
} else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index fc2936b..851d1f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -730,6 +730,11 @@ struct kfd_process {
/* Information used for memory eviction */
void *process_info;
+ /* Eviction fence that is attached to all the BOs of this process. The
+ * fence will be triggered during eviction and new one will be created
+ * during restore
+ */
+ struct dma_fence *ef;
/* Work items for evicting and restoring BOs */
struct kfd_eviction_work eviction_work;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4080ac0..7271a08 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -360,9 +360,11 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
per_device_list) {
kfd_flush_tlb(pdd->dev, p->pasid);
/* Destroy the GPUVM VM context */
- if (pdd->vm)
+ if (pdd->vm) {
+ dma_fence_put(p->ef);
pdd->dev->kfd2kgd->destroy_process_vm(
pdd->dev->kgd, pdd->vm);
+ }
list_del(&pdd->per_device_list);
if (pdd->qpd.cwsr_pages) {
@@ -724,7 +726,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
/* Create the GPUVM context for this specific device */
if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
- &p->process_info)) {
+ &p->process_info, &p->ef)) {
pr_err("Failed to create process VM object\n");
goto err_create_pdd;
}
--
2.7.4
|