aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1583-drm-amdgpu-Fix-userptr-restore-race-condition-with-f.patch
blob: e89712bacfde9536f4a361df87a65ff69138bafa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
From 2c3b0f1e194a27c0c51703e8b9c6ed5356a06944 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 3 Feb 2017 17:50:59 -0500
Subject: [PATCH 1583/4131] drm/amdgpu: Fix userptr restore race condition with
 free

Update_user_pages needs to drop the lock. This could lead to freeing
a userptr BO while a restore was in progress. In that case
cancel_restore_locked and restore_mem_worker would both call mmput
and lead to corruption of kernel data structures.

Fix this by marking the mem object as busy while dropping the lock
for user page updates. When canceling restore, wait for busy mem
objects. This ensures that a restore that has already started will
be able to complete before the BO gets freed. When the restore completes
it sets mem->mm to NULL and calls mmput. Then cancel_restore_locked
will not call mmput again.

Bug: SWDEV-112697
Change-Id: I6c76f7559957992303cf69b7a26360886f090990
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c       | 11 +++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
 3 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 05a627a..a2d1b55 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -194,6 +194,17 @@ static void cancel_restore_locked(struct kgd_mem *mem)
 	struct mm_struct *mm;
 
 	while (mem->mm) {
+		/* update_user_pages needs to drop the lock
+		 * briefly. Therefore holding the lock is no guarantee
+		 * that no restore is in progress
+		 */
+		if (mem->busy) {
+			mutex_unlock(&mem->lock);
+			schedule_timeout_uninterruptible(1);
+			mutex_lock(&mem->lock);
+			continue;
+		}
+
 		mm = mem->mm;
 		mem->mm = NULL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 9ac3b6b..db4f75c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -63,6 +63,7 @@ struct kgd_mem {
         /* flags bitfield */
         bool no_substitute : 1;
         bool aql_queue     : 1;
+	bool busy          : 1;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d08d25c6..8d05564 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -826,6 +826,7 @@ static int update_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 		if (!pages)
 			return -ENOMEM;
 
+		mem->busy = true;
 		mutex_unlock(&mem->lock);
 
 		while (true) {
@@ -834,6 +835,7 @@ static int update_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 			up_read(&mm->mmap_sem);
 
 			mutex_lock(&mem->lock);
+			mem->busy = false;
 			if (ret != 0)
 				return ret;
 
-- 
2.7.4