common/recipes-kernel/linux/files/0989-drm-amdgpu-optimize-VM-fencing.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

From 1098eae4a3cb24976d7099e0a0305567d0ebbb4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 26 Jan 2016 11:40:46 +0100
Subject: [PATCH 0989/1565] drm/amdgpu: optimize VM fencing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need to fence every page table, just the page directory is enough.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 50 +++++++++++++++-------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7b660db..161652b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -624,36 +624,25 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  *
  * Global and local mutex must be locked!
  */
-static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				 struct amdgpu_gart *gtt,
-				 uint32_t gtt_flags,
-				 struct amdgpu_vm *vm,
-				 struct amdgpu_ib *ib,
-				 uint64_t start, uint64_t end,
-				 uint64_t dst, uint32_t flags)
+static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+				  struct amdgpu_gart *gtt,
+				  uint32_t gtt_flags,
+				  struct amdgpu_vm *vm,
+				  struct amdgpu_ib *ib,
+				  uint64_t start, uint64_t end,
+				  uint64_t dst, uint32_t flags)
 {
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
-	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned count = 0;
 	uint64_t addr;
 
-	/* sync to everything on unmapping */
-	if (!(flags & AMDGPU_PTE_VALID))
-		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
-
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
 		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
 		unsigned nptes;
 		uint64_t pte;
-		int r;
-
-		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
-		r = reservation_object_reserve_shared(pt->tbo.resv);
-		if (r)
-			return r;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
@@ -687,8 +676,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 				    last_pte, last_pte + 8 * count,
 				    last_dst, flags);
 	}
-
-	return 0;
 }
 
 /**
@@ -716,11 +703,16 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct fence **fence)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
 	int r;
 
+	/* sync to everything on unmapping */
+	if (!(flags & AMDGPU_PTE_VALID))
+		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
 	nptes = last - start + 1;
 
 	/*
@@ -761,15 +753,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		return r;
 	}
 
-	ib->length_dw = 0;
+	r = amdgpu_sync_resv(adev, &ib->sync, vm->page_directory->tbo.resv,
+			     owner);
+	if (r)
+		goto error_free;
 
-	r = amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start,
-				  last + 1, addr, flags);
-	if (r) {
-		amdgpu_ib_free(adev, ib);
-		kfree(ib);
-		return r;
-	}
+	r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
+	if (r)
+		goto error_free;
+
+	amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
+			      addr, flags);
 
 	amdgpu_vm_pad_ib(adev, ib);
 	WARN_ON(ib->length_dw > ndw);
-- 
1.9.1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

// SPDX-License-Identifier: GPL-2.0-only
///
/// From Documentation/filesystems/sysfs.rst:
///  show() must not use snprintf() when formatting the value to be
///  returned to user space. If you can guarantee that an overflow
///  will never happen you can use sprintf() otherwise you must use
///  scnprintf().
///
// Confidence: High
// Copyright: (C) 2020 Denis Efremov ISPRAS
// Options: --no-includes --include-headers
//

virtual report
virtual org
virtual context
virtual patch

@r depends on !patch@
identifier show, dev, attr, buf;
position p;
@@

ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
{
	<...
*	return snprintf@p(...);
	...>
}

@rp depends on patch@
identifier show, dev, attr, buf;
@@

ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
{
	<...
	return
-		snprintf
+		scnprintf
			(...);
	...>
}

@script: python depends on report@
p << r.p;
@@

coccilib.report.print_report(p[0], "WARNING: use scnprintf or sprintf")

@script: python depends on org@
p << r.p;
@@

coccilib.org.print_todo(p[0], "WARNING: use scnprintf or sprintf")