aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1257-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch
blob: f5b7631fcee5f85017f623cc33c8e85932e08fc9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
From 36d1cf399caa8f414d4894d828b6c8def22a9b71 Mon Sep 17 00:00:00 2001
From: Shaoyun Liu <Shaoyun.Liu@amd.com>
Date: Mon, 27 Mar 2017 16:43:32 -0400
Subject: [PATCH 1257/4131] drm/amdkfd: Allocate CWSR pages per process for APU

The original code for APU uses common reserved pages per device for CWSR trap
 handler TBA and TMA which will be overwrote when mulitple process register
its own second level trap handler.

Change-Id: I62c7c6dccd9b6a95533b65de7c7135f4083497e2
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 55 ++++++++++++++++++++++----------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bbd6336..141d938 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -567,6 +567,7 @@ struct qcm_process_device {
 	uint64_t tba_addr;
 	uint64_t tma_addr;
 	void *cwsr_kaddr;
+	struct page *cwsr_pages;
 
 	/* IB memory */
 	uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 562f061..3c0c40f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -349,6 +349,13 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 			pdd->dev->kfd2kgd->destroy_process_vm(
 				pdd->dev->kgd, pdd->vm);
 		list_del(&pdd->per_device_list);
+
+		if (pdd->qpd.cwsr_pages) {
+			kunmap(pdd->qpd.cwsr_pages);
+			__free_pages(pdd->qpd.cwsr_pages,
+				get_order(pdd->dev->cwsr_size));
+		}
+
 		kfree(pdd);
 	}
 }
@@ -505,9 +512,6 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 					dev->cwsr_size,	&kaddr, pdd);
 			if (!err) {
 				qpd->cwsr_kaddr = kaddr;
-				memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages),
-				       PAGE_SIZE);
-				kunmap(dev->cwsr_pages);
 				qpd->tba_addr = qpd->cwsr_base;
 			} else
 				goto out;
@@ -525,13 +529,15 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 				qpd->cwsr_kaddr = NULL;
 				err = -ENOMEM;
 				goto out;
-			} else
-				qpd->cwsr_kaddr = (void *)qpd->tba_addr;
+			}
 		}
 
+		memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages), PAGE_SIZE);
+		kunmap(dev->cwsr_pages);
+
 		qpd->tma_addr = qpd->tba_addr + dev->tma_offset;
-		pr_debug("set tba :0x%llx, tma:0x%llx for pqm.\n",
-			qpd->tba_addr, qpd->tma_addr);
+		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
 	}
 
 out:
@@ -1004,25 +1010,42 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vm
 	unsigned long pfn, i;
 	int ret = 0;
 	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+	struct kfd_process_device *temp, *pdd = NULL;
+	struct qcm_process_device *qpd = NULL;
 
 	if (dev == NULL)
 		return -EINVAL;
-	if ((vma->vm_start & (PAGE_SIZE - 1)) ||
+	if (((vma->vm_end - vma->vm_start) != dev->cwsr_size) ||
+		(vma->vm_start & (PAGE_SIZE - 1)) ||
 		(vma->vm_end & (PAGE_SIZE - 1))) {
-		pr_err("KFD only support page aligned memory map.\n");
+		pr_err("KFD only support page aligned memory map and correct size.\n");
 		return -EINVAL;
 	}
 
 	pr_debug("kfd reserved mem mmap been called.\n");
-	/* We supported  two reserved memory mmap in the future .
-	    1. Trap handler code and parameter (TBA and TMA , 2 pages total)
-	    2. Relaunch stack (control  block, 1 page for Carrizo)
-	 */
 
+	list_for_each_entry_safe(pdd, temp, &process->per_device_data,
+				per_device_list) {
+		if (dev == pdd->dev) {
+			qpd = &pdd->qpd;
+			break;
+		}
+	}
+	if (qpd == NULL)
+		return -EINVAL;
+
+	qpd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM,
+				get_order(dev->cwsr_size));
+	if (!qpd->cwsr_pages) {
+		pr_err("amdkfd: error alloc CWSR isa memory per process.\n");
+		return -ENOMEM;
+	}
+	qpd->cwsr_kaddr = kmap(qpd->cwsr_pages);
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
 	for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); ++i) {
-		pfn = page_to_pfn(&dev->cwsr_pages[i]);
-		vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
-			| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+		pfn = page_to_pfn(&qpd->cwsr_pages[i]);
 		/* mapping the page to user process */
 		ret = remap_pfn_range(vma, vma->vm_start + (i << PAGE_SHIFT),
 				pfn, PAGE_SIZE, vma->vm_page_prot);
-- 
2.7.4