Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch  357
1 file changed, 357 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch
new file mode 100644
index 00000000..c54abf5a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch
@@ -0,0 +1,357 @@
+From 30272a1b4da44c7866dad126b7026551498f6be2 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Fri, 23 Feb 2018 18:02:55 -0500
+Subject: [PATCH 3439/4131] drm/amdkfd: Create KFD VMs on demand
+
+Instead of creating all VMs on process creation, create them when
+a process is bound to a device. This will later allow registering
+an existing VM from a DRM render node FD at runtime, before the
+process is bound to the device. This way the render node VM can be
+used for KFD instead of creating our own redundant VM.
+
+Change-Id: I62f912e41af9c223752f199c22822c4ff6638ed4
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 242 +++++++++++++++++--------------
+ 1 file changed, 137 insertions(+), 105 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 7c6bcbd..91aac82 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -68,7 +68,6 @@ static struct kfd_process *find_process(const struct task_struct *thread,
+ static void kfd_process_ref_release(struct kref *ref);
+ static struct kfd_process *create_process(const struct task_struct *thread,
+ struct file *filep);
+-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+
+ static void evict_process_worker(struct work_struct *work);
+ static void restore_process_worker(struct work_struct *work);
+@@ -175,47 +174,31 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
+ return err;
+ }
+
+-/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
+- * The memory reserved is for KFD to submit IB to AMDGPU from kernel.
+- * If the memory is reserved successfully, ib_kaddr_assigned will have
+- * the CPU/kernel address. Check ib_kaddr_assigned before accessing the
+- * memory.
++/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
++ * process for IB usage. The memory reserved is for KFD to submit
++ * IB to AMDGPU from kernel. If the memory is reserved
++ * successfully, ib_kaddr_assigned will have the CPU/kernel
++ * address. Check ib_kaddr_assigned before accessing the memory.
+ */
+-static int kfd_process_reserve_ib_mem(struct kfd_process *p)
++static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+ {
+- int ret = 0;
+- struct kfd_process_device *pdd = NULL;
+- struct kfd_dev *kdev = NULL;
+- struct qcm_process_device *qpd = NULL;
+- void *kaddr;
++ struct qcm_process_device *qpd = &pdd->qpd;
+ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
+- ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+- ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
++ void *kaddr;
++ int ret;
+
+- list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- kdev = pdd->dev;
+- qpd = &pdd->qpd;
+- if (qpd->ib_kaddr)
+- continue;
++ if (qpd->ib_kaddr || !qpd->ib_base)
++ return 0;
+
+- if (qpd->ib_base) { /* is dGPU */
+- ret = kfd_process_alloc_gpuvm(p, kdev,
+- qpd->ib_base, PAGE_SIZE,
+- &kaddr, pdd, flags);
+- if (!ret)
+- qpd->ib_kaddr = kaddr;
+- else
+- /* In case of error, the kfd_bos for some pdds
+- * which are already allocated successfully
+- * will be freed in upper level function
+- * i.e. create_process().
+- */
+- return ret;
+- } else {
+- /* FIXME: Support APU */
+- continue;
+- }
+- }
++ /* ib_base is only set for dGPU */
++ ret = kfd_process_alloc_gpuvm(pdd->process, pdd->dev,
++ qpd->ib_base, PAGE_SIZE,
++ &kaddr, pdd, flags);
++ if (ret)
++ return ret;
++
++ qpd->ib_kaddr = kaddr;
+
+ return 0;
+ }
+@@ -317,33 +300,42 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+ return p;
+ }
+
+-static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
++static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
+ {
+- struct kfd_process_device *pdd, *peer_pdd;
++ struct kfd_process *p = pdd->process;
+ struct kfd_bo *buf_obj;
+ int id;
+
+- list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- /*
+- * Remove all handles from idr and release appropriate
+- * local memory object
+- */
+- idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) {
+- list_for_each_entry(peer_pdd, &p->per_device_data,
+- per_device_list) {
+- peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
+- peer_pdd->dev->kgd,
+- buf_obj->mem, peer_pdd->vm);
+- }
+-
+- run_rdma_free_callback(buf_obj);
+- pdd->dev->kfd2kgd->free_memory_of_gpu(
+- pdd->dev->kgd, buf_obj->mem);
+- kfd_process_device_remove_obj_handle(pdd, id);
++ /*
++ * Remove all handles from idr and release appropriate
++ * local memory object
++ */
++ idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) {
++ struct kfd_process_device *peer_pdd;
++
++ list_for_each_entry(peer_pdd, &p->per_device_data,
++ per_device_list) {
++ if (!peer_pdd->vm)
++ continue;
++ peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
++ peer_pdd->dev->kgd, buf_obj->mem, peer_pdd->vm);
+ }
++
++ run_rdma_free_callback(buf_obj);
++ pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd,
++ buf_obj->mem);
++ kfd_process_device_remove_obj_handle(pdd, id);
+ }
+ }
+
++static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
++{
++ struct kfd_process_device *pdd;
++
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list)
++ kfd_process_device_free_bos(pdd);
++}
++
+ static void kfd_process_destroy_pdds(struct kfd_process *p)
+ {
+ struct kfd_process_device *pdd, *temp;
+@@ -472,52 +464,29 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+ .release = kfd_process_notifier_release,
+ };
+
+-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
++static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+ {
+- int ret;
+ unsigned long offset;
+- struct kfd_process_device *pdd = NULL;
+- struct kfd_dev *dev = NULL;
+- struct qcm_process_device *qpd = NULL;
+- void *kaddr;
+- uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
+- ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+- ALLOC_MEM_FLAGS_READONLY |
+- ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
++ struct kfd_process_device *pdd;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- dev = pdd->dev;
+- qpd = &pdd->qpd;
+- if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
++ struct kfd_dev *dev = pdd->dev;
++ struct qcm_process_device *qpd = &pdd->qpd;
++
++ if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
+ continue;
+- if (qpd->cwsr_base) {
+- /* cwsr_base is only set for DGPU */
+- ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base,
+- KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags);
+- if (!ret) {
+- qpd->cwsr_kaddr = kaddr;
+- qpd->tba_addr = qpd->cwsr_base;
+- } else
+- /* In case of error, the kfd_bos for some pdds
+- * which are already allocated successfully
+- * will be freed in upper level function
+- * i.e. create_process().
+- */
+- return ret;
+- } else {
+- offset = (dev->id |
+- KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
+- qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
++
++ offset = (dev->id | KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
++ qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
+ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+ MAP_SHARED, offset);
+
+- if (IS_ERR_VALUE(qpd->tba_addr)) {
+- pr_err("Failure to set tba address. error -%d.\n",
+- (int)qpd->tba_addr);
+- qpd->tba_addr = 0;
+- qpd->cwsr_kaddr = NULL;
+- return -ENOMEM;
+- }
++ if (IS_ERR_VALUE(qpd->tba_addr)) {
++ pr_err("Failure to set tba address. error -%d.\n",
++ (int)qpd->tba_addr);
++ qpd->tba_addr = 0;
++ qpd->cwsr_kaddr = NULL;
++ return -ENOMEM;
+ }
+
+ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+@@ -530,6 +499,38 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ return 0;
+ }
+
++static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
++{
++ struct kfd_dev *dev = pdd->dev;
++ struct qcm_process_device *qpd = &pdd->qpd;
++ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_READONLY |
++ ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
++ void *kaddr;
++ int ret;
++
++ if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
++ return 0;
++
++ /* cwsr_base is only set for dGPU */
++ ret = kfd_process_alloc_gpuvm(pdd->process, dev, qpd->cwsr_base,
++ KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd,
++ flags);
++ if (ret)
++ return ret;
++
++ qpd->cwsr_kaddr = kaddr;
++ qpd->tba_addr = qpd->cwsr_base;
++
++ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
++
++ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
++ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
++ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
++
++ return 0;
++}
++
+ static struct kfd_process *create_process(const struct task_struct *thread,
+ struct file *filep)
+ {
+@@ -586,10 +587,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
+ INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+ process->last_restore_timestamp = get_jiffies_64();
+
+- err = kfd_process_reserve_ib_mem(process);
+- if (err)
+- goto err_reserve_ib_mem;
+- err = kfd_process_init_cwsr(process, filep);
++ err = kfd_process_init_cwsr_apu(process, filep);
+ if (err)
+ goto err_init_cwsr;
+
+@@ -601,7 +599,6 @@ static struct kfd_process *create_process(const struct task_struct *thread,
+ return process;
+
+ err_init_cwsr:
+-err_reserve_ib_mem:
+ kfd_process_free_outstanding_kfd_bos(process);
+ kfd_process_destroy_pdds(process);
+ err_init_apertures:
+@@ -685,12 +682,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ goto err_create_pdd;
+ }
+
+- /* Create the GPUVM context for this specific device */
+- if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
+- &p->process_info, &p->ef)) {
+- pr_err("Failed to create process VM object\n");
+- goto err_create_pdd;
+- }
+ return pdd;
+
+ err_create_pdd:
+@@ -701,6 +692,43 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ return NULL;
+ }
+
++static int kfd_process_device_init_vm(struct kfd_process_device *pdd)
++{
++ struct kfd_process *p;
++ struct kfd_dev *dev;
++ int ret;
++
++ if (pdd->vm)
++ return 0;
++
++ p = pdd->process;
++ dev = pdd->dev;
++
++ ret = dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
++ &p->process_info, &p->ef);
++ if (ret) {
++ pr_err("Failed to create process VM object\n");
++ return ret;
++ }
++
++ ret = kfd_process_device_reserve_ib_mem(pdd);
++ if (ret)
++ goto err_reserve_ib_mem;
++ ret = kfd_process_device_init_cwsr_dgpu(pdd);
++ if (ret)
++ goto err_init_cwsr;
++
++ return 0;
++
++err_init_cwsr:
++err_reserve_ib_mem:
++ kfd_process_device_free_bos(pdd);
++ dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
++ pdd->vm = NULL;
++
++ return ret;
++}
++
+ /*
+ * Direct the IOMMU to bind the process (specifically the pasid->mm)
+ * to the device.
+@@ -724,6 +752,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
+ if (err)
+ return ERR_PTR(err);
+
++ err = kfd_process_device_init_vm(pdd);
++ if (err)
++ return ERR_PTR(err);
++
+ return pdd;
+ }
+
+--
+2.7.4
+
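
For context, below is a minimal standalone sketch of the on-demand initialization pattern this patch adopts: per-device state (the VM) is created the first time a process is bound to a device rather than at process creation, and is cleaned up on partial failure so a later bind can retry. All names and types in the sketch are hypothetical illustrations, not the kernel's KFD/KGD API.

```c
/*
 * Sketch of lazy, per-device VM creation at bind time.
 * Hypothetical names; not the amdkfd driver interfaces.
 */
#include <stdio.h>
#include <stdlib.h>

struct device_vm {
	int id;                     /* stand-in for the real GPUVM context */
};

struct process_device {
	struct device_vm *vm;       /* NULL until the first bind creates it */
};

/* Create the VM on first use; leave state untouched if it already exists. */
static int process_device_init_vm(struct process_device *pdd)
{
	if (pdd->vm)                /* already initialized by an earlier bind */
		return 0;

	pdd->vm = malloc(sizeof(*pdd->vm));
	if (!pdd->vm)
		return -1;
	pdd->vm->id = 42;

	/*
	 * Dependent setup (IB memory, CWSR area in the real driver) would go
	 * here; on error it would free the VM and reset the pointer so a
	 * subsequent bind can retry cleanly.
	 */
	return 0;
}

/* Binding a process to a device triggers the lazy VM creation. */
static struct process_device *bind_process_to_device(struct process_device *pdd)
{
	if (process_device_init_vm(pdd))
		return NULL;
	return pdd;
}

int main(void)
{
	struct process_device pdd = { .vm = NULL };

	/* No VM exists yet; the first bind creates it, the second reuses it. */
	if (!bind_process_to_device(&pdd) || !bind_process_to_device(&pdd))
		return 1;
	printf("VM id after bind: %d\n", pdd.vm->id);
	free(pdd.vm);
	return 0;
}
```

The key design point mirrored from the patch is that the check-and-create step lives in the bind path (kfd_bind_process_to_device calling kfd_process_device_init_vm), which is what later allows an externally supplied VM, such as one from a DRM render node, to be registered before the first bind instead of the driver always creating its own.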