diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch new file mode 100644 index 00000000..c54abf5a --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch @@ -0,0 +1,357 @@ +From 30272a1b4da44c7866dad126b7026551498f6be2 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Fri, 23 Feb 2018 18:02:55 -0500 +Subject: [PATCH 3439/4131] drm/amdkfd: Create KFD VMs on demand + +Instead of creating all VMs on process creation, create them when +a process is bound to a device. This will later allow registering +an existing VM from a DRM render node FD at runtime, before the +process is bound to the device. This way the render node VM can be +used for KFD instead of creating our own redundant VM. + +Change-Id: I62f912e41af9c223752f199c22822c4ff6638ed4 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 242 +++++++++++++++++-------------- + 1 file changed, 137 insertions(+), 105 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 7c6bcbd..91aac82 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -68,7 +68,6 @@ static struct kfd_process *find_process(const struct task_struct *thread, + static void kfd_process_ref_release(struct kref *ref); + static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep); +-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + + static void evict_process_worker(struct work_struct *work); + static void restore_process_worker(struct work_struct *work); +@@ -175,47 +174,31 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, + return err; + } + +-/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage +- * The memory reserved is for KFD to submit IB to AMDGPU from kernel. +- * If the memory is reserved successfully, ib_kaddr_assigned will have +- * the CPU/kernel address. Check ib_kaddr_assigned before accessing the +- * memory. ++/* kfd_process_device_reserve_ib_mem - Reserve memory inside the ++ * process for IB usage The memory reserved is for KFD to submit ++ * IB to AMDGPU from kernel. If the memory is reserved ++ * successfully, ib_kaddr_assigned will have the CPU/kernel ++ * address. Check ib_kaddr_assigned before accessing the memory. + */ +-static int kfd_process_reserve_ib_mem(struct kfd_process *p) ++static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) + { +- int ret = 0; +- struct kfd_process_device *pdd = NULL; +- struct kfd_dev *kdev = NULL; +- struct qcm_process_device *qpd = NULL; +- void *kaddr; ++ struct qcm_process_device *qpd = &pdd->qpd; + uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED | +- ALLOC_MEM_FLAGS_NO_SUBSTITUTE | +- ALLOC_MEM_FLAGS_EXECUTE_ACCESS; ++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTE_ACCESS; ++ void *kaddr; ++ int ret; + +- list_for_each_entry(pdd, &p->per_device_data, per_device_list) { +- kdev = pdd->dev; +- qpd = &pdd->qpd; +- if (qpd->ib_kaddr) +- continue; ++ if (qpd->ib_kaddr || !qpd->ib_base) ++ return 0; + +- if (qpd->ib_base) { /* is dGPU */ +- ret = kfd_process_alloc_gpuvm(p, kdev, +- qpd->ib_base, PAGE_SIZE, +- &kaddr, pdd, flags); +- if (!ret) +- qpd->ib_kaddr = kaddr; +- else +- /* In case of error, the kfd_bos for some pdds +- * which are already allocated successfully +- * will be freed in upper level function +- * i.e. create_process(). +- */ +- return ret; +- } else { +- /* FIXME: Support APU */ +- continue; +- } +- } ++ /* ib_base is only set for dGPU */ ++ ret = kfd_process_alloc_gpuvm(pdd->process, pdd->dev, ++ qpd->ib_base, PAGE_SIZE, ++ &kaddr, pdd, flags); ++ if (ret) ++ return ret; ++ ++ qpd->ib_kaddr = kaddr; + + return 0; + } +@@ -317,33 +300,42 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid) + return p; + } + +-static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) ++static void kfd_process_device_free_bos(struct kfd_process_device *pdd) + { +- struct kfd_process_device *pdd, *peer_pdd; ++ struct kfd_process *p = pdd->process; + struct kfd_bo *buf_obj; + int id; + +- list_for_each_entry(pdd, &p->per_device_data, per_device_list) { +- /* +- * Remove all handles from idr and release appropriate +- * local memory object +- */ +- idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) { +- list_for_each_entry(peer_pdd, &p->per_device_data, +- per_device_list) { +- peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu( +- peer_pdd->dev->kgd, +- buf_obj->mem, peer_pdd->vm); +- } +- +- run_rdma_free_callback(buf_obj); +- pdd->dev->kfd2kgd->free_memory_of_gpu( +- pdd->dev->kgd, buf_obj->mem); +- kfd_process_device_remove_obj_handle(pdd, id); ++ /* ++ * Remove all handles from idr and release appropriate ++ * local memory object ++ */ ++ idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) { ++ struct kfd_process_device *peer_pdd; ++ ++ list_for_each_entry(peer_pdd, &p->per_device_data, ++ per_device_list) { ++ if (!peer_pdd->vm) ++ continue; ++ peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu( ++ peer_pdd->dev->kgd, buf_obj->mem, peer_pdd->vm); + } ++ ++ run_rdma_free_callback(buf_obj); ++ pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, ++ buf_obj->mem); ++ kfd_process_device_remove_obj_handle(pdd, id); + } + } + ++static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) ++{ ++ struct kfd_process_device *pdd; ++ ++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) ++ kfd_process_device_free_bos(pdd); ++} ++ + static void kfd_process_destroy_pdds(struct kfd_process *p) + { + struct kfd_process_device *pdd, *temp; +@@ -472,52 +464,29 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { + .release = kfd_process_notifier_release, + }; + +-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) ++static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) + { +- int ret; + unsigned long offset; +- struct kfd_process_device *pdd = NULL; +- struct kfd_dev *dev = NULL; +- struct qcm_process_device *qpd = NULL; +- void *kaddr; +- uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED | +- ALLOC_MEM_FLAGS_NO_SUBSTITUTE | +- ALLOC_MEM_FLAGS_READONLY | +- ALLOC_MEM_FLAGS_EXECUTE_ACCESS; ++ struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { +- dev = pdd->dev; +- qpd = &pdd->qpd; +- if (!dev->cwsr_enabled || qpd->cwsr_kaddr) ++ struct kfd_dev *dev = pdd->dev; ++ struct qcm_process_device *qpd = &pdd->qpd; ++ ++ if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) + continue; +- if (qpd->cwsr_base) { +- /* cwsr_base is only set for DGPU */ +- ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base, +- KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags); +- if (!ret) { +- qpd->cwsr_kaddr = kaddr; +- qpd->tba_addr = qpd->cwsr_base; +- } else +- /* In case of error, the kfd_bos for some pdds +- * which are already allocated successfully +- * will be freed in upper level function +- * i.e. create_process(). +- */ +- return ret; +- } else { +- offset = (dev->id | +- KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT; +- qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, ++ ++ offset = (dev->id | KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT; ++ qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + +- if (IS_ERR_VALUE(qpd->tba_addr)) { +- pr_err("Failure to set tba address. error -%d.\n", +- (int)qpd->tba_addr); +- qpd->tba_addr = 0; +- qpd->cwsr_kaddr = NULL; +- return -ENOMEM; +- } ++ if (IS_ERR_VALUE(qpd->tba_addr)) { ++ pr_err("Failure to set tba address. error -%d.\n", ++ (int)qpd->tba_addr); ++ qpd->tba_addr = 0; ++ qpd->cwsr_kaddr = NULL; ++ return -ENOMEM; + } + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); +@@ -530,6 +499,38 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) + return 0; + } + ++static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) ++{ ++ struct kfd_dev *dev = pdd->dev; ++ struct qcm_process_device *qpd = &pdd->qpd; ++ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED | ++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_READONLY | ++ ALLOC_MEM_FLAGS_EXECUTE_ACCESS; ++ void *kaddr; ++ int ret; ++ ++ if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) ++ return 0; ++ ++ /* cwsr_base is only set for dGPU */ ++ ret = kfd_process_alloc_gpuvm(pdd->process, dev, qpd->cwsr_base, ++ KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, ++ flags); ++ if (ret) ++ return ret; ++ ++ qpd->cwsr_kaddr = kaddr; ++ qpd->tba_addr = qpd->cwsr_base; ++ ++ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); ++ ++ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; ++ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", ++ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); ++ ++ return 0; ++} ++ + static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep) + { +@@ -586,10 +587,7 @@ static struct kfd_process *create_process(const struct task_struct *thread, + INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); + process->last_restore_timestamp = get_jiffies_64(); + +- err = kfd_process_reserve_ib_mem(process); +- if (err) +- goto err_reserve_ib_mem; +- err = kfd_process_init_cwsr(process, filep); ++ err = kfd_process_init_cwsr_apu(process, filep); + if (err) + goto err_init_cwsr; + +@@ -601,7 +599,6 @@ static struct kfd_process *create_process(const struct task_struct *thread, + return process; + + err_init_cwsr: +-err_reserve_ib_mem: + kfd_process_free_outstanding_kfd_bos(process); + kfd_process_destroy_pdds(process); + err_init_apertures: +@@ -685,12 +682,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + goto err_create_pdd; + } + +- /* Create the GPUVM context for this specific device */ +- if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm, +- &p->process_info, &p->ef)) { +- pr_err("Failed to create process VM object\n"); +- goto err_create_pdd; +- } + return pdd; + + err_create_pdd: +@@ -701,6 +692,43 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + return NULL; + } + ++static int kfd_process_device_init_vm(struct kfd_process_device *pdd) ++{ ++ struct kfd_process *p; ++ struct kfd_dev *dev; ++ int ret; ++ ++ if (pdd->vm) ++ return 0; ++ ++ p = pdd->process; ++ dev = pdd->dev; ++ ++ ret = dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm, ++ &p->process_info, &p->ef); ++ if (ret) { ++ pr_err("Failed to create process VM object\n"); ++ return ret; ++ } ++ ++ ret = kfd_process_device_reserve_ib_mem(pdd); ++ if (ret) ++ goto err_reserve_ib_mem; ++ ret = kfd_process_device_init_cwsr_dgpu(pdd); ++ if (ret) ++ goto err_init_cwsr; ++ ++ return 0; ++ ++err_init_cwsr: ++err_reserve_ib_mem: ++ kfd_process_device_free_bos(pdd); ++ dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm); ++ pdd->vm = NULL; ++ ++ return ret; ++} ++ + /* + * Direct the IOMMU to bind the process (specifically the pasid->mm) + * to the device. +@@ -724,6 +752,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, + if (err) + return ERR_PTR(err); + ++ err = kfd_process_device_init_vm(pdd); ++ if (err) ++ return ERR_PTR(err); ++ + return pdd; + } + +-- +2.7.4 + |