diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch | 335 |
1 files changed, 0 insertions, 335 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch deleted file mode 100644 index 1eb0f0e2..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch +++ /dev/null @@ -1,335 +0,0 @@ -From f038d18b7f4a5d69740d8c9bf2c8e67721753c01 Mon Sep 17 00:00:00 2001 -From: Amber Lin <Amber.Lin@amd.com> -Date: Fri, 8 Jul 2016 16:18:02 -0400 -Subject: [PATCH 1473/4131] drm/amdkfd: Flush TC for GFX v7 - -GFX v7 doesn't flush texture cache at DEQUEUE if any dirty cache remains. -This patch submits an IB packet of RELEASE_MEM command to flush the cache -before tearing down VMID. For each process, One page below CWSR memory is -reserved for IB usage. - -BUG: SWDEV-93847 - -Signed-off-by: Amber Lin <Amber.Lin@amd.com> ---- - drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++ - .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++ - drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +- - drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 +++++++ - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++ - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 115 +++++++++++++++++++++ - 6 files changed, 200 insertions(+), 2 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c -index 666853e..af3790f 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c -@@ -332,6 +332,21 @@ static void kfd_cwsr_fini(struct kfd_dev *kfd) - __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size)); - } - -+static void kfd_ib_mem_init(struct kfd_dev *kdev) -+{ -+ /* In certain cases we need to send IB from kernel using the GPU address -+ * space created by user applications. -+ * For example, on GFX v7, we need to flush TC associated to the VMID -+ * before tearing down the VMID. In order to do so, we need an address -+ * valid to the VMID to place the IB while this space was created on -+ * the user's side, not the kernel. -+ * Since kfd_set_process_dgpu_aperture reserves "cwsr_base + cwsr_size" -+ * but CWSR only uses pages above cwsr_base, we'll use one page memory -+ * under cwsr_base for IB submissions -+ */ -+ kdev->ib_size = PAGE_SIZE; -+} -+ - #if defined(CONFIG_DEBUG_FS) - - static int kfd_debugfs_open(struct inode *inode, struct file *file) -@@ -501,6 +516,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, - if (kfd_cwsr_init(kfd)) - goto device_iommu_pasid_error; - -+ kfd_ib_mem_init(kfd); -+ - if (kfd_resume(kfd)) - goto kfd_resume_error; - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -index aacc4dc..1506597 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -@@ -138,12 +138,31 @@ static int allocate_vmid(struct device_queue_manager *dqm, - return 0; - } - -+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, -+ struct qcm_process_device *qpd) -+{ -+ uint32_t len; -+ -+ if (!qpd->ib_kaddr) -+ return -ENOMEM; -+ -+ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); -+ -+ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, -+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); -+} -+ - static void deallocate_vmid(struct device_queue_manager *dqm, - struct qcm_process_device *qpd, - struct queue *q) - { - int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; - -+ /* On GFX v7, CP doesn't flush TC at dequeue */ -+ if (q->device->device_info->asic_family == CHIP_HAWAII) -+ if (flush_texture_cache_nocpsch(q->device, qpd)) -+ pr_err("kfd: Failed to flush TC\n"); -+ - /* Release the vmid mapping */ - set_pasid_vmid_mapping(dqm, 0, qpd->vmid); - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c -index 587f847..c52853f 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c -@@ -295,6 +295,7 @@ - - - #define DGPU_VM_BASE_DEFAULT 0x100000 -+#define DGPU_IB_BASE_DEFAULT (DGPU_VM_BASE_DEFAULT - PAGE_SIZE) - - int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, - uint64_t base, uint64_t limit) -@@ -355,9 +356,10 @@ int kfd_init_apertures(struct kfd_process *process) - pdd->scratch_limit = - MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); - -- if (KFD_IS_DGPU(dev->device_info->asic_family)) -+ if (KFD_IS_DGPU(dev->device_info->asic_family)) { - pdd->qpd.cwsr_base = DGPU_VM_BASE_DEFAULT; -- -+ pdd->qpd.ib_base = DGPU_IB_BASE_DEFAULT; -+ } - } - - dev_dbg(kfd_device, "node id %u\n", id); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c -index f777645..50d015f 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c -@@ -554,6 +554,43 @@ static int get_map_process_packet_size_scratch(void) - return sizeof(struct pm4_map_process_scratch); - } - -+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size -+ * of this packet -+ * @gpu_addr - GPU address of the packet. It's a virtual address. -+ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer -+ * Return - length of the packet -+ */ -+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) -+{ -+ struct pm4__release_mem *packet; -+ -+ WARN_ON(!buffer); -+ -+ packet = (struct pm4__release_mem *)buffer; -+ memset(buffer, 0, sizeof(struct pm4__release_mem)); -+ -+ packet->header.u32all = build_pm4_header(IT_RELEASE_MEM, -+ sizeof(struct pm4__release_mem)); -+ -+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; -+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; -+ packet->bitfields2.tcl1_action_ena = 1; -+ packet->bitfields2.tc_action_ena = 1; -+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru; -+ packet->bitfields2.atc = 0; -+ -+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; -+ packet->bitfields3.int_sel = -+ int_sel___release_mem__send_interrupt_after_write_confirm; -+ -+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; -+ packet->address_hi = upper_32_bits(gpu_addr); -+ -+ packet->data_lo = 0; -+ -+ return sizeof(struct pm4__release_mem) / sizeof(unsigned int); -+} -+ - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, - uint16_t fw_ver) - { -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -index d19fd6b..e702ed5 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -@@ -271,6 +271,9 @@ struct kfd_dev { - uint32_t cwsr_size; - uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/ - -+ /* IB usage */ -+ uint32_t ib_size; -+ - /* Debugfs */ - #if defined(CONFIG_DEBUG_FS) - struct dentry *debugfs_root; -@@ -529,6 +532,10 @@ struct qcm_process_device { - uint64_t tba_addr; - uint64_t tma_addr; - void *cwsr_kaddr; -+ -+ /* IB memory */ -+ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ -+ void *ib_kaddr; - }; - - /*8 byte handle containing GPU ID in the most significant 4 bytes and -@@ -842,6 +849,7 @@ struct packet_manager_firmware { - int (*get_map_process_packet_size)(void); - }; - -+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, - uint16_t fw_ver); - void pm_uninit(struct packet_manager *pm); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -index ff1669b..9b67aaf 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -@@ -79,6 +79,120 @@ void kfd_process_destroy_wq(void) - } - } - -+static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem, -+ void *vm) -+{ -+ kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm); -+ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); -+} -+ -+/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process -+ * During the memory allocation of GPU, we can't hold the process lock. -+ * There's a chance someone else allocates the memory during the lock -+ * released time. In that case, -EINVAL is returned but kptr remains so -+ * the caller knows the memory is allocated (by someone else) and -+ * available to use. -+ */ -+static int kfd_process_alloc_gpuvm(struct kfd_process *p, -+ struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size, -+ void *vm, void **kptr, struct kfd_process_device *pdd, -+ uint64_t *addr_to_assign) -+{ -+ int err; -+ void *mem = NULL; -+ -+ /* can't hold the process lock while allocating from KGD */ -+ up_write(&p->lock); -+ -+ err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm, -+ (struct kgd_mem **)&mem, NULL, kptr, pdd, -+ ALLOC_MEM_FLAGS_GTT | -+ ALLOC_MEM_FLAGS_NONPAGED | -+ ALLOC_MEM_FLAGS_EXECUTE_ACCESS | -+ ALLOC_MEM_FLAGS_NO_SUBSTITUTE); -+ if (err) -+ goto err_alloc_mem; -+ -+ err = kfd_map_memory_to_gpu(kdev, mem, p, pdd); -+ if (err) -+ goto err_map_mem; -+ -+ down_write(&p->lock); -+ /* Check if someone else allocated the memory while we weren't looking -+ */ -+ if (*addr_to_assign) { -+ err = -EINVAL; -+ goto free_gpuvm; -+ } else { -+ /* Create an obj handle so kfd_process_device_remove_obj_handle -+ * will take care of the bo removal when the process finishes -+ */ -+ if (kfd_process_device_create_obj_handle( -+ pdd, mem, gpu_va, size) < 0) { -+ err = -ENOMEM; -+ *kptr = NULL; -+ goto free_gpuvm; -+ } -+ } -+ -+ return err; -+ -+free_gpuvm: -+ up_write(&p->lock); -+ kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm); -+ down_write(&p->lock); -+ return err; -+ -+err_map_mem: -+ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); -+err_alloc_mem: -+ *kptr = NULL; -+ down_write(&p->lock); -+ return err; -+} -+ -+/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage -+ * The memory reserved is for KFD to submit IB to AMDGPU from kernel. -+ * If the memory is reserved successfully, ib_kaddr_assigned will have -+ * the CPU/kernel address. Check ib_kaddr_assigned before accessing the -+ * memory. -+ */ -+static int kfd_process_reserve_ib_mem(struct kfd_process *p) -+{ -+ int err = 0; -+ struct kfd_process_device *temp, *pdd = NULL; -+ struct kfd_dev *kdev = NULL; -+ struct qcm_process_device *qpd = NULL; -+ void *kaddr; -+ -+ down_write(&p->lock); -+ list_for_each_entry_safe(pdd, temp, &p->per_device_data, -+ per_device_list) { -+ kdev = pdd->dev; -+ qpd = &pdd->qpd; -+ if (!kdev->ib_size || qpd->ib_kaddr) -+ continue; -+ -+ if (qpd->ib_base) { /* is dGPU */ -+ err = kfd_process_alloc_gpuvm(p, kdev, -+ qpd->ib_base, kdev->ib_size, pdd->vm, -+ &kaddr, pdd, (uint64_t *)&qpd->ib_kaddr); -+ if (!err) -+ qpd->ib_kaddr = kaddr; -+ else if (qpd->ib_kaddr) -+ err = 0; -+ else -+ err = -ENOMEM; -+ } else { -+ /* FIXME: Support APU */ -+ err = -ENOMEM; -+ } -+ } -+ -+ up_write(&p->lock); -+ return err; -+} -+ - struct kfd_process *kfd_create_process(struct file *filep) - { - struct kfd_process *process; -@@ -117,6 +231,7 @@ struct kfd_process *kfd_create_process(struct file *filep) - up_write(&thread->mm->mmap_sem); - - kfd_process_init_cwsr(process, filep); -+ kfd_process_reserve_ib_mem(process); - - return process; - } --- -2.7.4 - |