diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch new file mode 100644 index 00000000..1eb0f0e2 --- /dev/null +++ b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch @@ -0,0 +1,335 @@ +From f038d18b7f4a5d69740d8c9bf2c8e67721753c01 Mon Sep 17 00:00:00 2001 +From: Amber Lin <Amber.Lin@amd.com> +Date: Fri, 8 Jul 2016 16:18:02 -0400 +Subject: [PATCH 1473/4131] drm/amdkfd: Flush TC for GFX v7 + +GFX v7 doesn't flush texture cache at DEQUEUE if any dirty cache remains. +This patch submits an IB packet of RELEASE_MEM command to flush the cache +before tearing down VMID. For each process, One page below CWSR memory is +reserved for IB usage. + +BUG: SWDEV-93847 + +Signed-off-by: Amber Lin <Amber.Lin@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++ + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++ + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +- + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 +++++++ + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++ + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 115 +++++++++++++++++++++ + 6 files changed, 200 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 666853e..af3790f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -332,6 +332,21 @@ static void kfd_cwsr_fini(struct kfd_dev *kfd) + __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size)); + } + ++static void kfd_ib_mem_init(struct kfd_dev *kdev) ++{ ++ /* In certain cases we need to send IB from kernel using the GPU address ++ * space created by user applications. ++ * For example, on GFX v7, we need to flush TC associated to the VMID ++ * before tearing down the VMID. In order to do so, we need an address ++ * valid to the VMID to place the IB while this space was created on ++ * the user's side, not the kernel. ++ * Since kfd_set_process_dgpu_aperture reserves "cwsr_base + cwsr_size" ++ * but CWSR only uses pages above cwsr_base, we'll use one page memory ++ * under cwsr_base for IB submissions ++ */ ++ kdev->ib_size = PAGE_SIZE; ++} ++ + #if defined(CONFIG_DEBUG_FS) + + static int kfd_debugfs_open(struct inode *inode, struct file *file) +@@ -501,6 +516,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + if (kfd_cwsr_init(kfd)) + goto device_iommu_pasid_error; + ++ kfd_ib_mem_init(kfd); ++ + if (kfd_resume(kfd)) + goto kfd_resume_error; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index aacc4dc..1506597 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -138,12 +138,31 @@ static int allocate_vmid(struct device_queue_manager *dqm, + return 0; + } + ++static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, ++ struct qcm_process_device *qpd) ++{ ++ uint32_t len; ++ ++ if (!qpd->ib_kaddr) ++ return -ENOMEM; ++ ++ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); ++ ++ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, ++ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); ++} ++ + static void deallocate_vmid(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) + { + int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; + ++ /* On GFX v7, CP doesn't flush TC at dequeue */ ++ if (q->device->device_info->asic_family == CHIP_HAWAII) ++ if (flush_texture_cache_nocpsch(q->device, qpd)) ++ pr_err("kfd: Failed to flush TC\n"); ++ + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index 587f847..c52853f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -295,6 +295,7 @@ + + + #define DGPU_VM_BASE_DEFAULT 0x100000 ++#define DGPU_IB_BASE_DEFAULT (DGPU_VM_BASE_DEFAULT - PAGE_SIZE) + + int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, + uint64_t base, uint64_t limit) +@@ -355,9 +356,10 @@ int kfd_init_apertures(struct kfd_process *process) + pdd->scratch_limit = + MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + +- if (KFD_IS_DGPU(dev->device_info->asic_family)) ++ if (KFD_IS_DGPU(dev->device_info->asic_family)) { + pdd->qpd.cwsr_base = DGPU_VM_BASE_DEFAULT; +- ++ pdd->qpd.ib_base = DGPU_IB_BASE_DEFAULT; ++ } + } + + dev_dbg(kfd_device, "node id %u\n", id); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index f777645..50d015f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -554,6 +554,43 @@ static int get_map_process_packet_size_scratch(void) + return sizeof(struct pm4_map_process_scratch); + } + ++/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size ++ * of this packet ++ * @gpu_addr - GPU address of the packet. It's a virtual address. ++ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer ++ * Return - length of the packet ++ */ ++uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) ++{ ++ struct pm4__release_mem *packet; ++ ++ WARN_ON(!buffer); ++ ++ packet = (struct pm4__release_mem *)buffer; ++ memset(buffer, 0, sizeof(struct pm4__release_mem)); ++ ++ packet->header.u32all = build_pm4_header(IT_RELEASE_MEM, ++ sizeof(struct pm4__release_mem)); ++ ++ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; ++ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; ++ packet->bitfields2.tcl1_action_ena = 1; ++ packet->bitfields2.tc_action_ena = 1; ++ packet->bitfields2.cache_policy = cache_policy___release_mem__lru; ++ packet->bitfields2.atc = 0; ++ ++ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; ++ packet->bitfields3.int_sel = ++ int_sel___release_mem__send_interrupt_after_write_confirm; ++ ++ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; ++ packet->address_hi = upper_32_bits(gpu_addr); ++ ++ packet->data_lo = 0; ++ ++ return sizeof(struct pm4__release_mem) / sizeof(unsigned int); ++} ++ + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, + uint16_t fw_ver) + { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index d19fd6b..e702ed5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -271,6 +271,9 @@ struct kfd_dev { + uint32_t cwsr_size; + uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/ + ++ /* IB usage */ ++ uint32_t ib_size; ++ + /* Debugfs */ + #if defined(CONFIG_DEBUG_FS) + struct dentry *debugfs_root; +@@ -529,6 +532,10 @@ struct qcm_process_device { + uint64_t tba_addr; + uint64_t tma_addr; + void *cwsr_kaddr; ++ ++ /* IB memory */ ++ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ ++ void *ib_kaddr; + }; + + /*8 byte handle containing GPU ID in the most significant 4 bytes and +@@ -842,6 +849,7 @@ struct packet_manager_firmware { + int (*get_map_process_packet_size)(void); + }; + ++uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, + uint16_t fw_ver); + void pm_uninit(struct packet_manager *pm); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index ff1669b..9b67aaf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -79,6 +79,120 @@ void kfd_process_destroy_wq(void) + } + } + ++static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem, ++ void *vm) ++{ ++ kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm); ++ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); ++} ++ ++/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process ++ * During the memory allocation of GPU, we can't hold the process lock. ++ * There's a chance someone else allocates the memory during the lock ++ * released time. In that case, -EINVAL is returned but kptr remains so ++ * the caller knows the memory is allocated (by someone else) and ++ * available to use. ++ */ ++static int kfd_process_alloc_gpuvm(struct kfd_process *p, ++ struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size, ++ void *vm, void **kptr, struct kfd_process_device *pdd, ++ uint64_t *addr_to_assign) ++{ ++ int err; ++ void *mem = NULL; ++ ++ /* can't hold the process lock while allocating from KGD */ ++ up_write(&p->lock); ++ ++ err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm, ++ (struct kgd_mem **)&mem, NULL, kptr, pdd, ++ ALLOC_MEM_FLAGS_GTT | ++ ALLOC_MEM_FLAGS_NONPAGED | ++ ALLOC_MEM_FLAGS_EXECUTE_ACCESS | ++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE); ++ if (err) ++ goto err_alloc_mem; ++ ++ err = kfd_map_memory_to_gpu(kdev, mem, p, pdd); ++ if (err) ++ goto err_map_mem; ++ ++ down_write(&p->lock); ++ /* Check if someone else allocated the memory while we weren't looking ++ */ ++ if (*addr_to_assign) { ++ err = -EINVAL; ++ goto free_gpuvm; ++ } else { ++ /* Create an obj handle so kfd_process_device_remove_obj_handle ++ * will take care of the bo removal when the process finishes ++ */ ++ if (kfd_process_device_create_obj_handle( ++ pdd, mem, gpu_va, size) < 0) { ++ err = -ENOMEM; ++ *kptr = NULL; ++ goto free_gpuvm; ++ } ++ } ++ ++ return err; ++ ++free_gpuvm: ++ up_write(&p->lock); ++ kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm); ++ down_write(&p->lock); ++ return err; ++ ++err_map_mem: ++ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); ++err_alloc_mem: ++ *kptr = NULL; ++ down_write(&p->lock); ++ return err; ++} ++ ++/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage ++ * The memory reserved is for KFD to submit IB to AMDGPU from kernel. ++ * If the memory is reserved successfully, ib_kaddr_assigned will have ++ * the CPU/kernel address. Check ib_kaddr_assigned before accessing the ++ * memory. ++ */ ++static int kfd_process_reserve_ib_mem(struct kfd_process *p) ++{ ++ int err = 0; ++ struct kfd_process_device *temp, *pdd = NULL; ++ struct kfd_dev *kdev = NULL; ++ struct qcm_process_device *qpd = NULL; ++ void *kaddr; ++ ++ down_write(&p->lock); ++ list_for_each_entry_safe(pdd, temp, &p->per_device_data, ++ per_device_list) { ++ kdev = pdd->dev; ++ qpd = &pdd->qpd; ++ if (!kdev->ib_size || qpd->ib_kaddr) ++ continue; ++ ++ if (qpd->ib_base) { /* is dGPU */ ++ err = kfd_process_alloc_gpuvm(p, kdev, ++ qpd->ib_base, kdev->ib_size, pdd->vm, ++ &kaddr, pdd, (uint64_t *)&qpd->ib_kaddr); ++ if (!err) ++ qpd->ib_kaddr = kaddr; ++ else if (qpd->ib_kaddr) ++ err = 0; ++ else ++ err = -ENOMEM; ++ } else { ++ /* FIXME: Support APU */ ++ err = -ENOMEM; ++ } ++ } ++ ++ up_write(&p->lock); ++ return err; ++} ++ + struct kfd_process *kfd_create_process(struct file *filep) + { + struct kfd_process *process; +@@ -117,6 +231,7 @@ struct kfd_process *kfd_create_process(struct file *filep) + up_write(&thread->mm->mmap_sem); + + kfd_process_init_cwsr(process, filep); ++ kfd_process_reserve_ib_mem(process); + + return process; + } +-- +2.7.4 + |