From f038d18b7f4a5d69740d8c9bf2c8e67721753c01 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Fri, 8 Jul 2016 16:18:02 -0400 Subject: [PATCH 1473/4131] drm/amdkfd: Flush TC for GFX v7 GFX v7 doesn't flush texture cache at DEQUEUE if any dirty cache remains. This patch submits an IB packet of RELEASE_MEM command to flush the cache before tearing down VMID. For each process, one page below CWSR memory is reserved for IB usage. BUG: SWDEV-93847 Signed-off-by: Amber Lin --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 115 +++++++++++++++++++++ 6 files changed, 200 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 666853e..af3790f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -332,6 +332,21 @@ static void kfd_cwsr_fini(struct kfd_dev *kfd) __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size)); } +static void kfd_ib_mem_init(struct kfd_dev *kdev) +{ + /* In certain cases we need to send an IB from the kernel using the GPU + * address space created by user applications. + * For example, on GFX v7, we need to flush TC associated with the VMID + * before tearing down the VMID. To do so, the IB must be placed at an + * address that is valid in that VMID, but that address space is created + * on the user's side, not by the kernel. 
+ * Since kfd_set_process_dgpu_aperture reserves "cwsr_base + cwsr_size" + * but CWSR only uses pages above cwsr_base, we'll use one page memory + * under cwsr_base for IB submissions + */ + kdev->ib_size = PAGE_SIZE; +} + #if defined(CONFIG_DEBUG_FS) static int kfd_debugfs_open(struct inode *inode, struct file *file) @@ -501,6 +516,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd_cwsr_init(kfd)) goto device_iommu_pasid_error; + kfd_ib_mem_init(kfd); + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index aacc4dc..1506597 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -138,12 +138,31 @@ static int allocate_vmid(struct device_queue_manager *dqm, return 0; } +static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, + struct qcm_process_device *qpd) +{ + uint32_t len; + + if (!qpd->ib_kaddr) + return -ENOMEM; + + len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + + return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); +} + static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; + /* On GFX v7, CP doesn't flush TC at dequeue */ + if (q->device->device_info->asic_family == CHIP_HAWAII) + if (flush_texture_cache_nocpsch(q->device, qpd)) + pr_err("kfd: Failed to flush TC\n"); + /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 587f847..c52853f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -295,6 +295,7 @@ #define DGPU_VM_BASE_DEFAULT 0x100000 +#define 
DGPU_IB_BASE_DEFAULT (DGPU_VM_BASE_DEFAULT - PAGE_SIZE) int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, uint64_t base, uint64_t limit) @@ -355,9 +356,10 @@ int kfd_init_apertures(struct kfd_process *process) pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); - if (KFD_IS_DGPU(dev->device_info->asic_family)) + if (KFD_IS_DGPU(dev->device_info->asic_family)) { pdd->qpd.cwsr_base = DGPU_VM_BASE_DEFAULT; - + pdd->qpd.ib_base = DGPU_IB_BASE_DEFAULT; + } } dev_dbg(kfd_device, "node id %u\n", id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index f777645..50d015f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -554,6 +554,43 @@ static int get_map_process_packet_size_scratch(void) return sizeof(struct pm4_map_process_scratch); } +/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size + * of this packet + * @gpu_addr - GPU address of the packet. It's a virtual address. + * @buffer - buffer to fill up with the packet. 
It's a CPU kernel pointer + * Return - length of the packet in dwords + */ +uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4__release_mem *packet; + + WARN_ON(!buffer); + + packet = (struct pm4__release_mem *)buffer; + memset(buffer, 0, sizeof(struct pm4__release_mem)); + + packet->header.u32all = build_pm4_header(IT_RELEASE_MEM, + sizeof(struct pm4__release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return sizeof(struct pm4__release_mem) / sizeof(unsigned int); +} + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, uint16_t fw_ver) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d19fd6b..e702ed5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -271,6 +271,9 @@ struct kfd_dev { uint32_t cwsr_size; uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/ + /* IB usage */ + uint32_t ib_size; + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_root; @@ -529,6 +532,10 @@ struct qcm_process_device { uint64_t tba_addr; uint64_t tma_addr; void *cwsr_kaddr; + + /* IB memory */ + uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ + void *ib_kaddr; }; /*8 byte handle containing GPU ID in the most significant 4 bytes and @@ -842,6 +849,7 @@ struct packet_manager_firmware { int 
(*get_map_process_packet_size)(void); }; +uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, uint16_t fw_ver); void pm_uninit(struct packet_manager *pm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ff1669b..9b67aaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -79,6 +79,120 @@ void kfd_process_destroy_wq(void) } } +static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem, + void *vm) +{ + kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm); + kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); +} + +/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process + * The process lock can't be held while allocating GPU memory, so it is + * dropped here and someone else may allocate the memory in the meantime. + * In that case this allocation is freed and -EINVAL is returned, while + * *addr_to_assign stays non-zero so the caller knows the memory is + * allocated (by someone else) and available to use. 
+ */ +static int kfd_process_alloc_gpuvm(struct kfd_process *p, + struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size, + void *vm, void **kptr, struct kfd_process_device *pdd, + uint64_t *addr_to_assign) +{ + int err; + void *mem = NULL; + + /* can't hold the process lock while allocating from KGD */ + up_write(&p->lock); + + err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm, + (struct kgd_mem **)&mem, NULL, kptr, pdd, + ALLOC_MEM_FLAGS_GTT | + ALLOC_MEM_FLAGS_NONPAGED | + ALLOC_MEM_FLAGS_EXECUTE_ACCESS | + ALLOC_MEM_FLAGS_NO_SUBSTITUTE); + if (err) + goto err_alloc_mem; + + err = kfd_map_memory_to_gpu(kdev, mem, p, pdd); + if (err) + goto err_map_mem; + + down_write(&p->lock); + /* Check if someone else allocated the memory while we weren't looking + */ + if (*addr_to_assign) { + err = -EINVAL; + goto free_gpuvm; + } else { + /* Create an obj handle so kfd_process_device_remove_obj_handle + * will take care of the bo removal when the process finishes + */ + if (kfd_process_device_create_obj_handle( + pdd, mem, gpu_va, size) < 0) { + err = -ENOMEM; + *kptr = NULL; + goto free_gpuvm; + } + } + + return err; + +free_gpuvm: + up_write(&p->lock); + kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm); + down_write(&p->lock); + return err; + +err_map_mem: + kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); +err_alloc_mem: + *kptr = NULL; + down_write(&p->lock); + return err; +} + +/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage + * The memory reserved is for KFD to submit IB to AMDGPU from kernel. + * If the memory is reserved successfully, qpd->ib_kaddr will have + * the CPU/kernel address. Check qpd->ib_kaddr before accessing the + * memory. 
+ */ +static int kfd_process_reserve_ib_mem(struct kfd_process *p) +{ + int err = 0; + struct kfd_process_device *temp, *pdd = NULL; + struct kfd_dev *kdev = NULL; + struct qcm_process_device *qpd = NULL; + void *kaddr; + + down_write(&p->lock); + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + kdev = pdd->dev; + qpd = &pdd->qpd; + if (!kdev->ib_size || qpd->ib_kaddr) + continue; + + if (qpd->ib_base) { /* is dGPU */ + err = kfd_process_alloc_gpuvm(p, kdev, + qpd->ib_base, kdev->ib_size, pdd->vm, + &kaddr, pdd, (uint64_t *)&qpd->ib_kaddr); + if (!err) + qpd->ib_kaddr = kaddr; + else if (qpd->ib_kaddr) + err = 0; + else + err = -ENOMEM; + } else { + /* FIXME: Support APU */ + err = -ENOMEM; + } + } + + up_write(&p->lock); + return err; +} + struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; @@ -117,6 +231,7 @@ struct kfd_process *kfd_create_process(struct file *filep) up_write(&thread->mm->mmap_sem); kfd_process_init_cwsr(process, filep); + kfd_process_reserve_ib_mem(process); return process; } -- 2.7.4