aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch335
1 files changed, 335 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch
new file mode 100644
index 00000000..21714894
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1159-drm-amdkfd-Flush-TC-for-GFX-v7.patch
@@ -0,0 +1,335 @@
+From a4f81fb2804858867dcc2d0bf338c76a09867a36 Mon Sep 17 00:00:00 2001
+From: Amber Lin <Amber.Lin@amd.com>
+Date: Fri, 8 Jul 2016 16:18:02 -0400
+Subject: [PATCH 1159/4131] drm/amdkfd: Flush TC for GFX v7
+
+GFX v7 doesn't flush the texture cache at DEQUEUE even if dirty cache lines
+remain. This patch submits an IB with a RELEASE_MEM packet to flush the
+cache before tearing down the VMID. For each process, one page below the
+CWSR memory is reserved for IB usage.
+
+BUG: SWDEV-93847
+
+Signed-off-by: Amber Lin <Amber.Lin@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++
+ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 +++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 115 +++++++++++++++++++++
+ 6 files changed, 200 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 666853e..af3790f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -332,6 +332,21 @@ static void kfd_cwsr_fini(struct kfd_dev *kfd)
+ __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size));
+ }
+
++static void kfd_ib_mem_init(struct kfd_dev *kdev)
++{
++ /* In certain cases we need to send an IB from the kernel using the GPU
++ * address space created by user applications.
++ * For example, on GFX v7, we need to flush the TC associated with the
++ * VMID before tearing down the VMID. To do so, we need an address
++ * valid in that VMID to place the IB, although the address space was
++ * created on the user's side, not by the kernel.
++ * Since kfd_set_process_dgpu_aperture reserves "cwsr_base + cwsr_size"
++ * but CWSR only uses pages above cwsr_base, we'll use one page of
++ * memory under cwsr_base for IB submissions.
++ */
++ kdev->ib_size = PAGE_SIZE;
++}
++
+ #if defined(CONFIG_DEBUG_FS)
+
+ static int kfd_debugfs_open(struct inode *inode, struct file *file)
+@@ -501,6 +516,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ if (kfd_cwsr_init(kfd))
+ goto device_iommu_pasid_error;
+
++ kfd_ib_mem_init(kfd);
++
+ if (kfd_resume(kfd))
+ goto kfd_resume_error;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index aacc4dc..1506597 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -138,12 +138,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
+ return 0;
+ }
+
++static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
++ struct qcm_process_device *qpd)
++{
++ uint32_t len;
++
++ if (!qpd->ib_kaddr)
++ return -ENOMEM;
++
++ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
++
++ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
++ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
++}
++
+ static void deallocate_vmid(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ struct queue *q)
+ {
+ int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+
++ /* On GFX v7, CP doesn't flush TC at dequeue */
++ if (q->device->device_info->asic_family == CHIP_HAWAII)
++ if (flush_texture_cache_nocpsch(q->device, qpd))
++ pr_err("kfd: Failed to flush TC\n");
++
+ /* Release the vmid mapping */
+ set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+index 587f847..c52853f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+@@ -295,6 +295,7 @@
+
+
+ #define DGPU_VM_BASE_DEFAULT 0x100000
++#define DGPU_IB_BASE_DEFAULT (DGPU_VM_BASE_DEFAULT - PAGE_SIZE)
+
+ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd,
+ uint64_t base, uint64_t limit)
+@@ -355,9 +356,10 @@ int kfd_init_apertures(struct kfd_process *process)
+ pdd->scratch_limit =
+ MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+
+- if (KFD_IS_DGPU(dev->device_info->asic_family))
++ if (KFD_IS_DGPU(dev->device_info->asic_family)) {
+ pdd->qpd.cwsr_base = DGPU_VM_BASE_DEFAULT;
+-
++ pdd->qpd.ib_base = DGPU_IB_BASE_DEFAULT;
++ }
+ }
+
+ dev_dbg(kfd_device, "node id %u\n", id);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index ea0dcd1..47071cc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -556,6 +556,43 @@ static int get_map_process_packet_size_scratch(void)
+ return sizeof(struct pm4_map_process_scratch);
+ }
+
++/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
++ * of this packet
++ * @gpu_addr - GPU address of the packet. It's a virtual address.
++ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
++ * Return - length of the packet in dwords (units of unsigned int)
++ */
++uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
++{
++ struct pm4__release_mem *packet;
++
++ WARN_ON(!buffer);
++
++ packet = (struct pm4__release_mem *)buffer;
++ memset(buffer, 0, sizeof(struct pm4__release_mem));
++
++ packet->header.u32all = build_pm4_header(IT_RELEASE_MEM,
++ sizeof(struct pm4__release_mem));
++
++ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
++ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
++ packet->bitfields2.tcl1_action_ena = 1;
++ packet->bitfields2.tc_action_ena = 1;
++ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
++ packet->bitfields2.atc = 0;
++
++ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
++ packet->bitfields3.int_sel =
++ int_sel___release_mem__send_interrupt_after_write_confirm;
++
++ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
++ packet->address_hi = upper_32_bits(gpu_addr);
++
++ packet->data_lo = 0;
++
++ return sizeof(struct pm4__release_mem) / sizeof(unsigned int);
++}
++
+ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
+ uint16_t fw_ver)
+ {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a222efc..3814e5a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -271,6 +271,9 @@ struct kfd_dev {
+ uint32_t cwsr_size;
+ uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/
+
++ /* IB usage */
++ uint32_t ib_size;
++
+ /* Debugfs */
+ #if defined(CONFIG_DEBUG_FS)
+ struct dentry *debugfs_root;
+@@ -529,6 +532,10 @@ struct qcm_process_device {
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ void *cwsr_kaddr;
++
++ /* IB memory */
++ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */
++ void *ib_kaddr;
+ };
+
+ /*8 byte handle containing GPU ID in the most significant 4 bytes and
+@@ -842,6 +849,7 @@ struct packet_manager_firmware {
+ int (*get_map_process_packet_size)(void);
+ };
+
++uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
+ uint16_t fw_ver);
+ void pm_uninit(struct packet_manager *pm);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index ff1669b..9b67aaf 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -79,6 +79,120 @@ void kfd_process_destroy_wq(void)
+ }
+ }
+
++static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem,
++ void *vm)
++{
++ kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm);
++ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
++}
++
++/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
++ * The process lock can't be held during the GPU memory allocation, so it
++ * is released and re-acquired here. There's a chance someone else
++ * allocates the memory in between. In that case our allocation is freed
++ * and -EINVAL is returned; *kptr is then stale, so the caller must check
++ * *addr_to_assign to find the memory allocated by someone else.
++ */
++static int kfd_process_alloc_gpuvm(struct kfd_process *p,
++ struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size,
++ void *vm, void **kptr, struct kfd_process_device *pdd,
++ uint64_t *addr_to_assign)
++{
++ int err;
++ void *mem = NULL;
++
++ /* can't hold the process lock while allocating from KGD */
++ up_write(&p->lock);
++
++ err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm,
++ (struct kgd_mem **)&mem, NULL, kptr, pdd,
++ ALLOC_MEM_FLAGS_GTT |
++ ALLOC_MEM_FLAGS_NONPAGED |
++ ALLOC_MEM_FLAGS_EXECUTE_ACCESS |
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE);
++ if (err)
++ goto err_alloc_mem;
++
++ err = kfd_map_memory_to_gpu(kdev, mem, p, pdd);
++ if (err)
++ goto err_map_mem;
++
++ down_write(&p->lock);
++ /* Check if someone else allocated the memory while we weren't looking
++ */
++ if (*addr_to_assign) {
++ err = -EINVAL;
++ goto free_gpuvm;
++ } else {
++ /* Create an obj handle so kfd_process_device_remove_obj_handle
++ * will take care of the bo removal when the process finishes
++ */
++ if (kfd_process_device_create_obj_handle(
++ pdd, mem, gpu_va, size) < 0) {
++ err = -ENOMEM;
++ *kptr = NULL;
++ goto free_gpuvm;
++ }
++ }
++
++ return err;
++
++free_gpuvm:
++ up_write(&p->lock);
++ kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm);
++ down_write(&p->lock);
++ return err;
++
++err_map_mem:
++ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
++err_alloc_mem:
++ *kptr = NULL;
++ down_write(&p->lock);
++ return err;
++}
++
++/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
++ * The memory reserved is for KFD to submit IB to AMDGPU from kernel.
++ * If the memory is reserved successfully, qpd->ib_kaddr will hold the
++ * CPU/kernel address for that device. Check qpd->ib_kaddr before accessing
++ * the memory.
++ */
++static int kfd_process_reserve_ib_mem(struct kfd_process *p)
++{
++ int err = 0;
++ struct kfd_process_device *temp, *pdd = NULL;
++ struct kfd_dev *kdev = NULL;
++ struct qcm_process_device *qpd = NULL;
++ void *kaddr;
++
++ down_write(&p->lock);
++ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
++ per_device_list) {
++ kdev = pdd->dev;
++ qpd = &pdd->qpd;
++ if (!kdev->ib_size || qpd->ib_kaddr)
++ continue;
++
++ if (qpd->ib_base) { /* is dGPU */
++ err = kfd_process_alloc_gpuvm(p, kdev,
++ qpd->ib_base, kdev->ib_size, pdd->vm,
++ &kaddr, pdd, (uint64_t *)&qpd->ib_kaddr);
++ if (!err)
++ qpd->ib_kaddr = kaddr;
++ else if (qpd->ib_kaddr)
++ err = 0;
++ else
++ err = -ENOMEM;
++ } else {
++ /* FIXME: Support APU */
++ err = -ENOMEM;
++ }
++ }
++
++ up_write(&p->lock);
++ return err;
++}
++
+ struct kfd_process *kfd_create_process(struct file *filep)
+ {
+ struct kfd_process *process;
+@@ -117,6 +231,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
+ up_write(&thread->mm->mmap_sem);
+
+ kfd_process_init_cwsr(process, filep);
++ kfd_process_reserve_ib_mem(process);
+
+ return process;
+ }
+--
+2.7.4
+