aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch')
-rw-r--r--meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch335
1 files changed, 0 insertions, 335 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch
deleted file mode 100644
index 1eb0f0e2..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1473-drm-amdkfd-Flush-TC-for-GFX-v7.patch
+++ /dev/null
@@ -1,335 +0,0 @@
-From f038d18b7f4a5d69740d8c9bf2c8e67721753c01 Mon Sep 17 00:00:00 2001
-From: Amber Lin <Amber.Lin@amd.com>
-Date: Fri, 8 Jul 2016 16:18:02 -0400
-Subject: [PATCH 1473/4131] drm/amdkfd: Flush TC for GFX v7
-
-GFX v7 doesn't flush texture cache at DEQUEUE if any dirty cache remains.
-This patch submits an IB packet of RELEASE_MEM command to flush the cache
-before tearing down VMID. For each process, One page below CWSR memory is
-reserved for IB usage.
-
-BUG: SWDEV-93847
-
-Signed-off-by: Amber Lin <Amber.Lin@amd.com>
----
- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++
- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++
- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +-
- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 +++++++
- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++
- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 115 +++++++++++++++++++++
- 6 files changed, 200 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-index 666853e..af3790f 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-@@ -332,6 +332,21 @@ static void kfd_cwsr_fini(struct kfd_dev *kfd)
- __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size));
- }
-
-+static void kfd_ib_mem_init(struct kfd_dev *kdev)
-+{
-+ /* In certain cases we need to send IB from kernel using the GPU address
-+ * space created by user applications.
-+ * For example, on GFX v7, we need to flush TC associated to the VMID
-+ * before tearing down the VMID. In order to do so, we need an address
-+ * valid to the VMID to place the IB while this space was created on
-+ * the user's side, not the kernel.
-+ * Since kfd_set_process_dgpu_aperture reserves "cwsr_base + cwsr_size"
-+ * but CWSR only uses pages above cwsr_base, we'll use one page memory
-+ * under cwsr_base for IB submissions
-+ */
-+ kdev->ib_size = PAGE_SIZE;
-+}
-+
- #if defined(CONFIG_DEBUG_FS)
-
- static int kfd_debugfs_open(struct inode *inode, struct file *file)
-@@ -501,6 +516,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
- if (kfd_cwsr_init(kfd))
- goto device_iommu_pasid_error;
-
-+ kfd_ib_mem_init(kfd);
-+
- if (kfd_resume(kfd))
- goto kfd_resume_error;
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-index aacc4dc..1506597 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-@@ -138,12 +138,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
- return 0;
- }
-
-+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
-+ struct qcm_process_device *qpd)
-+{
-+ uint32_t len;
-+
-+ if (!qpd->ib_kaddr)
-+ return -ENOMEM;
-+
-+ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
-+
-+ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
-+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
-+}
-+
- static void deallocate_vmid(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- struct queue *q)
- {
- int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
-+ /* On GFX v7, CP doesn't flush TC at dequeue */
-+ if (q->device->device_info->asic_family == CHIP_HAWAII)
-+ if (flush_texture_cache_nocpsch(q->device, qpd))
-+ pr_err("kfd: Failed to flush TC\n");
-+
- /* Release the vmid mapping */
- set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
-index 587f847..c52853f 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
-@@ -295,6 +295,7 @@
-
-
- #define DGPU_VM_BASE_DEFAULT 0x100000
-+#define DGPU_IB_BASE_DEFAULT (DGPU_VM_BASE_DEFAULT - PAGE_SIZE)
-
- int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd,
- uint64_t base, uint64_t limit)
-@@ -355,9 +356,10 @@ int kfd_init_apertures(struct kfd_process *process)
- pdd->scratch_limit =
- MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
-
-- if (KFD_IS_DGPU(dev->device_info->asic_family))
-+ if (KFD_IS_DGPU(dev->device_info->asic_family)) {
- pdd->qpd.cwsr_base = DGPU_VM_BASE_DEFAULT;
--
-+ pdd->qpd.ib_base = DGPU_IB_BASE_DEFAULT;
-+ }
- }
-
- dev_dbg(kfd_device, "node id %u\n", id);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-index f777645..50d015f 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-@@ -554,6 +554,43 @@ static int get_map_process_packet_size_scratch(void)
- return sizeof(struct pm4_map_process_scratch);
- }
-
-+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
-+ * of this packet
-+ * @gpu_addr - GPU address of the packet. It's a virtual address.
-+ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
-+ * Return - length of the packet
-+ */
-+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
-+{
-+ struct pm4__release_mem *packet;
-+
-+ WARN_ON(!buffer);
-+
-+ packet = (struct pm4__release_mem *)buffer;
-+ memset(buffer, 0, sizeof(struct pm4__release_mem));
-+
-+ packet->header.u32all = build_pm4_header(IT_RELEASE_MEM,
-+ sizeof(struct pm4__release_mem));
-+
-+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
-+ packet->bitfields2.tcl1_action_ena = 1;
-+ packet->bitfields2.tc_action_ena = 1;
-+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
-+ packet->bitfields2.atc = 0;
-+
-+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
-+ packet->bitfields3.int_sel =
-+ int_sel___release_mem__send_interrupt_after_write_confirm;
-+
-+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-+ packet->address_hi = upper_32_bits(gpu_addr);
-+
-+ packet->data_lo = 0;
-+
-+ return sizeof(struct pm4__release_mem) / sizeof(unsigned int);
-+}
-+
- int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
- uint16_t fw_ver)
- {
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-index d19fd6b..e702ed5 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-@@ -271,6 +271,9 @@ struct kfd_dev {
- uint32_t cwsr_size;
- uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/
-
-+ /* IB usage */
-+ uint32_t ib_size;
-+
- /* Debugfs */
- #if defined(CONFIG_DEBUG_FS)
- struct dentry *debugfs_root;
-@@ -529,6 +532,10 @@ struct qcm_process_device {
- uint64_t tba_addr;
- uint64_t tma_addr;
- void *cwsr_kaddr;
-+
-+ /* IB memory */
-+ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */
-+ void *ib_kaddr;
- };
-
- /*8 byte handle containing GPU ID in the most significant 4 bytes and
-@@ -842,6 +849,7 @@ struct packet_manager_firmware {
- int (*get_map_process_packet_size)(void);
- };
-
-+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
- int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
- uint16_t fw_ver);
- void pm_uninit(struct packet_manager *pm);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-index ff1669b..9b67aaf 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-@@ -79,6 +79,120 @@ void kfd_process_destroy_wq(void)
- }
- }
-
-+static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem,
-+ void *vm)
-+{
-+ kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm);
-+ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
-+}
-+
-+/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
-+ * During the memory allocation of GPU, we can't hold the process lock.
-+ * There's a chance someone else allocates the memory during the lock
-+ * released time. In that case, -EINVAL is returned but kptr remains so
-+ * the caller knows the memory is allocated (by someone else) and
-+ * available to use.
-+ */
-+static int kfd_process_alloc_gpuvm(struct kfd_process *p,
-+ struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size,
-+ void *vm, void **kptr, struct kfd_process_device *pdd,
-+ uint64_t *addr_to_assign)
-+{
-+ int err;
-+ void *mem = NULL;
-+
-+ /* can't hold the process lock while allocating from KGD */
-+ up_write(&p->lock);
-+
-+ err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm,
-+ (struct kgd_mem **)&mem, NULL, kptr, pdd,
-+ ALLOC_MEM_FLAGS_GTT |
-+ ALLOC_MEM_FLAGS_NONPAGED |
-+ ALLOC_MEM_FLAGS_EXECUTE_ACCESS |
-+ ALLOC_MEM_FLAGS_NO_SUBSTITUTE);
-+ if (err)
-+ goto err_alloc_mem;
-+
-+ err = kfd_map_memory_to_gpu(kdev, mem, p, pdd);
-+ if (err)
-+ goto err_map_mem;
-+
-+ down_write(&p->lock);
-+ /* Check if someone else allocated the memory while we weren't looking
-+ */
-+ if (*addr_to_assign) {
-+ err = -EINVAL;
-+ goto free_gpuvm;
-+ } else {
-+ /* Create an obj handle so kfd_process_device_remove_obj_handle
-+ * will take care of the bo removal when the process finishes
-+ */
-+ if (kfd_process_device_create_obj_handle(
-+ pdd, mem, gpu_va, size) < 0) {
-+ err = -ENOMEM;
-+ *kptr = NULL;
-+ goto free_gpuvm;
-+ }
-+ }
-+
-+ return err;
-+
-+free_gpuvm:
-+ up_write(&p->lock);
-+ kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm);
-+ down_write(&p->lock);
-+ return err;
-+
-+err_map_mem:
-+ kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
-+err_alloc_mem:
-+ *kptr = NULL;
-+ down_write(&p->lock);
-+ return err;
-+}
-+
-+/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
-+ * The memory reserved is for KFD to submit IB to AMDGPU from kernel.
-+ * If the memory is reserved successfully, ib_kaddr_assigned will have
-+ * the CPU/kernel address. Check ib_kaddr_assigned before accessing the
-+ * memory.
-+ */
-+static int kfd_process_reserve_ib_mem(struct kfd_process *p)
-+{
-+ int err = 0;
-+ struct kfd_process_device *temp, *pdd = NULL;
-+ struct kfd_dev *kdev = NULL;
-+ struct qcm_process_device *qpd = NULL;
-+ void *kaddr;
-+
-+ down_write(&p->lock);
-+ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
-+ per_device_list) {
-+ kdev = pdd->dev;
-+ qpd = &pdd->qpd;
-+ if (!kdev->ib_size || qpd->ib_kaddr)
-+ continue;
-+
-+ if (qpd->ib_base) { /* is dGPU */
-+ err = kfd_process_alloc_gpuvm(p, kdev,
-+ qpd->ib_base, kdev->ib_size, pdd->vm,
-+ &kaddr, pdd, (uint64_t *)&qpd->ib_kaddr);
-+ if (!err)
-+ qpd->ib_kaddr = kaddr;
-+ else if (qpd->ib_kaddr)
-+ err = 0;
-+ else
-+ err = -ENOMEM;
-+ } else {
-+ /* FIXME: Support APU */
-+ err = -ENOMEM;
-+ }
-+ }
-+
-+ up_write(&p->lock);
-+ return err;
-+}
-+
- struct kfd_process *kfd_create_process(struct file *filep)
- {
- struct kfd_process *process;
-@@ -117,6 +231,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
- up_write(&thread->mm->mmap_sem);
-
- kfd_process_init_cwsr(process, filep);
-+ kfd_process_reserve_ib_mem(process);
-
- return process;
- }
---
-2.7.4
-