path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch')
-rw-r--r--  meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch  362
1 file changed, 362 insertions, 0 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch
new file mode 100644
index 00000000..dcb738b0
--- /dev/null
+++ b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1554-drm-amdkfd-Fix-the-PTE-not-cleared-on-BO-unmapping-b.patch
@@ -0,0 +1,362 @@
+From 090fd495113df59a3421e4e778b776ff8b6cd75f Mon Sep 17 00:00:00 2001
+From: Yong Zhao <yong.zhao@amd.com>
+Date: Mon, 31 Oct 2016 17:18:22 -0400
+Subject: [PATCH 1554/4131] drm/amdkfd: Fix the PTE not cleared on BO unmapping
+ bug
+
+This bug hides very well because the BO mapping and unmapping paths
+work in concert to neither erase the PTE nor release the VM mapping
+until the BO gets freed. The same applies to memory eviction.
+
+We fix the bug by correctly erasing the PTE and releasing the VM mapping
+on unmapping, and rewriting the PTE and recreating the VM mapping on mapping.
+
+Fix BUG: SWDEV-89590 SWDEV-106528
+
+Change-Id: If7312e8912aedc9365f359ec99719bf76d493665
+Signed-off-by: Yong Zhao <yong.zhao@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 174 +++++++++++------------
+ 2 files changed, 87 insertions(+), 94 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index dcf2c5a..62ef856 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -41,6 +41,7 @@ struct kfd_bo_va_list {
+ void *kgd_dev;
+ bool is_mapped;
+ bool map_fail;
++ uint64_t va;
+ };
+
+ struct kgd_mem {
+@@ -55,9 +56,11 @@ struct kgd_mem {
+ unsigned int evicted; /* eviction counter */
+ struct delayed_work work; /* for restore evicted mem */
+ struct mm_struct *mm; /* for restore */
++
++ uint32_t pte_flags;
++
++
+ /* flags bitfield */
+- bool readonly : 1;
+- bool execute : 1;
+ bool no_substitute : 1;
+ bool aql_queue : 1;
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index ff3be96..606996e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -163,12 +163,9 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+ {
+ int ret;
+ struct kfd_bo_va_list *bo_va_entry;
+- uint32_t flags;
+ struct amdgpu_bo *bo = mem->bo;
+ uint64_t va = mem->va;
+ struct list_head *list_bo_va = &mem->bo_va_list;
+- bool readonly = mem->readonly;
+- bool execute = mem->execute;
+
+ if (is_aql)
+ va += bo->tbo.mem.size;
+@@ -191,23 +188,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+ goto err_vmadd;
+ }
+
+- flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE;
+- if (readonly)
+- flags = AMDGPU_PTE_READABLE;
+- if (execute)
+- flags |= AMDGPU_PTE_EXECUTABLE;
+-
+- /* Set virtual address for the allocation, allocate PTs,
+- * if needed, and zero them */
+- ret = amdgpu_vm_bo_map(adev, bo_va_entry->bo_va,
+- va, 0, amdgpu_bo_size(bo),
+- flags | AMDGPU_PTE_VALID);
+- if (ret != 0) {
+- pr_err("amdkfd: Failed to set virtual address for BO. ret == %d (0x%llx)\n",
+- ret, va);
+- goto err_vmsetaddr;
+- }
+-
++ bo_va_entry->va = va;
+ bo_va_entry->kgd_dev = (void *)adev;
+ list_add(&bo_va_entry->bo_list, list_bo_va);
+
+@@ -216,12 +197,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+
+ return 0;
+
+-err_vmsetaddr:
+- amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
+- /* This will put the bo_va_mapping on the vm->freed
+- * list. amdgpu_vm_clear_freed needs the PTs to be reserved so
+- * we don't call it here. That can wait until the next time
+- * the page tables are updated for a map or unmap. */
+ err_vmadd:
+ kfree(bo_va_entry);
+ return ret;
+@@ -409,6 +384,7 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
+ uint64_t user_addr = 0;
+ int byte_align;
+ u32 alloc_domain;
++ uint32_t pte_flags;
+ struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+
+ BUG_ON(kgd == NULL);
+@@ -438,11 +414,18 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
+ }
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ mutex_init(&(*mem)->lock);
+- (*mem)->readonly = readonly;
+- (*mem)->execute = execute;
++
+ (*mem)->no_substitute = no_sub;
+ (*mem)->aql_queue = aql_queue;
+
++ pte_flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_VALID;
++ if (!readonly)
++ pte_flags |= AMDGPU_PTE_WRITEABLE;
++ if (execute)
++ pte_flags |= AMDGPU_PTE_EXECUTABLE;
++
++ (*mem)->pte_flags = pte_flags;
++
+ alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain;
+ pr_debug("amdkfd: allocating BO on domain %d with size %llu\n",
+ alloc_domain, size);
+@@ -811,46 +794,56 @@ static int update_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+ return 0;
+ }
+
+-static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
+- struct amdgpu_bo_va *bo_va,
++static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
++ struct amdgpu_bo *bo,
++ struct kfd_bo_va_list *entry,
++ struct amdgpu_sync *sync)
++{
++ struct amdgpu_bo_va *bo_va = entry->bo_va;
++ struct amdgpu_vm *vm = bo_va->vm;
++
++ amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
++
++ amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
++
++ amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
++
++ amdgpu_amdkfd_bo_invalidate(bo);
++
++ return 0;
++}
++
++static int update_gpuvm_pte(struct amdgpu_device *adev, struct amdgpu_bo *bo,
++ struct kfd_bo_va_list *entry,
+ struct amdgpu_sync *sync)
+ {
+- struct amdgpu_vm *vm;
+ int ret;
++ struct amdgpu_vm *vm;
++ struct amdgpu_bo_va *bo_va;
+
++ bo_va = entry->bo_va;
+ vm = bo_va->vm;
+ /* Validate PT / PTs */
+ ret = validate_pt_pd_bos(vm);
+ if (ret != 0) {
+- pr_err("amdkfd: Failed to validate PTs\n");
+- goto err_unpin_bo;
++ pr_err("amdkfd: Failed to validate_pt_pd_bos\n");
++ return ret;
+ }
+
+ /* Update the page directory */
+ ret = amdgpu_vm_update_page_directory(adev, vm);
+ if (ret != 0) {
+- pr_err("amdkfd: Failed to radeon_vm_update_page_directory\n");
+- goto err_unpin_bo;
++ pr_err("amdkfd: Failed to amdgpu_vm_update_page_directory\n");
++ return ret;
+ }
+
+ amdgpu_sync_fence(adev, sync, vm->page_directory_fence);
+
+- /*
+- * The previously "released" BOs are really released and their VAs are
+- * removed from PT. This function is called here because it requires
+- * the radeon_vm::mutex to be locked and PT to be reserved
+- */
+- ret = amdgpu_vm_clear_freed(adev, vm, NULL);
+- if (ret != 0) {
+- pr_err("amdkfd: Failed to radeon_vm_clear_freed\n");
+- goto err_unpin_bo;
+- }
+-
+ /* Update the page tables */
+ ret = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
+ if (ret != 0) {
+- pr_err("amdkfd: Failed to radeon_vm_bo_update\n");
+- goto err_unpin_bo;
++ pr_err("amdkfd: Failed to amdgpu_vm_bo_update\n");
++ return ret;
+ }
+
+ amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+@@ -859,9 +852,38 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
+ amdgpu_vm_move_pt_bos_in_lru(adev, vm);
+
+ return 0;
++}
++
++static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
++ struct kfd_bo_va_list *entry, uint32_t pte_flags,
++ struct amdgpu_sync *sync)
++{
++ int ret;
++ struct amdgpu_bo_va *bo_va;
++
++ bo_va = entry->bo_va;
++ /* Set virtual address for the allocation, allocate PTs,
++ * if needed, and zero them.
++ */
++ ret = amdgpu_vm_bo_map(adev, bo_va,
++ entry->va, 0, amdgpu_bo_size(bo),
++ pte_flags);
++ if (ret != 0) {
++ pr_err("amdkfd: Failed to map bo in vm. ret == %d (0x%llx)\n",
++ ret, entry->va);
++ return ret;
++ }
++
++ ret = update_gpuvm_pte(adev, bo, entry, sync);
++ if (ret != 0) {
++ pr_err("amdkfd: update_gpuvm_pte() failed\n");
++ goto update_gpuvm_pte_failed;
++ }
++
++ return 0;
+
+-err_unpin_bo:
+- amdgpu_amdkfd_bo_invalidate(bo);
++update_gpuvm_pte_failed:
++ unmap_bo_from_gpuvm(adev, bo, entry, sync);
+ return ret;
+ }
+
+@@ -1130,8 +1152,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ pr_debug("amdkfd: Trying to map VA 0x%llx to vm %p\n",
+ mem->va, vm);
+
+- ret = map_bo_to_gpuvm(adev, bo, entry->bo_va,
+- &ctx.sync);
++ ret = map_bo_to_gpuvm(adev, bo, entry, mem->pte_flags,
++ &ctx.sync);
+ if (ret != 0) {
+ pr_err("amdkfd: Failed to map radeon bo to gpuvm\n");
+ goto map_bo_to_gpuvm_failed;
+@@ -1237,15 +1259,6 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+ new_vm->master->n_vms++;
+ *vm = (void *) new_vm;
+
+- /*
+- * The previously "released" BOs are really released and their VAs are
+- * removed from PT. This function is called here because it requires
+- * the radeon_vm::mutex to be locked and PT to be reserved
+- */
+- ret = amdgpu_vm_clear_freed(adev, &new_vm->base, NULL);
+- if (ret != 0)
+- pr_err("amdgpu: Failed to amdgpu_vm_clear_freed\n");
+-
+ pr_debug("amdgpu: created process vm with address 0x%llx\n",
+ get_vm_pd_gpu_offset(&new_vm->base));
+
+@@ -1305,29 +1318,6 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ return 0;
+ }
+
+-static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+- struct amdgpu_bo *bo,
+- struct amdgpu_bo_va *bo_va,
+- struct amdgpu_sync *sync)
+-{
+- struct amdgpu_vm *vm = bo_va->vm;
+-
+- /*
+- * The previously "released" BOs are really released and their VAs are
+- * removed from PT. This function is called here because it requires
+- * the radeon_vm::mutex to be locked and PT to be reserved
+- */
+- amdgpu_vm_clear_freed(adev, vm, NULL);
+-
+- /* Update the page tables - Remove the mapping from bo_va */
+- amdgpu_vm_bo_update(adev, bo_va, NULL);
+- amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+-
+- amdgpu_amdkfd_bo_invalidate(bo);
+-
+- return 0;
+-}
+-
+ static bool is_mem_on_local_device(struct kgd_dev *kgd,
+ struct list_head *bo_va_list, void *vm)
+ {
+@@ -1396,7 +1386,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ mem->bo->tbo.mem.size);
+
+ ret = unmap_bo_from_gpuvm(adev, mem->bo,
+- entry->bo_va, &ctx.sync);
++ entry, &ctx.sync);
+ if (ret == 0) {
+ entry->is_mapped = false;
+ } else {
+@@ -1686,8 +1676,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, int dma_buf_fd,
+
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ mutex_init(&(*mem)->lock);
+- (*mem)->readonly = false;
+- (*mem)->execute = true; /* executable by default */
++ (*mem)->pte_flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_VALID
++ | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE;
+
+ (*mem)->bo = amdgpu_bo_ref(bo);
+ (*mem)->va = va;
+@@ -1750,7 +1740,7 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ adev = (struct amdgpu_device *)entry->kgd_dev;
+
+ r = unmap_bo_from_gpuvm(adev, mem->bo,
+- entry->bo_va, &ctx.sync);
++ entry, &ctx.sync);
+ if (r != 0) {
+ pr_err("failed unmap va 0x%llx\n",
+ mem->va);
+@@ -1849,8 +1839,8 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ continue;
+ }
+
+- r = map_bo_to_gpuvm(adev, mem->bo, entry->bo_va,
+- &ctx.sync);
++ r = map_bo_to_gpuvm(adev, mem->bo, entry, mem->pte_flags,
++ &ctx.sync);
+ if (unlikely(r != 0)) {
+ pr_err("Failed to map BO to gpuvm\n");
+ entry->map_fail = true;
+@@ -2026,9 +2016,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
+
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list,
+ bo_list) {
+- ret = map_bo_to_gpuvm((struct amdgpu_device *)
++ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+- bo, bo_va_entry->bo_va,
++ bo, bo_va_entry,
+ &ctx.sync);
+ if (ret) {
+ pr_debug("Memory eviction: Map failed. Try again\n");
+--
+2.7.4
+