diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch | 370 |
1 file changed, 370 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch new file mode 100644 index 00000000..44040fbc --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch @@ -0,0 +1,370 @@ +From 909b82ea5625d797f9bde9be6378ba3ee8a55ec5 Mon Sep 17 00:00:00 2001 +From: Lan Xiao <Lan.Xiao@amd.com> +Date: Fri, 23 Jun 2017 16:06:48 -0400 +Subject: [PATCH 1727/4131] drm/amd: Implement parallel memory mapping on mGPUs + +Alter the KFD-KGD interface to optimize multi-GPU memory mappings to +work concurrently instead of sequentially. Return the fences +during the process, wait for all fences after the mappings are done. +The fences are stored in the associated kgd_mem object. + +This change also enables interruptible waiting with proper signal +handling + +Change-Id: I9ae7f4bd54165b14dd5b37df5df6516aa80cba83 +Signed-off-by: Lan Xiao <Lan.Xiao@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 74 +++++++++++++++++------ + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++ + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++ + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 + + 8 files changed, 78 insertions(+), 20 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index ba1e24c9..924e28a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -64,6 +64,8 @@ struct kgd_mem { + struct amdkfd_process_info *process_info; + struct page **user_pages; + ++ struct amdgpu_sync sync; ++ + + 
/* flags bitfield */ + bool coherent : 1; +@@ -190,6 +192,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + }) + + /* GPUVM API */ ++int amdgpu_amdkfd_gpuvm_sync_memory( ++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index 4549dc0..0b2595e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = { + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .invalidate_tlbs = invalidate_tlbs, ++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index 76e3d5d..08da99f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -189,6 +189,7 @@ static const struct kfd2kgd_calls kfd2kgd = { + get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .invalidate_tlbs = invalidate_tlbs, ++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +index d10d213..42e0094 100644 +--- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +@@ -234,6 +234,7 @@ static const struct kfd2kgd_calls kfd2kgd = { + get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .invalidate_tlbs = invalidate_tlbs, ++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 8384dfb..475e7fb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -655,6 +655,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, + + alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain; + ++ amdgpu_sync_create(&(*mem)->sync); ++ + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_err("Insufficient system memory\n"); +@@ -730,7 +732,7 @@ struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry *vm_pd; + struct ww_acquire_ctx ticket; + struct list_head list, duplicates; +- struct amdgpu_sync sync; ++ struct amdgpu_sync *sync; + bool reserved; + }; + +@@ -751,7 +753,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, + + ctx->reserved = false; + ctx->n_vms = 1; +- amdgpu_sync_create(&ctx->sync); ++ ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); +@@ -812,7 +814,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; +- amdgpu_sync_create(&ctx->sync); ++ ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); +@@ -867,19 +869,27 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + return ret; + } + 
+-static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, +- bool wait) ++static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, ++ bool wait, bool intr) + { +- if (wait) /* FIXME: when called from user context, this needs to be interruptible */ +- amdgpu_sync_wait(&ctx->sync, false); ++ int ret = 0; ++ ++ if (wait) { ++ ret = amdgpu_sync_wait(ctx->sync, intr); ++ if (ret) ++ return ret; ++ } + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); + +- amdgpu_sync_free(&ctx->sync); ++ ctx->sync = NULL; ++ + ctx->reserved = false; + ctx->vm_pd = NULL; ++ ++ return ret; + } + + static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, +@@ -1051,6 +1061,25 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) + return sg; + } + ++int amdgpu_amdkfd_gpuvm_sync_memory( ++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) ++{ ++ int ret = 0; ++ struct amdgpu_sync sync; ++ struct amdgpu_device *adev; ++ ++ adev = get_amdgpu_device(kgd); ++ amdgpu_sync_create(&sync); ++ ++ mutex_lock(&mem->lock); ++ amdgpu_sync_clone(adev, &mem->sync, &sync); ++ mutex_unlock(&mem->lock); ++ ++ ret = amdgpu_sync_wait(&sync, intr); ++ amdgpu_sync_free(&sync); ++ return ret; ++} ++ + #define BOOL_TO_STR(b) (b == true) ? 
"true" : "false" + + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( +@@ -1137,7 +1166,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct amdgpu_device *adev; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; +- int ret; ++ int ret = 0; + struct ttm_validate_buffer *bo_list_entry; + struct amdkfd_process_info *process_info; + unsigned long bo_size; +@@ -1199,7 +1228,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + entry, bo_size); + } + +- unreserve_bo_and_vms(&ctx, false); ++ ret = unreserve_bo_and_vms(&ctx, false, true); ++ ++ /* Free the sync object */ ++ amdgpu_sync_free(&mem->sync); + + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. +@@ -1213,7 +1245,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + amdgpu_bo_unref(&mem->bo); + kfree(mem); + +- return 0; ++ return ret; + } + + int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( +@@ -1308,7 +1340,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + entry->va, entry->va + bo_size, + entry); + +- ret = map_bo_to_gpuvm(adev, entry, &ctx.sync, ++ ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); + if (ret != 0) { + pr_err("Failed to map radeon bo to gpuvm\n"); +@@ -1325,7 +1357,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + amdgpu_bo_fence(bo, + &kfd_vm->process_info->eviction_fence->base, + true); +- unreserve_bo_and_vms(&ctx, true); ++ ret = unreserve_bo_and_vms(&ctx, false, true); + + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); +@@ -1338,7 +1370,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + if (bo_va_entry) + remove_bo_from_vm(adev, bo_va_entry, bo_size); + add_bo_to_vm_failed: +- unreserve_bo_and_vms(&ctx, false); ++ unreserve_bo_and_vms(&ctx, false, false); + bo_reserve_failed: + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); +@@ -1569,7 +1601,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + entry->va + bo_size, + entry); + +- ret = unmap_bo_from_gpuvm(adev, entry, 
&ctx.sync); ++ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); + if (ret == 0) { + entry->is_mapped = false; + } else { +@@ -1600,7 +1632,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + } + + unreserve_out: +- unreserve_bo_and_vms(&ctx, false); ++ unreserve_bo_and_vms(&ctx, false, false); + out: + mutex_unlock(&mem->lock); + return ret; +@@ -2235,6 +2267,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) + struct amdgpu_amdkfd_fence *old_fence; + int ret = 0, i; + struct list_head duplicate_save; ++ struct amdgpu_sync sync_obj; + + INIT_LIST_HEAD(&duplicate_save); + INIT_LIST_HEAD(&ctx.list); +@@ -2287,7 +2320,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) + if (!list_empty(&duplicate_save)) + pr_err("BUG: list of BOs to reserve has duplicates!\n"); + +- amdgpu_sync_create(&ctx.sync); ++ amdgpu_sync_create(&sync_obj); ++ ctx.sync = &sync_obj; + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); +@@ -2322,7 +2356,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, +- &ctx.sync); ++ ctx.sync); + if (ret) { + pr_debug("Memory eviction: update PTE failed. 
Try again\n"); + goto validate_map_fail; +@@ -2330,7 +2364,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) + } + } + +- amdgpu_sync_wait(&ctx.sync, false); ++ amdgpu_sync_wait(ctx.sync, false); + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, +@@ -2350,7 +2384,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) + } + validate_map_fail: + ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); +- amdgpu_sync_free(&ctx.sync); ++ amdgpu_sync_free(&sync_obj); + ttm_reserve_fail: + mutex_unlock(&process_info->lock); + evict_fence_fail: +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 64a4373..dbc3afd 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -1398,6 +1398,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + pr_err("Failed to map\n"); + } + ++ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); ++ if (err) { ++ pr_debug("Sync memory failed, wait interrupted by user signal\n"); ++ goto sync_memory_failed; ++ } ++ + if (args->device_ids_array_size > 0 && devices_arr) + kfree(devices_arr); + +@@ -1407,6 +1413,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + up_write(&p->lock); + get_mem_obj_from_handle_failed: + copy_from_user_failed: ++sync_memory_failed: + kfree(devices_arr); + return err; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index d1ef118..f5e2282 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -136,6 +136,13 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, + if (err) + goto err_map_mem; + ++ err = kdev->kfd2kgd->sync_memory(kdev->kgd, (struct kgd_mem *) mem, ++ true); ++ if (err) { ++ pr_debug("Sync memory failed, wait interrupted by user signal\n"); ++ goto 
sync_memory_failed; ++ } ++ + /* Create an obj handle so kfd_process_device_remove_obj_handle + * will take care of the bo removal when the process finishes. + * We do not need to take p->lock, because the process is just +@@ -151,6 +158,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, + return err; + + free_gpuvm: ++sync_memory_failed: + kfd_process_free_gpuvm(mem, pdd); + return err; + +diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +index c0c1cc7..1364429 100644 +--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +@@ -337,6 +337,8 @@ struct kfd2kgd_calls { + + int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); + ++ int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); ++ + int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, + uint64_t size, void *vm, + struct kgd_mem **mem, uint64_t *offset, +-- +2.7.4 + |