diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch | 370 |
1 files changed, 0 insertions, 370 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch deleted file mode 100644 index 44040fbc..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch +++ /dev/null @@ -1,370 +0,0 @@ -From 909b82ea5625d797f9bde9be6378ba3ee8a55ec5 Mon Sep 17 00:00:00 2001 -From: Lan Xiao <Lan.Xiao@amd.com> -Date: Fri, 23 Jun 2017 16:06:48 -0400 -Subject: [PATCH 1727/4131] drm/amd: Implement parallel memory mapping on mGPUs - -Alter the KFD-KGD interface to optimize multi-GPU memory mappings to -work concurrently instead of sequentially. Return the fences -during the process, wait for all fences after the mappings are done. -The fences are stored in the associated kgd_mem object. - -This change also enables interruptible waiting with proper signal -handling - -Change-Id: I9ae7f4bd54165b14dd5b37df5df6516aa80cba83 -Signed-off-by: Lan Xiao <Lan.Xiao@amd.com> ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++ - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 + - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 74 +++++++++++++++++------ - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++ - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++ - drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 + - 8 files changed, 78 insertions(+), 20 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -index ba1e24c9..924e28a 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -@@ -64,6 +64,8 @@ struct kgd_mem { - struct amdkfd_process_info *process_info; - struct page **user_pages; - -+ struct amdgpu_sync sync; -+ - - /* flags bitfield */ - bool coherent : 1; -@@ -190,6 +192,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - }) - - /* GPUVM API */ -+int amdgpu_amdkfd_gpuvm_sync_memory( -+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); - int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -index 4549dc0..0b2595e 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c -@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = { - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, - .write_vmid_invalidate_request = write_vmid_invalidate_request, - .invalidate_tlbs = invalidate_tlbs, -+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -index 76e3d5d..08da99f 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c -@@ -189,6 +189,7 @@ static const struct kfd2kgd_calls kfd2kgd = { - get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, - .invalidate_tlbs = invalidate_tlbs, -+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c -index d10d213..42e0094 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c -@@ -234,6 +234,7 @@ static const struct kfd2kgd_calls kfd2kgd = { - get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, - .invalidate_tlbs = invalidate_tlbs, -+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -index 8384dfb..475e7fb 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -@@ -655,6 +655,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, - - alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain; - -+ amdgpu_sync_create(&(*mem)->sync); -+ - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); - if (ret) { - pr_err("Insufficient system memory\n"); -@@ -730,7 +732,7 @@ struct bo_vm_reservation_context { - struct amdgpu_bo_list_entry *vm_pd; - struct ww_acquire_ctx ticket; - struct list_head list, duplicates; -- struct amdgpu_sync sync; -+ struct amdgpu_sync *sync; - bool reserved; - }; - -@@ -751,7 +753,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, - - ctx->reserved = false; - ctx->n_vms = 1; -- amdgpu_sync_create(&ctx->sync); -+ ctx->sync = &mem->sync; - - INIT_LIST_HEAD(&ctx->list); - INIT_LIST_HEAD(&ctx->duplicates); -@@ -812,7 +814,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, - ctx->reserved = false; - ctx->n_vms = 0; - ctx->vm_pd = NULL; -- amdgpu_sync_create(&ctx->sync); -+ ctx->sync = &mem->sync; - - INIT_LIST_HEAD(&ctx->list); - INIT_LIST_HEAD(&ctx->duplicates); -@@ -867,19 +869,27 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, - return ret; - } - --static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, -- bool wait) -+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, -+ bool wait, bool intr) - { -- if (wait) /* FIXME: when called from user context, this needs to be interruptible */ -- amdgpu_sync_wait(&ctx->sync, false); -+ int ret = 0; -+ -+ if (wait) { -+ ret = amdgpu_sync_wait(ctx->sync, intr); -+ if (ret) -+ return ret; -+ } - - if (ctx->reserved) - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); - kfree(ctx->vm_pd); - -- amdgpu_sync_free(&ctx->sync); -+ ctx->sync = NULL; -+ - ctx->reserved = false; - ctx->vm_pd = NULL; -+ -+ return ret; - } - - static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, -@@ -1051,6 +1061,25 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) - return sg; - } - -+int amdgpu_amdkfd_gpuvm_sync_memory( -+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) -+{ -+ int ret = 0; -+ struct amdgpu_sync sync; -+ struct amdgpu_device *adev; -+ -+ adev = get_amdgpu_device(kgd); -+ amdgpu_sync_create(&sync); -+ -+ mutex_lock(&mem->lock); -+ amdgpu_sync_clone(adev, &mem->sync, &sync); -+ mutex_unlock(&mem->lock); -+ -+ ret = amdgpu_sync_wait(&sync, intr); -+ amdgpu_sync_free(&sync); -+ return ret; -+} -+ - #define BOOL_TO_STR(b) (b == true) ? "true" : "false" - - int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( -@@ -1137,7 +1166,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct amdgpu_device *adev; - struct kfd_bo_va_list *entry, *tmp; - struct bo_vm_reservation_context ctx; -- int ret; -+ int ret = 0; - struct ttm_validate_buffer *bo_list_entry; - struct amdkfd_process_info *process_info; - unsigned long bo_size; -@@ -1199,7 +1228,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - entry, bo_size); - } - -- unreserve_bo_and_vms(&ctx, false); -+ ret = unreserve_bo_and_vms(&ctx, false, true); -+ -+ /* Free the sync object */ -+ amdgpu_sync_free(&mem->sync); - - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. -@@ -1213,7 +1245,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - amdgpu_bo_unref(&mem->bo); - kfree(mem); - -- return 0; -+ return ret; - } - - int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( -@@ -1308,7 +1340,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - entry->va, entry->va + bo_size, - entry); - -- ret = map_bo_to_gpuvm(adev, entry, &ctx.sync, -+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync, - is_invalid_userptr); - if (ret != 0) { - pr_err("Failed to map radeon bo to gpuvm\n"); -@@ -1325,7 +1357,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - amdgpu_bo_fence(bo, - &kfd_vm->process_info->eviction_fence->base, - true); -- unreserve_bo_and_vms(&ctx, true); -+ ret = unreserve_bo_and_vms(&ctx, false, true); - - mutex_unlock(&mem->process_info->lock); - mutex_unlock(&mem->lock); -@@ -1338,7 +1370,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - if (bo_va_entry) - remove_bo_from_vm(adev, bo_va_entry, bo_size); - add_bo_to_vm_failed: -- unreserve_bo_and_vms(&ctx, false); -+ unreserve_bo_and_vms(&ctx, false, false); - bo_reserve_failed: - mutex_unlock(&mem->process_info->lock); - mutex_unlock(&mem->lock); -@@ -1569,7 +1601,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - entry->va + bo_size, - entry); - -- ret = unmap_bo_from_gpuvm(adev, entry, &ctx.sync); -+ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); - if (ret == 0) { - entry->is_mapped = false; - } else { -@@ -1600,7 +1632,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - } - - unreserve_out: -- unreserve_bo_and_vms(&ctx, false); -+ unreserve_bo_and_vms(&ctx, false, false); - out: - mutex_unlock(&mem->lock); - return ret; -@@ -2235,6 +2267,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) - struct amdgpu_amdkfd_fence *old_fence; - int ret = 0, i; - struct list_head duplicate_save; -+ struct amdgpu_sync sync_obj; - - INIT_LIST_HEAD(&duplicate_save); - INIT_LIST_HEAD(&ctx.list); -@@ -2287,7 +2320,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) - if (!list_empty(&duplicate_save)) - pr_err("BUG: list of BOs to reserve has duplicates!\n"); - -- amdgpu_sync_create(&ctx.sync); -+ amdgpu_sync_create(&sync_obj); -+ ctx.sync = &sync_obj; - - /* Validate PDs and PTs */ - ret = process_validate_vms(process_info); -@@ -2322,7 +2356,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) - ret = update_gpuvm_pte((struct amdgpu_device *) - bo_va_entry->kgd_dev, - bo_va_entry, -- &ctx.sync); -+ ctx.sync); - if (ret) { - pr_debug("Memory eviction: update PTE failed. Try again\n"); - goto validate_map_fail; -@@ -2330,7 +2364,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) - } - } - -- amdgpu_sync_wait(&ctx.sync, false); -+ amdgpu_sync_wait(ctx.sync, false); - - /* Wait for validate to finish and attach new eviction fence */ - list_for_each_entry(mem, &process_info->kfd_bo_list, -@@ -2350,7 +2384,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info) - } - validate_map_fail: - ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); -- amdgpu_sync_free(&ctx.sync); -+ amdgpu_sync_free(&sync_obj); - ttm_reserve_fail: - mutex_unlock(&process_info->lock); - evict_fence_fail: -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -index 64a4373..dbc3afd 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -@@ -1398,6 +1398,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, - pr_err("Failed to map\n"); - } - -+ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); -+ if (err) { -+ pr_debug("Sync memory failed, wait interrupted by user signal\n"); -+ goto sync_memory_failed; -+ } -+ - if (args->device_ids_array_size > 0 && devices_arr) - kfree(devices_arr); - -@@ -1407,6 +1413,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, - up_write(&p->lock); - get_mem_obj_from_handle_failed: - copy_from_user_failed: -+sync_memory_failed: - kfree(devices_arr); - return err; - } -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -index d1ef118..f5e2282 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -@@ -136,6 +136,13 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, - if (err) - goto err_map_mem; - -+ err = kdev->kfd2kgd->sync_memory(kdev->kgd, (struct kgd_mem *) mem, -+ true); -+ if (err) { -+ pr_debug("Sync memory failed, wait interrupted by user signal\n"); -+ goto sync_memory_failed; -+ } -+ - /* Create an obj handle so kfd_process_device_remove_obj_handle - * will take care of the bo removal when the process finishes. - * We do not need to take p->lock, because the process is just -@@ -151,6 +158,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, - return err; - - free_gpuvm: -+sync_memory_failed: - kfd_process_free_gpuvm(mem, pdd); - return err; - -diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -index c0c1cc7..1364429 100644 ---- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h -@@ -337,6 +337,8 @@ struct kfd2kgd_calls { - - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); - -+ int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); -+ - int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, - uint64_t size, void *vm, - struct kgd_mem **mem, uint64_t *offset, --- -2.7.4 - |