aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch')
-rw-r--r--meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch370
1 files changed, 0 insertions, 370 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
deleted file mode 100644
index 44040fbc..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
+++ /dev/null
@@ -1,370 +0,0 @@
-From 909b82ea5625d797f9bde9be6378ba3ee8a55ec5 Mon Sep 17 00:00:00 2001
-From: Lan Xiao <Lan.Xiao@amd.com>
-Date: Fri, 23 Jun 2017 16:06:48 -0400
-Subject: [PATCH 1727/4131] drm/amd: Implement parallel memory mapping on mGPUs
-
-Alter the KFD-KGD interface to optimize multi-GPU memory mappings to
-work concurrently instead of sequentially. Return the fences
-during the process, and wait for all of them after the mappings are done.
-The fences are stored in the associated kgd_mem object.
-
-This change also enables interruptible waiting with proper signal
-handling.
-
-Change-Id: I9ae7f4bd54165b14dd5b37df5df6516aa80cba83
-Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 74 +++++++++++++++++------
- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++
- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++
- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 +
- 8 files changed, 78 insertions(+), 20 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-index ba1e24c9..924e28a 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-@@ -64,6 +64,8 @@ struct kgd_mem {
- struct amdkfd_process_info *process_info;
- struct page **user_pages;
-
-+ struct amdgpu_sync sync;
-+
-
- /* flags bitfield */
- bool coherent : 1;
-@@ -190,6 +192,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- })
-
- /* GPUVM API */
-+int amdgpu_amdkfd_gpuvm_sync_memory(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
- int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- struct kgd_dev *kgd, uint64_t va, uint64_t size,
- void *vm, struct kgd_mem **mem,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-index 4549dc0..0b2595e 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
- .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
- .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
- .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
- .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
- .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-index 76e3d5d..08da99f 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-@@ -189,6 +189,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
- get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
- .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
- .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
- .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
- .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-index d10d213..42e0094 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-@@ -234,6 +234,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
- get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
- .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
- .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
- .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
- .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 8384dfb..475e7fb 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -655,6 +655,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
-
- alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain;
-
-+ amdgpu_sync_create(&(*mem)->sync);
-+
- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
- if (ret) {
- pr_err("Insufficient system memory\n");
-@@ -730,7 +732,7 @@ struct bo_vm_reservation_context {
- struct amdgpu_bo_list_entry *vm_pd;
- struct ww_acquire_ctx ticket;
- struct list_head list, duplicates;
-- struct amdgpu_sync sync;
-+ struct amdgpu_sync *sync;
- bool reserved;
- };
-
-@@ -751,7 +753,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
-
- ctx->reserved = false;
- ctx->n_vms = 1;
-- amdgpu_sync_create(&ctx->sync);
-+ ctx->sync = &mem->sync;
-
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-@@ -812,7 +814,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
- ctx->reserved = false;
- ctx->n_vms = 0;
- ctx->vm_pd = NULL;
-- amdgpu_sync_create(&ctx->sync);
-+ ctx->sync = &mem->sync;
-
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->duplicates);
-@@ -867,19 +869,27 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
- return ret;
- }
-
--static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
-- bool wait)
-+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
-+ bool wait, bool intr)
- {
-- if (wait) /* FIXME: when called from user context, this needs to be interruptible */
-- amdgpu_sync_wait(&ctx->sync, false);
-+ int ret = 0;
-+
-+ if (wait) {
-+ ret = amdgpu_sync_wait(ctx->sync, intr);
-+ if (ret)
-+ return ret;
-+ }
-
- if (ctx->reserved)
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
- kfree(ctx->vm_pd);
-
-- amdgpu_sync_free(&ctx->sync);
-+ ctx->sync = NULL;
-+
- ctx->reserved = false;
- ctx->vm_pd = NULL;
-+
-+ return ret;
- }
-
- static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
-@@ -1051,6 +1061,25 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
- return sg;
- }
-
-+int amdgpu_amdkfd_gpuvm_sync_memory(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
-+{
-+ int ret = 0;
-+ struct amdgpu_sync sync;
-+ struct amdgpu_device *adev;
-+
-+ adev = get_amdgpu_device(kgd);
-+ amdgpu_sync_create(&sync);
-+
-+ mutex_lock(&mem->lock);
-+ amdgpu_sync_clone(adev, &mem->sync, &sync);
-+ mutex_unlock(&mem->lock);
-+
-+ ret = amdgpu_sync_wait(&sync, intr);
-+ amdgpu_sync_free(&sync);
-+ return ret;
-+}
-+
- #define BOOL_TO_STR(b) (b == true) ? "true" : "false"
-
- int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
-@@ -1137,7 +1166,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct amdgpu_device *adev;
- struct kfd_bo_va_list *entry, *tmp;
- struct bo_vm_reservation_context ctx;
-- int ret;
-+ int ret = 0;
- struct ttm_validate_buffer *bo_list_entry;
- struct amdkfd_process_info *process_info;
- unsigned long bo_size;
-@@ -1199,7 +1228,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- entry, bo_size);
- }
-
-- unreserve_bo_and_vms(&ctx, false);
-+ ret = unreserve_bo_and_vms(&ctx, false, true);
-+
-+ /* Free the sync object */
-+ amdgpu_sync_free(&mem->sync);
-
- /* If the SG is not NULL, it's one we created for a doorbell
- * BO. We need to free it.
-@@ -1213,7 +1245,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- amdgpu_bo_unref(&mem->bo);
- kfree(mem);
-
-- return 0;
-+ return ret;
- }
-
- int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-@@ -1308,7 +1340,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- entry->va, entry->va + bo_size,
- entry);
-
-- ret = map_bo_to_gpuvm(adev, entry, &ctx.sync,
-+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
- is_invalid_userptr);
- if (ret != 0) {
- pr_err("Failed to map radeon bo to gpuvm\n");
-@@ -1325,7 +1357,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- amdgpu_bo_fence(bo,
- &kfd_vm->process_info->eviction_fence->base,
- true);
-- unreserve_bo_and_vms(&ctx, true);
-+ ret = unreserve_bo_and_vms(&ctx, false, true);
-
- mutex_unlock(&mem->process_info->lock);
- mutex_unlock(&mem->lock);
-@@ -1338,7 +1370,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- if (bo_va_entry)
- remove_bo_from_vm(adev, bo_va_entry, bo_size);
- add_bo_to_vm_failed:
-- unreserve_bo_and_vms(&ctx, false);
-+ unreserve_bo_and_vms(&ctx, false, false);
- bo_reserve_failed:
- mutex_unlock(&mem->process_info->lock);
- mutex_unlock(&mem->lock);
-@@ -1569,7 +1601,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- entry->va + bo_size,
- entry);
-
-- ret = unmap_bo_from_gpuvm(adev, entry, &ctx.sync);
-+ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
- if (ret == 0) {
- entry->is_mapped = false;
- } else {
-@@ -1600,7 +1632,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- }
-
- unreserve_out:
-- unreserve_bo_and_vms(&ctx, false);
-+ unreserve_bo_and_vms(&ctx, false, false);
- out:
- mutex_unlock(&mem->lock);
- return ret;
-@@ -2235,6 +2267,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
- struct amdgpu_amdkfd_fence *old_fence;
- int ret = 0, i;
- struct list_head duplicate_save;
-+ struct amdgpu_sync sync_obj;
-
- INIT_LIST_HEAD(&duplicate_save);
- INIT_LIST_HEAD(&ctx.list);
-@@ -2287,7 +2320,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
- if (!list_empty(&duplicate_save))
- pr_err("BUG: list of BOs to reserve has duplicates!\n");
-
-- amdgpu_sync_create(&ctx.sync);
-+ amdgpu_sync_create(&sync_obj);
-+ ctx.sync = &sync_obj;
-
- /* Validate PDs and PTs */
- ret = process_validate_vms(process_info);
-@@ -2322,7 +2356,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
- ret = update_gpuvm_pte((struct amdgpu_device *)
- bo_va_entry->kgd_dev,
- bo_va_entry,
-- &ctx.sync);
-+ ctx.sync);
- if (ret) {
- pr_debug("Memory eviction: update PTE failed. Try again\n");
- goto validate_map_fail;
-@@ -2330,7 +2364,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
- }
- }
-
-- amdgpu_sync_wait(&ctx.sync, false);
-+ amdgpu_sync_wait(ctx.sync, false);
-
- /* Wait for validate to finish and attach new eviction fence */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
-@@ -2350,7 +2384,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
- }
- validate_map_fail:
- ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
-- amdgpu_sync_free(&ctx.sync);
-+ amdgpu_sync_free(&sync_obj);
- ttm_reserve_fail:
- mutex_unlock(&process_info->lock);
- evict_fence_fail:
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-index 64a4373..dbc3afd 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-@@ -1398,6 +1398,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
- pr_err("Failed to map\n");
- }
-
-+ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
-+ if (err) {
-+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
-+ goto sync_memory_failed;
-+ }
-+
- if (args->device_ids_array_size > 0 && devices_arr)
- kfree(devices_arr);
-
-@@ -1407,6 +1413,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
- up_write(&p->lock);
- get_mem_obj_from_handle_failed:
- copy_from_user_failed:
-+sync_memory_failed:
- kfree(devices_arr);
- return err;
- }
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-index d1ef118..f5e2282 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-@@ -136,6 +136,13 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
- if (err)
- goto err_map_mem;
-
-+ err = kdev->kfd2kgd->sync_memory(kdev->kgd, (struct kgd_mem *) mem,
-+ true);
-+ if (err) {
-+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
-+ goto sync_memory_failed;
-+ }
-+
- /* Create an obj handle so kfd_process_device_remove_obj_handle
- * will take care of the bo removal when the process finishes.
- * We do not need to take p->lock, because the process is just
-@@ -151,6 +158,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
- return err;
-
- free_gpuvm:
-+sync_memory_failed:
- kfd_process_free_gpuvm(mem, pdd);
- return err;
-
-diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-index c0c1cc7..1364429 100644
---- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-@@ -337,6 +337,8 @@ struct kfd2kgd_calls {
-
- int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-
-+ int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
-+
- int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
- uint64_t size, void *vm,
- struct kgd_mem **mem, uint64_t *offset,
---
-2.7.4
-