aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch370
1 files changed, 370 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
new file mode 100644
index 00000000..44040fbc
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1727-drm-amd-Implement-parallel-memory-mapping-on-mGPUs.patch
@@ -0,0 +1,370 @@
+From 909b82ea5625d797f9bde9be6378ba3ee8a55ec5 Mon Sep 17 00:00:00 2001
+From: Lan Xiao <Lan.Xiao@amd.com>
+Date: Fri, 23 Jun 2017 16:06:48 -0400
+Subject: [PATCH 1727/4131] drm/amd: Implement parallel memory mapping on mGPUs
+
+Alter the KFD-KGD interface to optimize multi-GPU memory mappings to
+work concurrently instead of sequentially. Return the fences
+during the process, wait for all fences after the mappings are done.
+The fences are stored in the associated kgd_mem object.
+
+This change also enables interruptible waiting with proper signal
+handling.
+
+Change-Id: I9ae7f4bd54165b14dd5b37df5df6516aa80cba83
+Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 74 +++++++++++++++++------
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++
+ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 +
+ 8 files changed, 78 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index ba1e24c9..924e28a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -64,6 +64,8 @@ struct kgd_mem {
+ struct amdkfd_process_info *process_info;
+ struct page **user_pages;
+
++ struct amdgpu_sync sync;
++
+
+ /* flags bitfield */
+ bool coherent : 1;
+@@ -190,6 +192,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+ })
+
+ /* GPUVM API */
++int amdgpu_amdkfd_gpuvm_sync_memory(
++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
+ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct kgd_dev *kgd, uint64_t va, uint64_t size,
+ void *vm, struct kgd_mem **mem,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+index 4549dc0..0b2595e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
+ .invalidate_tlbs = invalidate_tlbs,
++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+index 76e3d5d..08da99f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+@@ -189,6 +189,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
+ get_atc_vmid_pasid_mapping_valid,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
+ .invalidate_tlbs = invalidate_tlbs,
++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+index d10d213..42e0094 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+@@ -234,6 +234,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
+ get_atc_vmid_pasid_mapping_valid,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
+ .invalidate_tlbs = invalidate_tlbs,
++ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index 8384dfb..475e7fb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -655,6 +655,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
+
+ alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain;
+
++ amdgpu_sync_create(&(*mem)->sync);
++
+ ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+ if (ret) {
+ pr_err("Insufficient system memory\n");
+@@ -730,7 +732,7 @@ struct bo_vm_reservation_context {
+ struct amdgpu_bo_list_entry *vm_pd;
+ struct ww_acquire_ctx ticket;
+ struct list_head list, duplicates;
+- struct amdgpu_sync sync;
++ struct amdgpu_sync *sync;
+ bool reserved;
+ };
+
+@@ -751,7 +753,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
+
+ ctx->reserved = false;
+ ctx->n_vms = 1;
+- amdgpu_sync_create(&ctx->sync);
++ ctx->sync = &mem->sync;
+
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->duplicates);
+@@ -812,7 +814,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+ ctx->reserved = false;
+ ctx->n_vms = 0;
+ ctx->vm_pd = NULL;
+- amdgpu_sync_create(&ctx->sync);
++ ctx->sync = &mem->sync;
+
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->duplicates);
+@@ -867,19 +869,27 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+ return ret;
+ }
+
+-static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+- bool wait)
++static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
++ bool wait, bool intr)
+ {
+- if (wait) /* FIXME: when called from user context, this needs to be interruptible */
+- amdgpu_sync_wait(&ctx->sync, false);
++ int ret = 0;
++
++ if (wait) {
++ ret = amdgpu_sync_wait(ctx->sync, intr);
++ if (ret)
++ return ret;
++ }
+
+ if (ctx->reserved)
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
+ kfree(ctx->vm_pd);
+
+- amdgpu_sync_free(&ctx->sync);
++ ctx->sync = NULL;
++
+ ctx->reserved = false;
+ ctx->vm_pd = NULL;
++
++ return ret;
+ }
+
+ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+@@ -1051,6 +1061,25 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+ return sg;
+ }
+
++int amdgpu_amdkfd_gpuvm_sync_memory(
++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
++{
++ int ret = 0;
++ struct amdgpu_sync sync;
++ struct amdgpu_device *adev;
++
++ adev = get_amdgpu_device(kgd);
++ amdgpu_sync_create(&sync);
++
++ mutex_lock(&mem->lock);
++ amdgpu_sync_clone(adev, &mem->sync, &sync);
++ mutex_unlock(&mem->lock);
++
++ ret = amdgpu_sync_wait(&sync, intr);
++ amdgpu_sync_free(&sync);
++ return ret;
++}
++
+ #define BOOL_TO_STR(b) (b == true) ? "true" : "false"
+
+ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+@@ -1137,7 +1166,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct amdgpu_device *adev;
+ struct kfd_bo_va_list *entry, *tmp;
+ struct bo_vm_reservation_context ctx;
+- int ret;
++ int ret = 0;
+ struct ttm_validate_buffer *bo_list_entry;
+ struct amdkfd_process_info *process_info;
+ unsigned long bo_size;
+@@ -1199,7 +1228,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ entry, bo_size);
+ }
+
+- unreserve_bo_and_vms(&ctx, false);
++ ret = unreserve_bo_and_vms(&ctx, false, true);
++
++ /* Free the sync object */
++ amdgpu_sync_free(&mem->sync);
+
+ /* If the SG is not NULL, it's one we created for a doorbell
+ * BO. We need to free it.
+@@ -1213,7 +1245,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ amdgpu_bo_unref(&mem->bo);
+ kfree(mem);
+
+- return 0;
++ return ret;
+ }
+
+ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+@@ -1308,7 +1340,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ entry->va, entry->va + bo_size,
+ entry);
+
+- ret = map_bo_to_gpuvm(adev, entry, &ctx.sync,
++ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+ is_invalid_userptr);
+ if (ret != 0) {
+ pr_err("Failed to map radeon bo to gpuvm\n");
+@@ -1325,7 +1357,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ amdgpu_bo_fence(bo,
+ &kfd_vm->process_info->eviction_fence->base,
+ true);
+- unreserve_bo_and_vms(&ctx, true);
++ ret = unreserve_bo_and_vms(&ctx, false, true);
+
+ mutex_unlock(&mem->process_info->lock);
+ mutex_unlock(&mem->lock);
+@@ -1338,7 +1370,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ if (bo_va_entry)
+ remove_bo_from_vm(adev, bo_va_entry, bo_size);
+ add_bo_to_vm_failed:
+- unreserve_bo_and_vms(&ctx, false);
++ unreserve_bo_and_vms(&ctx, false, false);
+ bo_reserve_failed:
+ mutex_unlock(&mem->process_info->lock);
+ mutex_unlock(&mem->lock);
+@@ -1569,7 +1601,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ entry->va + bo_size,
+ entry);
+
+- ret = unmap_bo_from_gpuvm(adev, entry, &ctx.sync);
++ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
+ if (ret == 0) {
+ entry->is_mapped = false;
+ } else {
+@@ -1600,7 +1632,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ }
+
+ unreserve_out:
+- unreserve_bo_and_vms(&ctx, false);
++ unreserve_bo_and_vms(&ctx, false, false);
+ out:
+ mutex_unlock(&mem->lock);
+ return ret;
+@@ -2235,6 +2267,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
+ struct amdgpu_amdkfd_fence *old_fence;
+ int ret = 0, i;
+ struct list_head duplicate_save;
++ struct amdgpu_sync sync_obj;
+
+ INIT_LIST_HEAD(&duplicate_save);
+ INIT_LIST_HEAD(&ctx.list);
+@@ -2287,7 +2320,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
+ if (!list_empty(&duplicate_save))
+ pr_err("BUG: list of BOs to reserve has duplicates!\n");
+
+- amdgpu_sync_create(&ctx.sync);
++ amdgpu_sync_create(&sync_obj);
++ ctx.sync = &sync_obj;
+
+ /* Validate PDs and PTs */
+ ret = process_validate_vms(process_info);
+@@ -2322,7 +2356,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry,
+- &ctx.sync);
++ ctx.sync);
+ if (ret) {
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
+ goto validate_map_fail;
+@@ -2330,7 +2364,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
+ }
+ }
+
+- amdgpu_sync_wait(&ctx.sync, false);
++ amdgpu_sync_wait(ctx.sync, false);
+
+ /* Wait for validate to finish and attach new eviction fence */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+@@ -2350,7 +2384,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
+ }
+ validate_map_fail:
+ ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
+- amdgpu_sync_free(&ctx.sync);
++ amdgpu_sync_free(&sync_obj);
+ ttm_reserve_fail:
+ mutex_unlock(&process_info->lock);
+ evict_fence_fail:
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 64a4373..dbc3afd 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -1398,6 +1398,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+ pr_err("Failed to map\n");
+ }
+
++ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
++ if (err) {
++ pr_debug("Sync memory failed, wait interrupted by user signal\n");
++ goto sync_memory_failed;
++ }
++
+ if (args->device_ids_array_size > 0 && devices_arr)
+ kfree(devices_arr);
+
+@@ -1407,6 +1413,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+ up_write(&p->lock);
+ get_mem_obj_from_handle_failed:
+ copy_from_user_failed:
++sync_memory_failed:
+ kfree(devices_arr);
+ return err;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index d1ef118..f5e2282 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -136,6 +136,13 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
+ if (err)
+ goto err_map_mem;
+
++ err = kdev->kfd2kgd->sync_memory(kdev->kgd, (struct kgd_mem *) mem,
++ true);
++ if (err) {
++ pr_debug("Sync memory failed, wait interrupted by user signal\n");
++ goto sync_memory_failed;
++ }
++
+ /* Create an obj handle so kfd_process_device_remove_obj_handle
+ * will take care of the bo removal when the process finishes.
+ * We do not need to take p->lock, because the process is just
+@@ -151,6 +158,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
+ return err;
+
+ free_gpuvm:
++sync_memory_failed:
+ kfd_process_free_gpuvm(mem, pdd);
+ return err;
+
+diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+index c0c1cc7..1364429 100644
+--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+@@ -337,6 +337,8 @@ struct kfd2kgd_calls {
+
+ int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
+
++ int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
++
+ int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
+ uint64_t size, void *vm,
+ struct kgd_mem **mem, uint64_t *offset,
+--
+2.7.4
+