From b1254553a45a88b74f646c5acb4319309769e200 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 11 Jul 2017 17:57:48 -0400 Subject: [PATCH 1733/4131] drm/amdkfd: Flush TLBs after syncing with page table update Flushing TLBs must happen after the page table update is completed in memory. The helper function kfd_map_memory_to_gpu cannot be used any more because sync needs to happen between map and flush. Update all callers to implement the correct map, sync, flush sequence and remove helper function. Bug: SWDEV-126375 Change-Id: Iecf5972a87e4bae6ad68d06e0d0589c1a2755927 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 38 ++++++++++++++++---------------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 ++++- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dbc3afd..814ad7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1301,23 +1301,6 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, return ret; } -int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd) -{ - int err; - struct kfd_dev *dev = pdd->dev; - - err = dev->kfd2kgd->map_memory_to_gpu( - dev->kgd, (struct kgd_mem *) mem, pdd->vm); - - /* Theoretically we don't need this flush. However, as there are - * some bugs in our PTE handling for mapping and unmapping, we - * need this flush to pass all the tests. - */ - kfd_flush_tlb(dev, pdd->process->pasid); - - return err; -} - static int kfd_ioctl_map_memory_to_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -1388,12 +1371,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, err = -EFAULT; goto get_mem_obj_from_handle_failed; } - err = kfd_map_memory_to_gpu(mem, peer_pdd); + err = peer->kfd2kgd->map_memory_to_gpu( + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); if (err != 0) pr_err("Failed to map\n"); } } else { - err = kfd_map_memory_to_gpu(mem, pdd); + err = dev->kfd2kgd->map_memory_to_gpu( + dev->kgd, (struct kgd_mem *)mem, pdd->vm); if (err != 0) pr_err("Failed to map\n"); } @@ -1404,6 +1389,21 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto sync_memory_failed; } + /* Flush TLBs after waiting for the page table updates to complete */ + if (args->device_ids_array_size > 0) { + for (i = 0; i < num_dev; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(dev, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer, p->pasid); + } + } else { + kfd_flush_tlb(dev, p->pasid); + } + if (args->device_ids_array_size > 0 && devices_arr) kfree(devices_arr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ade86c1..5dd89f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -807,7 +807,6 @@ void run_rdma_free_callback(struct kfd_bo *buf_obj); struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid); /* kfd dgpu memory */ -int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd); int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd); /* Process device data iterator */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index f5e2282..5770b1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -132,7 +132,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, if (err) goto err_alloc_mem; - err = kfd_map_memory_to_gpu(mem, pdd); + err = kdev->kfd2kgd->map_memory_to_gpu( + kdev->kgd, (struct kgd_mem *)mem, pdd->vm); if (err) goto err_map_mem; @@ -143,6 +144,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p, goto sync_memory_failed; } + kfd_flush_tlb(kdev, p->pasid); + /* Create an obj handle so kfd_process_device_remove_obj_handle * will take care of the bo removal when the process finishes. * We do not need to take p->lock, because the process is just -- 2.7.4