From 3e09aff00ca90b135406dd86642fde011937ce51 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 9 Feb 2017 17:59:45 -0500 Subject: [PATCH 1588/4131] drm/amdgpu: Retry failed userptr restore when the mapping is in flux update_user_pages fails with -EDEADLK if the virtual address mapping of the buffer is being updated while we're trying to get its pages. This can happen when a large buffer is being updated after fork, taking longer than the 1 jiffy delay between the first evict and restore. Reschedule restore after another 1 jiffy delay, hoping that the VM mapping will have settled down by then. Change-Id: Ic0c917ae8877bbd15e1989ca1c365df1a637c361 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 20 ++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7d10b71..fff1b85 100755 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -266,6 +266,7 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work) struct kgd_mem *mem = container_of(dwork, struct kgd_mem, work); struct amdgpu_device *adev; struct mm_struct *mm; + int ret = 0; mutex_lock(&mem->lock); @@ -282,16 +283,27 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work) * was scheduled. */ if (mem->evicted == 1) { - if (amdgpu_amdkfd_gpuvm_restore_mem(mem, mm) != -EBUSY) + ret = amdgpu_amdkfd_gpuvm_restore_mem(mem, mm); + if (ret != -EBUSY && ret != -EDEADLK) mem->evicted = 0; } - BUG_ON(mem->mm != mm); - mem->mm = NULL; + /* If restore failed due to the VM being updated concurrently, + * reschedule restore again in a jiffie + */ + if (ret == -EDEADLK && mem->evicted == 1) { + pr_err("Rescheduling restore\n"); + mm = NULL; + schedule_delayed_work(&mem->work, 1); + } else { + BUG_ON(mem->mm != mm); + mem->mm = NULL; + } mutex_unlock(&mem->lock); - mmput(mm); + if (mm) + mmput(mm); } int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8dffb3a..e1bea18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1934,6 +1934,11 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm) if (ret == -ESRCH) /* process terminating, fail quiet and fast */ return ret; + else if (ret == -EDEADLK) + /* Someone else is still updating the + * VM, let's try again later + */ + return ret; pr_err("get_user_pages failed. Probably userptr is freed. %d\n", ret); } -- 2.7.4