From 5d02c44069710220fd2315b85d6bfdcb8b85c03b Mon Sep 17 00:00:00 2001
From: Philip Cox
Date: Fri, 29 Mar 2019 16:53:04 -0400
Subject: [PATCH 2762/2940] drm/amdkfd: save context on queue suspend

This is a quick fix to save the queue context and control stack to the
queue info area when we suspend a queue. The context size and the
control stack size are also saved between the context and control
stack areas.

Change-Id: Ie5b8773d33ac06c3c8da942abece23f00c73834b
Signed-off-by: Philip Cox
---
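Note for reviewers: a minimal user-space sketch of how a tool could
read back the two size words that get_wave_state() now stores at the
end of the control stack region. The layout follows the
kfd_mqd_manager_v9.c hunk below; the struct, function, and parameter
names here are illustrative assumptions, not part of any KFD UAPI
header.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors the anonymous user_data struct written by get_wave_state().
 * Hypothetical name; no UAPI header defines it.
 */
struct kfd_context_sizes {
	uint32_t ctl_stack_size;	/* 0xC0000000 | used size */
	uint32_t save_area_size;	/* 0xC0000000 | used size */
};

/* The kernel ORs in the IS_EVENT and IS_STATE bits so that these
 * words are skipped by tools parsing the control stack as
 * COMPUTE_RELAUNCH records; mask them off to recover the raw sizes.
 */
static uint32_t untag(uint32_t word)
{
	return word & ~0xC0000000u;
}

/* snapshot: a copy of the queue's ctx_save_restore area;
 * ctl_stack_region_size: the fixed size of the control stack region
 * (cp_hqd_cntl_stack_size on the kernel side).
 */
static void print_wave_sizes(const uint8_t *snapshot,
			     size_t ctl_stack_region_size)
{
	struct kfd_context_sizes sizes;

	/* The two words sit immediately below the start of the context
	 * save area, i.e. at the very end of the control stack region.
	 */
	memcpy(&sizes, snapshot + ctl_stack_region_size - sizeof(sizes),
	       sizeof(sizes));

	printf("control stack used: %u bytes\n",
	       untag(sizes.ctl_stack_size));
	printf("save area used: %u bytes\n",
	       untag(sizes.save_area_size));
}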
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 64 +++++++----------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 70 ++++++++++++++++++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 26 ++++++-
 3 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 684f84f130a8..49d4f3cf5afd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2550,13 +2550,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	struct kfd_process_device *pdd;
 	int r = 0;
 	struct kfd_dev *dev;
-	struct kfd_process *process;
+	struct kfd_process *process = NULL;
+	struct pid *pid = NULL;
 	uint32_t gpu_id;
 	uint32_t debug_trap_action;
 	uint32_t data1;
 	uint32_t data2;
 	uint32_t data3;
-	struct pid *pid;
 
 	debug_trap_action = args->op;
 	gpu_id = args->gpu_id;
@@ -2583,7 +2583,27 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	}
 
 	mutex_lock(&p->mutex);
-	pdd = kfd_get_process_device_data(dev, p);
+
+	if (debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND ||
+			debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME) {
+
+		pid = find_get_pid(data1);
+		if (!pid) {
+			pr_err("Cannot find pid info for %i\n", data1);
+			r = -ESRCH;
+			goto unlock_out;
+		}
+
+		process = kfd_lookup_process_by_pid(pid);
+		if (!process) {
+			pr_err("Cannot find process info for %i\n", data1);
+			r = -ESRCH;
+			goto unlock_out;
+		}
+		pdd = kfd_get_process_device_data(dev, process);
+	} else {
+		pdd = kfd_get_process_device_data(dev, p);
+	}
 	if (!pdd) {
 		r = -EINVAL;
 		goto unlock_out;
@@ -2654,21 +2674,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 				dev->vm_info.last_vmid_kfd);
 		break;
 	case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
-		pid = find_get_pid(data1);
-		if (!pid) {
-			pr_err("Cannot find pid info for %i\n", data1);
-			r = -ESRCH;
-			goto unlock_out;
-		}
-
-		process = kfd_lookup_process_by_pid(pid);
-		if (!process) {
-			pr_err("Cannot find process info info for %i\n", data1);
-			r = -ESRCH;
-			put_pid(pid);
-			goto unlock_out;
-		}
-
 		/*
 		 * To suspend/resume queues, we need:
 		 * ptrace to be enabled,
@@ -2687,25 +2692,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 			pr_err("Cannot debug process to suspend queues\n");
 			r = -ESRCH;
 		}
-		kfd_unref_process(process);
-		put_pid(pid);
 		break;
 	case KFD_IOC_DBG_TRAP_NODE_RESUME:
-		pid = find_get_pid(data1);
-		if (!pid) {
-			pr_err("Cannot find pid info for %i\n", data1);
-			r = -ESRCH;
-			goto unlock_out;
-		}
-
-		process = kfd_lookup_process_by_pid(pid);
-		if (!process) {
-			pr_err("Cannot find process info info for %i\n", data1);
-			r = -ESRCH;
-			put_pid(pid);
-			goto unlock_out;
-		}
-
 		/*
 		 * To suspend/resume queues, we need:
 		 * ptrace to be enabled,
@@ -2724,8 +2712,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 			pr_err("Cannot debug process to resume queues\n");
 			r = -ESRCH;
 		}
-		kfd_unref_process(process);
-		put_pid(pid);
 		break;
 	default:
 		pr_err("Invalid option: %i\n", debug_trap_action);
@@ -2747,6 +2733,10 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	}
 
 unlock_out:
+	if (pid)
+		put_pid(pid);
+	if (process)
+		kfd_unref_process(process);
 	mutex_unlock(&p->mutex);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 5b0fbf7ba659..525dea08d208 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -25,6 +25,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/mmu_context.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/sched.h>
@@ -1976,6 +1977,52 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm)
 	return r;
 }
 
+
+struct copy_context_work_handler_workarea {
+	struct work_struct copy_context_work;
+	struct device_queue_manager *dqm;
+	struct qcm_process_device *qpd;
+	struct mm_struct *mm;
+};
+
+void copy_context_work_handler(struct work_struct *work)
+{
+	struct copy_context_work_handler_workarea *workarea;
+	struct mqd_manager *mqd_mgr;
+	struct qcm_process_device *qpd;
+	struct device_queue_manager *dqm;
+	struct queue *q;
+	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+
+	workarea = container_of(work,
+			struct copy_context_work_handler_workarea,
+			copy_context_work);
+
+	qpd = workarea->qpd;
+	dqm = workarea->dqm;
+	use_mm(workarea->mm);
+
+
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+
+		/* We ignore the return value from get_wave_state because
+		 * i) right now, it always returns 0, and
+		 * ii) if we hit an error, we would continue to the next queue
+		 * anyway.
+		 */
+		mqd_mgr->get_wave_state(mqd_mgr,
+			q->mqd,
+			(void __user *) q->properties.ctx_save_restore_area_address,
+			&tmp_ctl_stack_used_size,
+			&tmp_save_area_used_size);
+	}
+
+	unuse_mm(workarea->mm);
+}
+
+
+
 int suspend_queues(struct device_queue_manager *dqm,
 			struct kfd_process *p,
 			uint32_t flags)
@@ -1984,6 +2031,9 @@ int suspend_queues(struct device_queue_manager *dqm,
 	struct kfd_dev *dev;
 	struct kfd_process_device *pdd;
 
+	bool queues_suspended = false;
+	struct copy_context_work_handler_workarea copy_context_worker;
+
 	dev = dqm->dev;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
@@ -1991,8 +2041,21 @@ int suspend_queues(struct device_queue_manager *dqm,
 			r = pdd->dev->dqm->ops.evict_process_queues(
 					pdd->dev->dqm, &pdd->qpd);
-			if (r)
+			if (r) {
 				pr_err("Failed to suspend process queues\n");
+				break;
+			}
+
+			copy_context_worker.qpd = &pdd->qpd;
+			copy_context_worker.dqm = dqm;
+			copy_context_worker.mm = get_task_mm(p->lead_thread);
+			queues_suspended = true;
+
+			INIT_WORK_ONSTACK(
+				&copy_context_worker.copy_context_work,
+				copy_context_work_handler);
+
+			schedule_work(&copy_context_worker.copy_context_work);
 			break;
 		}
 	}
@@ -2001,6 +2064,11 @@ int suspend_queues(struct device_queue_manager *dqm,
 
 	if (!r && flags & KFD__DBG_NODE_SUSPEND_MEMORY_FENCE)
 		amdgpu_amdkfd_debug_mem_fence(dev->kgd);
 
+	if (queues_suspended) {
+		flush_work(&copy_context_worker.copy_context_work);
+		mmput(copy_context_worker.mm);
+		destroy_work_on_stack(&copy_context_worker.copy_context_work);
+	}
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 4ca984653ae6..b6c312f7ce7e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -337,13 +337,17 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
 			mm->dev->kgd, queue_address,
 			pipe_id, queue_id);
 }
 
-
 static int get_wave_state(struct mqd_manager *mm, void *mqd,
 			  void __user *ctl_stack,
 			  u32 *ctl_stack_used_size,
 			  u32 *save_area_used_size)
 {
+	void __user *user_data_ptr;
 	struct v9_mqd *m;
+	struct {
+		uint32_t ctl_stack_size;
+		uint32_t save_area_size;
+	} user_data;
 
 	/* Control stack is located one page after MQD. */
 	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
@@ -351,11 +355,27 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 	m = get_mqd(mqd);
 
 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
-		m->cp_hqd_cntl_stack_offset;
+		m->cp_hqd_cntl_stack_offset + sizeof(user_data);
 	*save_area_used_size = m->cp_hqd_wg_state_offset -
 		m->cp_hqd_cntl_stack_size;
 
-	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+	/* To avoid breaking existing tools reading the control stack,
+	 * set the IS_EVENT and IS_STATE bits to the sizes so that they
+	 * are ignored if read as COMPUTE_RELAUNCH register.
+	 */
+	user_data.ctl_stack_size = 0xC0000000 | *ctl_stack_used_size;
+	user_data.save_area_size = 0xC0000000 | *save_area_used_size;
+
+	/* The user ctl_stack_size and save_area_size are located
+	 * right below the start of the context save area.
+	 */
+	user_data_ptr = (void __user *)((uintptr_t)ctl_stack +
+		m->cp_hqd_cntl_stack_size - sizeof(user_data));
+
+	if (copy_to_user(ctl_stack,
+		(void *)((uintptr_t) mqd_ctl_stack + sizeof(user_data)),
+		m->cp_hqd_cntl_stack_size - sizeof(user_data))
+	    || copy_to_user(user_data_ptr, &user_data, sizeof(user_data)))
 		return -EFAULT;
 
 	return 0;
-- 
2.17.1