diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch new file mode 100644 index 00000000..7639ea38 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch @@ -0,0 +1,301 @@ +From 5d02c44069710220fd2315b85d6bfdcb8b85c03b Mon Sep 17 00:00:00 2001 +From: Philip Cox <Philip.Cox@amd.com> +Date: Fri, 29 Mar 2019 16:53:04 -0400 +Subject: [PATCH 2762/2940] drm/amdkfd: save context on queue suspend + +This is a quick fix to save the queue context and control stack to +the queue info area when we suspend a queue. The context size, along +with the control stack size, is also saved between the context and +control stack areas. + +Change-Id: Ie5b8773d33ac06c3c8da942abece23f00c73834b +Signed-off-by: Philip Cox <Philip.Cox@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 64 +++++++---------- + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 70 ++++++++++++++++++- + .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 26 ++++++- + 3 files changed, 119 insertions(+), 41 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 684f84f130a8..49d4f3cf5afd 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -2550,13 +2550,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + struct kfd_process_device *pdd; + int r = 0; + struct kfd_dev *dev; +- struct kfd_process *process; ++ struct kfd_process *process = NULL; ++ struct pid *pid = NULL; + uint32_t gpu_id; + uint32_t debug_trap_action; + uint32_t data1; + uint32_t data2; + uint32_t data3; +- struct pid *pid; + + debug_trap_action = args->op; + gpu_id = args->gpu_id; +@@ -2583,7 +2583,27 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + } + + 
mutex_lock(&p->mutex); +- pdd = kfd_get_process_device_data(dev, p); ++ ++ if (debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND || ++ debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME) { ++ ++ pid = find_get_pid(data1); ++ if (!pid) { ++ pr_err("Cannot find pid info for %i\n", data1); ++ r = -ESRCH; ++ goto unlock_out; ++ } ++ ++ process = kfd_lookup_process_by_pid(pid); ++ if (!process) { ++ pr_err("Cannot find process info info for %i\n", data1); ++ r = -ESRCH; ++ goto unlock_out; ++ } ++ pdd = kfd_get_process_device_data(dev, process); ++ } else { ++ pdd = kfd_get_process_device_data(dev, p); ++ } + if (!pdd) { + r = -EINVAL; + goto unlock_out; +@@ -2654,21 +2674,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + dev->vm_info.last_vmid_kfd); + break; + case KFD_IOC_DBG_TRAP_NODE_SUSPEND: +- pid = find_get_pid(data1); +- if (!pid) { +- pr_err("Cannot find pid info for %i\n", data1); +- r = -ESRCH; +- goto unlock_out; +- } +- +- process = kfd_lookup_process_by_pid(pid); +- if (!process) { +- pr_err("Cannot find process info info for %i\n", data1); +- r = -ESRCH; +- put_pid(pid); +- goto unlock_out; +- } +- + /* + * To suspend/resume queues, we need: + * ptrace to be enabled, +@@ -2687,25 +2692,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + pr_err("Cannot debug process to suspend queues\n"); + r = -ESRCH; + } +- kfd_unref_process(process); +- put_pid(pid); + break; + case KFD_IOC_DBG_TRAP_NODE_RESUME: +- pid = find_get_pid(data1); +- if (!pid) { +- pr_err("Cannot find pid info for %i\n", data1); +- r = -ESRCH; +- goto unlock_out; +- } +- +- process = kfd_lookup_process_by_pid(pid); +- if (!process) { +- pr_err("Cannot find process info info for %i\n", data1); +- r = -ESRCH; +- put_pid(pid); +- goto unlock_out; +- } +- + /* + * To suspend/resume queues, we need: + * ptrace to be enabled, +@@ -2724,8 +2712,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + pr_err("Cannot debug process to resume queues\n"); + r = 
-ESRCH; + } +- kfd_unref_process(process); +- put_pid(pid); + break; + default: + pr_err("Invalid option: %i\n", debug_trap_action); +@@ -2747,6 +2733,10 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, + } + + unlock_out: ++ if (pid) ++ put_pid(pid); ++ if (process) ++ kfd_unref_process(process); + mutex_unlock(&p->mutex); + return r; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 5b0fbf7ba659..525dea08d208 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -25,6 +25,7 @@ + #include <linux/printk.h> + #include <linux/slab.h> + #include <linux/list.h> ++#include <linux/mmu_context.h> + #include <linux/types.h> + #include <linux/bitops.h> + #include <linux/sched.h> +@@ -1976,6 +1977,52 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm) + return r; + } + ++ ++struct copy_context_work_handler_workarea { ++ struct work_struct copy_context_work; ++ struct device_queue_manager *dqm; ++ struct qcm_process_device *qpd; ++ struct mm_struct *mm; ++}; ++ ++void copy_context_work_handler (struct work_struct *work) ++{ ++ struct copy_context_work_handler_workarea *workarea; ++ struct mqd_manager *mqd_mgr; ++ struct qcm_process_device *qpd; ++ struct device_queue_manager *dqm; ++ struct queue *q; ++ uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; ++ ++ workarea = container_of(work, ++ struct copy_context_work_handler_workarea, ++ copy_context_work); ++ ++ qpd = workarea->qpd; ++ dqm = workarea->dqm; ++ use_mm(workarea->mm); ++ ++ ++ list_for_each_entry(q, &qpd->queues_list, list) { ++ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; ++ ++ /* We ignore the return value from get_wave_state because ++ * i) right now, it always returns 0, and ++ * ii) if we hit an error, we would continue to the next queue ++ * anyway. 
++ */ ++ mqd_mgr->get_wave_state(mqd_mgr, ++ q->mqd, ++ (void __user *) q->properties.ctx_save_restore_area_address, ++ &tmp_ctl_stack_used_size, ++ &tmp_save_area_used_size); ++ } ++ ++ unuse_mm(workarea->mm); ++} ++ ++ ++ + int suspend_queues(struct device_queue_manager *dqm, + struct kfd_process *p, + uint32_t flags) +@@ -1984,6 +2031,9 @@ int suspend_queues(struct device_queue_manager *dqm, + struct kfd_dev *dev; + struct kfd_process_device *pdd; + ++ bool queues_suspended = false; ++ struct copy_context_work_handler_workarea copy_context_worker; ++ + dev = dqm->dev; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { +@@ -1991,8 +2041,21 @@ int suspend_queues(struct device_queue_manager *dqm, + r = pdd->dev->dqm->ops.evict_process_queues( + pdd->dev->dqm, + &pdd->qpd); +- if (r) ++ if (r) { + pr_err("Failed to suspend process queues\n"); ++ break; ++ } ++ ++ copy_context_worker.qpd = &pdd->qpd; ++ copy_context_worker.dqm = dqm; ++ copy_context_worker.mm = get_task_mm(p->lead_thread); ++ queues_suspended = true; ++ ++ INIT_WORK_ONSTACK( ++ &copy_context_worker.copy_context_work, ++ copy_context_work_handler); ++ ++ schedule_work(&copy_context_worker.copy_context_work); + break; + } + } +@@ -2001,6 +2064,11 @@ int suspend_queues(struct device_queue_manager *dqm, + if (!r && flags & KFD__DBG_NODE_SUSPEND_MEMORY_FENCE) + amdgpu_amdkfd_debug_mem_fence(dev->kgd); + ++ if (queues_suspended) { ++ flush_work(&copy_context_worker.copy_context_work); ++ mmput(copy_context_worker.mm); ++ destroy_work_on_stack(&copy_context_worker.copy_context_work); ++ } + return r; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +index 4ca984653ae6..b6c312f7ce7e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +@@ -337,13 +337,17 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, + mm->dev->kgd, queue_address, + pipe_id, queue_id); + } +- + 
static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) + { ++ void __user *user_data_ptr; + struct v9_mqd *m; ++ struct { ++ uint32_t ctl_stack_size; ++ uint32_t save_area_size; ++ } user_data; + + /* Control stack is located one page after MQD. */ + void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); +@@ -351,11 +355,27 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - +- m->cp_hqd_cntl_stack_offset; ++ m->cp_hqd_cntl_stack_offset + sizeof(user_data); + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size; + +- if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) ++ /* To avoid breaking existing tools reading the control stack, ++ * set the IS_EVENT and IS_STATE bits to the sizes so that they ++ * are ignored if read as COMPUTE_RELAUNCH register. ++ */ ++ user_data.ctl_stack_size = 0xC0000000 | *ctl_stack_used_size; ++ user_data.save_area_size = 0xC0000000 | *save_area_used_size; ++ ++ /* The user ctl_stack_size and save_area size are located ++ * right below the start of the context save area. ++ */ ++ user_data_ptr = (void __user *)((uintptr_t)ctl_stack ++ + m->cp_hqd_cntl_stack_size - sizeof(user_data)); ++ ++ if (copy_to_user(ctl_stack, ++ (void *)((uintptr_t) mqd_ctl_stack + sizeof(user_data)), ++ m->cp_hqd_cntl_stack_size - sizeof(user_data)) ++ || copy_to_user(user_data_ptr, &user_data, sizeof(user_data))) + return -EFAULT; + + return 0; +-- +2.17.1 + |