path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch 301
1 file changed, 301 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch
new file mode 100644
index 00000000..7639ea38
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch
@@ -0,0 +1,301 @@
+From 5d02c44069710220fd2315b85d6bfdcb8b85c03b Mon Sep 17 00:00:00 2001
+From: Philip Cox <Philip.Cox@amd.com>
+Date: Fri, 29 Mar 2019 16:53:04 -0400
+Subject: [PATCH 2762/2940] drm/amdkfd: save context on queue suspend
+
+This is a quick fix to save the queue context and control stack to
+the queue info area when we suspend a queue. The context size and
+control stack size are also saved, between the context and control
+stack areas.
+
+Change-Id: Ie5b8773d33ac06c3c8da942abece23f00c73834b
+Signed-off-by: Philip Cox <Philip.Cox@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 64 +++++++----------
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 70 ++++++++++++++++++-
+ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 26 ++++++-
+ 3 files changed, 119 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 684f84f130a8..49d4f3cf5afd 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -2550,13 +2550,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ struct kfd_process_device *pdd;
+ int r = 0;
+ struct kfd_dev *dev;
+- struct kfd_process *process;
++ struct kfd_process *process = NULL;
++ struct pid *pid = NULL;
+ uint32_t gpu_id;
+ uint32_t debug_trap_action;
+ uint32_t data1;
+ uint32_t data2;
+ uint32_t data3;
+- struct pid *pid;
+
+ debug_trap_action = args->op;
+ gpu_id = args->gpu_id;
+@@ -2583,7 +2583,27 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ }
+
+ mutex_lock(&p->mutex);
+- pdd = kfd_get_process_device_data(dev, p);
++
++ if (debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND ||
++ debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME) {
++
++ pid = find_get_pid(data1);
++ if (!pid) {
++ pr_err("Cannot find pid info for %i\n", data1);
++ r = -ESRCH;
++ goto unlock_out;
++ }
++
++ process = kfd_lookup_process_by_pid(pid);
++ if (!process) {
++			pr_err("Cannot find process info for %i\n", data1);
++ r = -ESRCH;
++ goto unlock_out;
++ }
++ pdd = kfd_get_process_device_data(dev, process);
++ } else {
++ pdd = kfd_get_process_device_data(dev, p);
++ }
+ if (!pdd) {
+ r = -EINVAL;
+ goto unlock_out;
+@@ -2654,21 +2674,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ dev->vm_info.last_vmid_kfd);
+ break;
+ case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
+- pid = find_get_pid(data1);
+- if (!pid) {
+- pr_err("Cannot find pid info for %i\n", data1);
+- r = -ESRCH;
+- goto unlock_out;
+- }
+-
+- process = kfd_lookup_process_by_pid(pid);
+- if (!process) {
+- pr_err("Cannot find process info info for %i\n", data1);
+- r = -ESRCH;
+- put_pid(pid);
+- goto unlock_out;
+- }
+-
+ /*
+ * To suspend/resume queues, we need:
+ * ptrace to be enabled,
+@@ -2687,25 +2692,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ pr_err("Cannot debug process to suspend queues\n");
+ r = -ESRCH;
+ }
+- kfd_unref_process(process);
+- put_pid(pid);
+ break;
+ case KFD_IOC_DBG_TRAP_NODE_RESUME:
+- pid = find_get_pid(data1);
+- if (!pid) {
+- pr_err("Cannot find pid info for %i\n", data1);
+- r = -ESRCH;
+- goto unlock_out;
+- }
+-
+- process = kfd_lookup_process_by_pid(pid);
+- if (!process) {
+- pr_err("Cannot find process info info for %i\n", data1);
+- r = -ESRCH;
+- put_pid(pid);
+- goto unlock_out;
+- }
+-
+ /*
+ * To suspend/resume queues, we need:
+ * ptrace to be enabled,
+@@ -2724,8 +2712,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ pr_err("Cannot debug process to resume queues\n");
+ r = -ESRCH;
+ }
+- kfd_unref_process(process);
+- put_pid(pid);
+ break;
+ default:
+ pr_err("Invalid option: %i\n", debug_trap_action);
+@@ -2747,6 +2733,10 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ }
+
+ unlock_out:
++ if (pid)
++ put_pid(pid);
++ if (process)
++ kfd_unref_process(process);
+ mutex_unlock(&p->mutex);
+ return r;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 5b0fbf7ba659..525dea08d208 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -25,6 +25,7 @@
+ #include <linux/printk.h>
+ #include <linux/slab.h>
+ #include <linux/list.h>
++#include <linux/mmu_context.h>
+ #include <linux/types.h>
+ #include <linux/bitops.h>
+ #include <linux/sched.h>
+@@ -1976,6 +1977,52 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm)
+ return r;
+ }
+
++
++struct copy_context_work_handler_workarea {
++ struct work_struct copy_context_work;
++ struct device_queue_manager *dqm;
++ struct qcm_process_device *qpd;
++ struct mm_struct *mm;
++};
++
++void copy_context_work_handler(struct work_struct *work)
++{
++ struct copy_context_work_handler_workarea *workarea;
++ struct mqd_manager *mqd_mgr;
++ struct qcm_process_device *qpd;
++ struct device_queue_manager *dqm;
++ struct queue *q;
++ uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
++
++ workarea = container_of(work,
++ struct copy_context_work_handler_workarea,
++ copy_context_work);
++
++ qpd = workarea->qpd;
++ dqm = workarea->dqm;
++ use_mm(workarea->mm);
++
++
++ list_for_each_entry(q, &qpd->queues_list, list) {
++ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
++
++ /* We ignore the return value from get_wave_state because
++ * i) right now, it always returns 0, and
++ * ii) if we hit an error, we would continue to the next queue
++ * anyway.
++ */
++ mqd_mgr->get_wave_state(mqd_mgr,
++ q->mqd,
++ (void __user *) q->properties.ctx_save_restore_area_address,
++ &tmp_ctl_stack_used_size,
++ &tmp_save_area_used_size);
++ }
++
++ unuse_mm(workarea->mm);
++}
++
++
++
+ int suspend_queues(struct device_queue_manager *dqm,
+ struct kfd_process *p,
+ uint32_t flags)
+@@ -1984,6 +2031,9 @@ int suspend_queues(struct device_queue_manager *dqm,
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+
++ bool queues_suspended = false;
++ struct copy_context_work_handler_workarea copy_context_worker;
++
+ dev = dqm->dev;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+@@ -1991,8 +2041,21 @@ int suspend_queues(struct device_queue_manager *dqm,
+ r = pdd->dev->dqm->ops.evict_process_queues(
+ pdd->dev->dqm,
+ &pdd->qpd);
+- if (r)
++ if (r) {
+ pr_err("Failed to suspend process queues\n");
++ break;
++ }
++
++ copy_context_worker.qpd = &pdd->qpd;
++ copy_context_worker.dqm = dqm;
++ copy_context_worker.mm = get_task_mm(p->lead_thread);
++ queues_suspended = true;
++
++ INIT_WORK_ONSTACK(
++ &copy_context_worker.copy_context_work,
++ copy_context_work_handler);
++
++ schedule_work(&copy_context_worker.copy_context_work);
+ break;
+ }
+ }
+@@ -2001,6 +2064,11 @@ int suspend_queues(struct device_queue_manager *dqm,
+ if (!r && flags & KFD__DBG_NODE_SUSPEND_MEMORY_FENCE)
+ amdgpu_amdkfd_debug_mem_fence(dev->kgd);
+
++ if (queues_suspended) {
++ flush_work(&copy_context_worker.copy_context_work);
++ mmput(copy_context_worker.mm);
++ destroy_work_on_stack(&copy_context_worker.copy_context_work);
++ }
+ return r;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+index 4ca984653ae6..b6c312f7ce7e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+@@ -337,13 +337,17 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+ }
+-
+ static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+ {
++ void __user *user_data_ptr;
+ struct v9_mqd *m;
++ struct {
++ uint32_t ctl_stack_size;
++ uint32_t save_area_size;
++ } user_data;
+
+ /* Control stack is located one page after MQD. */
+ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+@@ -351,11 +355,27 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ m = get_mqd(mqd);
+
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+- m->cp_hqd_cntl_stack_offset;
++ m->cp_hqd_cntl_stack_offset + sizeof(user_data);
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
+
+- if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
++ /* To avoid breaking existing tools reading the control stack,
++ * set the IS_EVENT and IS_STATE bits to the sizes so that they
++ * are ignored if read as COMPUTE_RELAUNCH register.
++ */
++ user_data.ctl_stack_size = 0xC0000000 | *ctl_stack_used_size;
++ user_data.save_area_size = 0xC0000000 | *save_area_used_size;
++
++	/* The user ctl_stack_size and save_area_size are located
++ * right below the start of the context save area.
++ */
++ user_data_ptr = (void __user *)((uintptr_t)ctl_stack
++ + m->cp_hqd_cntl_stack_size - sizeof(user_data));
++
++ if (copy_to_user(ctl_stack,
++ (void *)((uintptr_t) mqd_ctl_stack + sizeof(user_data)),
++ m->cp_hqd_cntl_stack_size - sizeof(user_data))
++ || copy_to_user(user_data_ptr, &user_data, sizeof(user_data)))
+ return -EFAULT;
+
+ return 0;
+--
+2.17.1
+