Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch  478
1 files changed, 478 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch
new file mode 100644
index 00000000..5b9121db
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch
@@ -0,0 +1,478 @@
+From 7b5a8eee9cf58fbd76caeb49bc137749fc25e476 Mon Sep 17 00:00:00 2001
+From: Philip Cox <Philip.Cox@amd.com>
+Date: Thu, 11 Apr 2019 11:49:22 -0400
+Subject: [PATCH 2777/2940] drm/amdkfd: Implement queue based suspend/resume
+
+Rather than suspending and resuming all the queues on a node,
+we need the ability to specify a list of queues to suspend and resume.
+
+Change-Id: Id7d5a32e3bcc3806ecea3ae9b89cfafb28469beb
+Signed-off-by: Philip Cox <Philip.Cox@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++--
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 262 ++++++++++++++----
+ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 14 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +-
+ 4 files changed, 229 insertions(+), 99 deletions(-)
+
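
For illustration only (a minimal sketch, not part of the patch itself): the reworked
entry points operate per process on an explicit list of queue ids instead of per
device on every queue. A hypothetical caller, using the prototypes this patch adds
to kfd_device_queue_manager.h, might look like the following; the helper name and
the concrete queue ids are invented for the example.

	static int example_suspend_then_resume(struct kfd_process *target)
	{
		uint32_t ids[2] = { 3, 7 };	/* illustrative queue ids */
		int r;

		/* Suspend only the listed queues of this process. */
		r = suspend_queues(target, 2 /* num_queues */,
				   100 /* grace_period */, 0 /* flags */, ids);
		if (r)
			return r;

		/*
		 * On success the wave state of the suspended queues has been
		 * copied to user space by copy_context_work_handler().
		 */

		/* Bring the same queues back. */
		return resume_queues(target, 2 /* num_queues */, 0 /* flags */,
				     ids);
	}
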
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 4abf4f462fa1..d5416af01ac4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -2555,7 +2555,7 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ struct kfd_ioctl_dbg_trap_args *args = data;
+ struct kfd_process_device *pdd = NULL;
+ int r = 0;
+- struct kfd_dev *dev;
++ struct kfd_dev *dev = NULL;
+ struct kfd_process *target = NULL;
+ struct pid *pid = NULL;
+ uint32_t *queue_id_array = NULL;
+@@ -2565,7 +2565,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ uint32_t data2;
+ uint32_t data3;
+ bool is_suspend_or_resume;
+- uint8_t id;
+
+ debug_trap_action = args->op;
+ gpu_id = args->gpu_id;
+@@ -2720,39 +2719,24 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ data1,
+ dev->vm_info.last_vmid_kfd);
+ break;
+- case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
+- id = 0;
+- /* We need to loop over all of the topology devices */
+- while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+- if (!dev) {
+- /* Not a GPU. Skip it */
+- id++;
+- continue;
+- }
+
+- r = suspend_queues(dev->dqm, target, data1);
+- if (r)
+- goto unlock_out;
+-
+- id++;
+- }
++ case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
++ r = suspend_queues(target,
++ data2, /* Number of queues */
++ data3, /* Grace Period */
++ data1, /* Flags */
++ queue_id_array); /* array of queue ids */
++ if (r)
++ goto unlock_out;
+ break;
+- case KFD_IOC_DBG_TRAP_NODE_RESUME:
+- id = 0;
+- /* We need to loop over all of the topology devices */
+- while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+- if (!dev) {
+- /* Not a GPU. Skip it */
+- id++;
+- continue;
+- }
+
+- r = resume_queues(dev->dqm, target);
+- if (r)
+- goto unlock_out;
+-
+- id++;
+- }
++ case KFD_IOC_DBG_TRAP_NODE_RESUME:
++ r = resume_queues(target,
++ data2, /* Number of queues */
++ data1, /* Flags */
++ queue_id_array); /* array of queue ids */
++ if (r)
++ goto unlock_out;
+ break;
+ default:
+ pr_err("Invalid option: %i\n", debug_trap_action);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index d0c316b91af0..497d449fc6d0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -600,6 +600,66 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ return retval;
+ }
+
++/* suspend_single_queue does not lock the dqm the way
++ * evict_process_queues_cpsch or evict_process_queues_nocpsch do. You should
++ * lock the dqm before calling, and unlock after calling.
++ *
++ * The reason we don't lock the dqm is because this function may be
++ * called on multiple queues in a loop, so rather than locking/unlocking
++ * multiple times, we will just keep the dqm locked for all of the calls.
++ */
++static int suspend_single_queue(struct device_queue_manager *dqm,
++ struct kfd_process_device *pdd,
++ struct queue *q)
++{
++ int retval = 0;
++
++ pr_debug("Suspending PASID %u queue [%i]\n",
++ pdd->process->pasid,
++ q->properties.queue_id);
++
++ q->properties.is_suspended = true;
++ if (q->properties.is_active) {
++ dqm->queue_count--;
++ q->properties.is_active = false;
++ }
++
++ return retval;
++}
++
++/* resume_single_queue does not lock the dqm the way
++ * restore_process_queues_cpsch or restore_process_queues_nocpsch do. You should
++ * lock the dqm before calling, and unlock after calling.
++ *
++ * The reason we don't lock the dqm is because this function may be
++ * called on multiple queues in a loop, so rather than locking/unlocking
++ * multiple times, we will just keep the dqm locked for all of the calls.
++ */
++static int resume_single_queue(struct device_queue_manager *dqm,
++ struct qcm_process_device *qpd,
++ struct queue *q)
++{
++ struct kfd_process_device *pdd;
++ uint64_t pd_base;
++ int retval = 0;
++
++ pdd = qpd_to_pdd(qpd);
++ /* Retrieve PD base */
++ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
++
++ pr_debug("Restoring from suspend PASID %u queue [%i]\n",
++ pdd->process->pasid,
++ q->properties.queue_id);
++
++ q->properties.is_suspended = false;
++
++ if (QUEUE_IS_ACTIVE(q->properties)) {
++ q->properties.is_active = true;
++ dqm->queue_count++;
++ }
++
++ return retval;
++}
+ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+ {
+@@ -1227,7 +1287,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ * updates the is_evicted flag but is a no-op otherwise.
+ */
+ q->properties.is_evicted = !!qpd->evicted;
+-
++ q->properties.is_suspended = false;
+ dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
+@@ -1979,114 +2039,194 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm)
+ return r;
+ }
+
++bool queue_id_in_array(unsigned int queue_id,
++ uint32_t num_queues,
++ uint32_t *queue_ids)
++{
++ int i;
++
++ for (i = 0; i < num_queues; i++)
++ if (queue_id == queue_ids[i])
++ return true;
++ return false;
++}
+
+ struct copy_context_work_handler_workarea {
+ struct work_struct copy_context_work;
+- struct device_queue_manager *dqm;
+- struct qcm_process_device *qpd;
+- struct mm_struct *mm;
++ struct kfd_process *p;
+ };
+
+ void copy_context_work_handler (struct work_struct *work)
+ {
+ struct copy_context_work_handler_workarea *workarea;
+ struct mqd_manager *mqd_mgr;
+- struct qcm_process_device *qpd;
+- struct device_queue_manager *dqm;
++ struct kfd_process_device *pdd;
+ struct queue *q;
++ struct mm_struct *mm;
++ struct kfd_process *p;
+ uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+
+ workarea = container_of(work,
+ struct copy_context_work_handler_workarea,
+ copy_context_work);
+
+- qpd = workarea->qpd;
+- dqm = workarea->dqm;
+- use_mm(workarea->mm);
++ p = workarea->p;
++ mm = get_task_mm(p->lead_thread);
++ use_mm(mm);
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ struct device_queue_manager *dqm = pdd->dev->dqm;
++ struct qcm_process_device *qpd = &pdd->qpd;
+
++ dqm_lock(dqm);
+
+- list_for_each_entry(q, &qpd->queues_list, list) {
+- mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+
+- /* We ignore the return value from get_wave_state because
+- * i) right now, it always returns 0, and
+- * ii) if we hit an error, we would continue to the next queue
+- * anyway.
+- */
+- mqd_mgr->get_wave_state(mqd_mgr,
+- q->mqd,
+- (void __user *) q->properties.ctx_save_restore_area_address,
+- &tmp_ctl_stack_used_size,
+- &tmp_save_area_used_size);
+- }
+-
+- unuse_mm(workarea->mm);
+-}
++ list_for_each_entry(q, &qpd->queues_list, list) {
++ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+
++ /* We ignore the return value from get_wave_state
++ * because
++ * i) right now, it always returns 0, and
++ * ii) if we hit an error, we would continue to the
++ * next queue anyway.
++ */
++ mqd_mgr->get_wave_state(mqd_mgr,
++ q->mqd,
++ (void __user *) q->properties.ctx_save_restore_area_address,
++ &tmp_ctl_stack_used_size,
++ &tmp_save_area_used_size);
++ }
+
++ dqm_unlock(dqm);
++ }
++ unuse_mm(mm);
++ mmput(mm);
++}
+
+-int suspend_queues(struct device_queue_manager *dqm,
+- struct kfd_process *p,
+- uint32_t flags)
++int suspend_queues(struct kfd_process *p,
++ uint32_t num_queues,
++ uint32_t grace_period,
++ uint32_t flags,
++ uint32_t *queue_ids)
+ {
+ int r = -ENODEV;
+- struct kfd_dev *dev;
++ bool any_queues_suspended = false;
+ struct kfd_process_device *pdd;
++ struct queue *q;
+
+- bool queues_suspended = false;
+- struct copy_context_work_handler_workarea copy_context_worker;
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ bool queues_suspended_on_device = false;
++ struct device_queue_manager *dqm = pdd->dev->dqm;
++ struct qcm_process_device *qpd = &pdd->qpd;
+
+- dev = dqm->dev;
++ dqm_lock(dqm);
+
+- list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- if (dqm->dev == pdd->dev) {
+- r = pdd->dev->dqm->ops.evict_process_queues(
+- pdd->dev->dqm,
+- &pdd->qpd);
++ /* We need to loop over all of the queues on this
++		 * device, check whether each one is in the list passed in,
++		 * and if it is, suspend it.
++ */
++ list_for_each_entry(q, &qpd->queues_list, list) {
++ if (queue_id_in_array(q->properties.queue_id,
++ num_queues,
++ queue_ids)) {
++ if (q->properties.is_suspended)
++ continue;
++ r = suspend_single_queue(dqm,
++ pdd,
++ q);
++ if (r) {
++ pr_err("Failed to suspend process queues. queue_id == %i\n",
++ q->properties.queue_id);
++ dqm_unlock(dqm);
++ return r;
++ }
++ queues_suspended_on_device = true;
++ any_queues_suspended = true;
++ }
++ }
++
++ if (queues_suspended_on_device) {
++ r = execute_queues_cpsch(dqm,
++ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (r) {
+- pr_err("Failed to suspend process queues\n");
+- break;
++ pr_err("Failed to suspend process queues.\n");
++ dqm_unlock(dqm);
++ return r;
+ }
++ }
+
+- copy_context_worker.qpd = &pdd->qpd;
+- copy_context_worker.dqm = dqm;
+- copy_context_worker.mm = get_task_mm(p->lead_thread);
+- queues_suspended = true;
++ dqm_unlock(dqm);
++ amdgpu_amdkfd_debug_mem_fence(dqm->dev->kgd);
++ }
+
+- INIT_WORK_ONSTACK(
+- &copy_context_worker.copy_context_work,
+- copy_context_work_handler);
++ if (any_queues_suspended) {
++ struct copy_context_work_handler_workarea copy_context_worker;
++
++ INIT_WORK_ONSTACK(
++ &copy_context_worker.copy_context_work,
++ copy_context_work_handler);
++
++ copy_context_worker.p = p;
++
++ schedule_work(&copy_context_worker.copy_context_work);
+
+- schedule_work(&copy_context_worker.copy_context_work);
+- break;
+- }
+- }
+
+- if (queues_suspended) {
+- amdgpu_amdkfd_debug_mem_fence(dev->kgd);
+ flush_work(&copy_context_worker.copy_context_work);
+- mmput(copy_context_worker.mm);
+ destroy_work_on_stack(&copy_context_worker.copy_context_work);
+ }
+ return r;
+ }
+
+-int resume_queues(struct device_queue_manager *dqm, struct kfd_process *p)
++int resume_queues(struct kfd_process *p,
++ uint32_t num_queues,
++ uint32_t flags,
++ uint32_t *queue_ids)
+ {
+ int r = -ENODEV;
+ struct kfd_process_device *pdd;
++ struct queue *q;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- if (dqm->dev == pdd->dev) {
+- r = pdd->dev->dqm->ops.restore_process_queues(
+- pdd->dev->dqm,
+- &pdd->qpd);
+- if (r)
++ bool queues_resumed_on_device = false;
++ struct device_queue_manager *dqm = pdd->dev->dqm;
++ struct qcm_process_device *qpd = &pdd->qpd;
++
++ dqm_lock(dqm);
++
++ /* We need to loop over all of the queues on this
++		 * device, check whether each one is in the list passed in,
++		 * and if it is, resume it.
++ */
++ list_for_each_entry(q, &qpd->queues_list, list) {
++ if (queue_id_in_array(q->properties.queue_id,
++ num_queues,
++ queue_ids)) {
++ if (!q->properties.is_suspended)
++ continue;
++ r = resume_single_queue(dqm,
++ &pdd->qpd,
++ q);
++ if (r) {
++ pr_err("Failed to resume process queues\n");
++ dqm_unlock(dqm);
++ return r;
++ }
++ queues_resumed_on_device = true;
++ }
++ }
++
++ if (queues_resumed_on_device) {
++ r = execute_queues_cpsch(dqm,
++ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
++ 0);
++ if (r) {
+ pr_err("Failed to resume process queues\n");
+- break;
++ dqm_unlock(dqm);
++ return r;
++ }
+ }
+- }
+
++ dqm_unlock(dqm);
++ }
+ return r;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+index 01f8249cb2ed..8eb10f610c12 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+@@ -224,11 +224,15 @@ bool check_if_queues_active(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+ int reserve_debug_trap_vmid(struct device_queue_manager *dqm);
+ int release_debug_trap_vmid(struct device_queue_manager *dqm);
+-int suspend_queues(struct device_queue_manager *dqm,
+- struct kfd_process *p,
+- uint32_t flags);
+-int resume_queues(struct device_queue_manager *dqm, struct kfd_process *p);
+-
++int suspend_queues(struct kfd_process *p,
++ uint32_t num_queues,
++ uint32_t grace_period,
++ uint32_t flags,
++ uint32_t *queue_ids);
++int resume_queues(struct kfd_process *p,
++ uint32_t num_queues,
++ uint32_t flags,
++ uint32_t *queue_ids);
+
+ static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
+ {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a0311b2ed5d6..7cd1404129f4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -498,6 +498,7 @@ struct queue_properties {
+ uint32_t doorbell_off;
+ bool is_interop;
+ bool is_evicted;
++ bool is_suspended;
+ bool is_active;
+ /* Not relevant for user mode queues in cp scheduling */
+ unsigned int vmid;
+@@ -521,7 +522,8 @@ struct queue_properties {
+ #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
+ (q).queue_address != 0 && \
+ (q).queue_percent > 0 && \
+- !(q).is_evicted)
++ !(q).is_evicted && \
++ !(q).is_suspended)
+
+ /**
+ * struct queue
+--
+2.17.1
+
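
As a closing illustration (a sketch, not part of the patch): with is_suspended
folded into QUEUE_IS_ACTIVE(), a queue that is otherwise runnable no longer counts
as active once it is suspended, which is the check resume_single_queue() relies on
before re-activating a queue and re-incrementing dqm->queue_count. The field
values below are invented for the example.

	struct queue_properties qp = {
		.queue_size    = 4096,
		.queue_address = 0x100000,
		.queue_percent = 100,
		.is_evicted    = false,
		.is_suspended  = false,
	};

	/* QUEUE_IS_ACTIVE(qp) evaluates to true here. */

	qp.is_suspended = true;
	/*
	 * Now QUEUE_IS_ACTIVE(qp) is false. resume_single_queue() clears the
	 * flag again and only re-activates the queue (and re-increments
	 * dqm->queue_count) if the remaining conditions still hold.
	 */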