Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch | 478
1 files changed, 478 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch
new file mode 100644
index 00000000..5b9121db
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2777-drm-amdkfd-Implement-queue-based-suspend-resume.patch
@@ -0,0 +1,478 @@
+From 7b5a8eee9cf58fbd76caeb49bc137749fc25e476 Mon Sep 17 00:00:00 2001
+From: Philip Cox <Philip.Cox@amd.com>
+Date: Thu, 11 Apr 2019 11:49:22 -0400
+Subject: [PATCH 2777/2940] drm/amdkfd: Implement queue based suspend/resume
+
+Rather than suspending and resuming all the queues on a node,
+we need the ability to specify a list of queues to suspend and resume.
+
+Change-Id: Id7d5a32e3bcc3806ecea3ae9b89cfafb28469beb
+Signed-off-by: Philip Cox <Philip.Cox@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  48 ++--
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 262 ++++++++++++++----
+ .../drm/amd/amdkfd/kfd_device_queue_manager.h |  14 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   4 +-
+ 4 files changed, 229 insertions(+), 99 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 4abf4f462fa1..d5416af01ac4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -2555,7 +2555,7 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ 	struct kfd_ioctl_dbg_trap_args *args = data;
+ 	struct kfd_process_device *pdd = NULL;
+ 	int r = 0;
+-	struct kfd_dev *dev;
++	struct kfd_dev *dev = NULL;
+ 	struct kfd_process *target = NULL;
+ 	struct pid *pid = NULL;
+ 	uint32_t *queue_id_array = NULL;
+@@ -2565,7 +2565,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ 	uint32_t data2;
+ 	uint32_t data3;
+ 	bool is_suspend_or_resume;
+-	uint8_t id;
+ 
+ 	debug_trap_action = args->op;
+ 	gpu_id = args->gpu_id;
+@@ -2720,39 +2719,24 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
+ 			data1,
+ 			dev->vm_info.last_vmid_kfd);
+ 		break;
+-	case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
+-		id = 0;
+-		/* We need to loop over all of the topology devices */
+-		while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+-			if (!dev) {
+-				/* Not a GPU. Skip it */
+-				id++;
+-				continue;
+-			}
+ 
+-			r = suspend_queues(dev->dqm, target, data1);
+-			if (r)
+-				goto unlock_out;
+-
+-			id++;
+-		}
++	case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
++		r = suspend_queues(target,
++				data2, /* Number of queues */
++				data3, /* Grace Period */
++				data1, /* Flags */
++				queue_id_array); /* array of queue ids */
++		if (r)
++			goto unlock_out;
+ 		break;
+-	case KFD_IOC_DBG_TRAP_NODE_RESUME:
+-		id = 0;
+-		/* We need to loop over all of the topology devices */
+-		while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+-			if (!dev) {
+-				/* Not a GPU. Skip it */
+-				id++;
+-				continue;
+-			}
+ 
+-			r = resume_queues(dev->dqm, target);
+-			if (r)
+-				goto unlock_out;
+-
+-			id++;
+-		}
++	case KFD_IOC_DBG_TRAP_NODE_RESUME:
++		r = resume_queues(target,
++				data2, /* Number of queues */
++				data1, /* Flags */
++				queue_id_array); /* array of queue ids */
++		if (r)
++			goto unlock_out;
+ 		break;
+ 	default:
+ 		pr_err("Invalid option: %i\n", debug_trap_action);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index d0c316b91af0..497d449fc6d0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -600,6 +600,66 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ 	return retval;
+ }
+ 
++/* Unlike evict_process_queues_cpsch and evict_process_queues_nocpsch,
++ * suspend_single_queue does not lock the dqm. You should
++ * lock the dqm before calling, and unlock after calling.
++ *
++ * We don't lock the dqm here because this function may be
++ * called on multiple queues in a loop, so rather than locking/unlocking
++ * multiple times, we will just keep the dqm locked for all of the calls.
++ */
++static int suspend_single_queue(struct device_queue_manager *dqm,
++				struct kfd_process_device *pdd,
++				struct queue *q)
++{
++	int retval = 0;
++
++	pr_debug("Suspending PASID %u queue [%i]\n",
++			pdd->process->pasid,
++			q->properties.queue_id);
++
++	q->properties.is_suspended = true;
++	if (q->properties.is_active) {
++		dqm->queue_count--;
++		q->properties.is_active = false;
++	}
++
++	return retval;
++}
++
++/* Unlike restore_process_queues_cpsch and restore_process_queues_nocpsch,
++ * resume_single_queue does not lock the dqm. You should
++ * lock the dqm before calling, and unlock after calling.
++ *
++ * We don't lock the dqm here because this function may be
++ * called on multiple queues in a loop, so rather than locking/unlocking
++ * multiple times, we will just keep the dqm locked for all of the calls.
++ */
++static int resume_single_queue(struct device_queue_manager *dqm,
++			       struct qcm_process_device *qpd,
++			       struct queue *q)
++{
++	struct kfd_process_device *pdd;
++	uint64_t pd_base;
++	int retval = 0;
++
++	pdd = qpd_to_pdd(qpd);
++	/* Retrieve PD base */
++	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
++
++	pr_debug("Restoring from suspend PASID %u queue [%i]\n",
++			pdd->process->pasid,
++			q->properties.queue_id);
++
++	q->properties.is_suspended = false;
++
++	if (QUEUE_IS_ACTIVE(q->properties)) {
++		q->properties.is_active = true;
++		dqm->queue_count++;
++	}
++
++	return retval;
++}
+ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+ 				struct qcm_process_device *qpd)
+ {
+@@ -1227,7 +1287,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ 	 * updates the is_evicted flag but is a no-op otherwise.
+ 	 */
+ 	q->properties.is_evicted = !!qpd->evicted;
+-
++	q->properties.is_suspended = false;
+ 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+ 	q->properties.tba_addr = qpd->tba_addr;
+ 	q->properties.tma_addr = qpd->tma_addr;
+@@ -1979,114 +2039,194 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm)
+ 	return r;
+ }
+ 
++bool queue_id_in_array(unsigned int queue_id,
++		uint32_t num_queues,
++		uint32_t *queue_ids)
++{
++	int i;
++
++	for (i = 0; i < num_queues; i++)
++		if (queue_id == queue_ids[i])
++			return true;
++	return false;
++}
+ 
+ struct copy_context_work_handler_workarea {
+ 	struct work_struct copy_context_work;
+-	struct device_queue_manager *dqm;
+-	struct qcm_process_device *qpd;
+-	struct mm_struct *mm;
++	struct kfd_process *p;
+ };
+ 
+ void copy_context_work_handler (struct work_struct *work)
+ {
+ 	struct copy_context_work_handler_workarea *workarea;
+ 	struct mqd_manager *mqd_mgr;
+-	struct qcm_process_device *qpd;
+-	struct device_queue_manager *dqm;
++	struct kfd_process_device *pdd;
+ 	struct queue *q;
++	struct mm_struct *mm;
++	struct kfd_process *p;
+ 	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+ 
+ 	workarea = container_of(work,
+ 			struct copy_context_work_handler_workarea,
+ 			copy_context_work);
+ 
+-	qpd = workarea->qpd;
+-	dqm = workarea->dqm;
+-	use_mm(workarea->mm);
++	p = workarea->p;
++	mm = get_task_mm(p->lead_thread);
++	use_mm(mm);
++	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++		struct device_queue_manager *dqm = pdd->dev->dqm;
++		struct qcm_process_device *qpd = &pdd->qpd;
+ 
++		dqm_lock(dqm);
+ 
+ 
+-	list_for_each_entry(q, &qpd->queues_list, list) {
+-		mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+ 
+-		/* We ignore the return value from get_wave_state because
+-		 * i) right now, it always returns 0, and
+-		 * ii) if we hit an error, we would continue to the next queue
+-		 * anyway.
+-		 */
+-		mqd_mgr->get_wave_state(mqd_mgr,
+-			q->mqd,
+-			(void __user *) q->properties.ctx_save_restore_area_address,
+-			&tmp_ctl_stack_used_size,
+-			&tmp_save_area_used_size);
+-	}
+-
+-	unuse_mm(workarea->mm);
+-}
++		list_for_each_entry(q, &qpd->queues_list, list) {
++			mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+ 
++			/* We ignore the return value from get_wave_state
++			 * because
++			 * i) right now, it always returns 0, and
++			 * ii) if we hit an error, we would continue to the
++			 * next queue anyway.
++			 */
++			mqd_mgr->get_wave_state(mqd_mgr,
++				q->mqd,
++				(void __user *) q->properties.ctx_save_restore_area_address,
++				&tmp_ctl_stack_used_size,
++				&tmp_save_area_used_size);
++		}
+ 
++		dqm_unlock(dqm);
++	}
++	unuse_mm(mm);
++	mmput(mm);
++}
+ 
+-int suspend_queues(struct device_queue_manager *dqm,
+-		struct kfd_process *p,
+-		uint32_t flags)
++int suspend_queues(struct kfd_process *p,
++		uint32_t num_queues,
++		uint32_t grace_period,
++		uint32_t flags,
++		uint32_t *queue_ids)
+ {
+ 	int r = -ENODEV;
+-	struct kfd_dev *dev;
++	bool any_queues_suspended = false;
+ 	struct kfd_process_device *pdd;
++	struct queue *q;
+ 
+-	bool queues_suspended = false;
+-	struct copy_context_work_handler_workarea copy_context_worker;
++	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++		bool queues_suspended_on_device = false;
++		struct device_queue_manager *dqm = pdd->dev->dqm;
++		struct qcm_process_device *qpd = &pdd->qpd;
+ 
+-	dev = dqm->dev;
++		dqm_lock(dqm);
+ 
+-	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+-		if (dqm->dev == pdd->dev) {
+-			r = pdd->dev->dqm->ops.evict_process_queues(
+-				pdd->dev->dqm,
+-				&pdd->qpd);
++		/* We need to loop over all of the queues on this
++		 * device and check if each one is in the list passed
++		 * in; if it is, we will suspend it.
++		 */
++		list_for_each_entry(q, &qpd->queues_list, list) {
++			if (queue_id_in_array(q->properties.queue_id,
++						num_queues,
++						queue_ids)) {
++				if (q->properties.is_suspended)
++					continue;
++				r = suspend_single_queue(dqm,
++						pdd,
++						q);
++				if (r) {
++					pr_err("Failed to suspend process queues. queue_id == %i\n",
++						q->properties.queue_id);
++					dqm_unlock(dqm);
++					return r;
++				}
++				queues_suspended_on_device = true;
++				any_queues_suspended = true;
++			}
++		}
++
++		if (queues_suspended_on_device) {
++			r = execute_queues_cpsch(dqm,
++					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ 			if (r) {
+-				pr_err("Failed to suspend process queues\n");
+-				break;
++				pr_err("Failed to suspend process queues.\n");
++				dqm_unlock(dqm);
++				return r;
+ 			}
++		}
+ 
+-			copy_context_worker.qpd = &pdd->qpd;
+-			copy_context_worker.dqm = dqm;
+-			copy_context_worker.mm = get_task_mm(p->lead_thread);
+-			queues_suspended = true;
++		dqm_unlock(dqm);
++		amdgpu_amdkfd_debug_mem_fence(dqm->dev->kgd);
++	}
+ 
+-			INIT_WORK_ONSTACK(
+-				&copy_context_worker.copy_context_work,
+-				copy_context_work_handler);
++	if (any_queues_suspended) {
++		struct copy_context_work_handler_workarea copy_context_worker;
++
++		INIT_WORK_ONSTACK(
++			&copy_context_worker.copy_context_work,
++			copy_context_work_handler);
++
++		copy_context_worker.p = p;
++
++		schedule_work(&copy_context_worker.copy_context_work);
+ 
+-		schedule_work(&copy_context_worker.copy_context_work);
+-		break;
+-	}
+-	}
+ 
+-	if (queues_suspended) {
+-		amdgpu_amdkfd_debug_mem_fence(dev->kgd);
+ 		flush_work(&copy_context_worker.copy_context_work);
+-		mmput(copy_context_worker.mm);
+ 		destroy_work_on_stack(&copy_context_worker.copy_context_work);
+ 	}
+ 	return r;
+ }
+ 
+-int resume_queues(struct device_queue_manager *dqm, struct kfd_process *p)
++int resume_queues(struct kfd_process *p,
++		uint32_t num_queues,
++		uint32_t flags,
++		uint32_t *queue_ids)
+ {
+ 	int r = -ENODEV;
+ 	struct kfd_process_device *pdd;
++	struct queue *q;
+ 
+ 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+-		if (dqm->dev == pdd->dev) {
+-			r = pdd->dev->dqm->ops.restore_process_queues(
+-				pdd->dev->dqm,
+-				&pdd->qpd);
+-			if (r)
++		bool queues_resumed_on_device = false;
++		struct device_queue_manager *dqm = pdd->dev->dqm;
++		struct qcm_process_device *qpd = &pdd->qpd;
++
++		dqm_lock(dqm);
++
++		/* We need to loop over all of the queues on this
++		 * device and check if each one is in the list passed
++		 * in; if it is, we will restore it.
++		 */
++		list_for_each_entry(q, &qpd->queues_list, list) {
++			if (queue_id_in_array(q->properties.queue_id,
++						num_queues,
++						queue_ids)) {
++				if (!q->properties.is_suspended)
++					continue;
++				r = resume_single_queue(dqm,
++						&pdd->qpd,
++						q);
++				if (r) {
++					pr_err("Failed to resume process queues\n");
++					dqm_unlock(dqm);
++					return r;
++				}
++				queues_resumed_on_device = true;
++			}
++		}
++
++		if (queues_resumed_on_device) {
++			r = execute_queues_cpsch(dqm,
++					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
++					0);
++			if (r) {
+ 				pr_err("Failed to resume process queues\n");
+-				break;
++				dqm_unlock(dqm);
++				return r;
++			}
+ 		}
+-	}
+ 
++		dqm_unlock(dqm);
++	}
+ 	return r;
+ }
+ 
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+index 01f8249cb2ed..8eb10f610c12 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+@@ -224,11 +224,15 @@ bool check_if_queues_active(struct device_queue_manager *dqm,
+ 		struct qcm_process_device *qpd);
+ int reserve_debug_trap_vmid(struct device_queue_manager *dqm);
+ int release_debug_trap_vmid(struct device_queue_manager *dqm);
+-int suspend_queues(struct device_queue_manager *dqm,
+-		struct kfd_process *p,
+-		uint32_t flags);
+-int resume_queues(struct device_queue_manager *dqm, struct kfd_process *p);
+-
++int suspend_queues(struct kfd_process *p,
++		uint32_t num_queues,
++		uint32_t grace_period,
++		uint32_t flags,
++		uint32_t *queue_ids);
++int resume_queues(struct kfd_process *p,
++		uint32_t num_queues,
++		uint32_t flags,
++		uint32_t *queue_ids);
+ 
+ static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
+ {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a0311b2ed5d6..7cd1404129f4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -498,6 +498,7 @@ struct queue_properties {
+ 	uint32_t doorbell_off;
+ 	bool is_interop;
+ 	bool is_evicted;
++	bool is_suspended;
+ 	bool is_active;
+ 	/* Not relevant for user mode queues in cp scheduling */
+ 	unsigned int vmid;
+@@ -521,7 +522,8 @@ struct queue_properties {
+ #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&	\
+ 			(q).queue_address != 0 &&	\
+ 			(q).queue_percent > 0 &&	\
+-			!(q).is_evicted)
++			!(q).is_evicted && \
++			!(q).is_suspended)
+ 
+ /**
+  * struct queue
+-- 
+2.17.1
+
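
The core of the rework above is that suspend_queues()/resume_queues() now walk each device's queue list and act only on queues named in a caller-supplied ID array, adjusting the dqm's active-queue count per queue instead of evicting or restoring every queue of the process. The sketch below is a minimal user-space model of that bookkeeping, for illustration only: the struct layouts are simplified stand-ins rather than the kernel's kfd types, and the two helpers mirror queue_id_in_array() and suspend_single_queue() from the patch.

	/* Simplified, user-space model of the per-queue suspend bookkeeping
	 * introduced by this patch. struct queue_model and struct dqm_model
	 * are illustrative stand-ins, not the kernel's definitions.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct queue_model {
		uint32_t queue_id;
		bool is_active;
		bool is_suspended;
	};

	struct dqm_model {
		int queue_count;	/* number of active (mapped) queues */
	};

	/* Mirrors queue_id_in_array() from the patch: linear membership test. */
	static bool queue_id_in_array(uint32_t queue_id, uint32_t num_queues,
				      const uint32_t *queue_ids)
	{
		uint32_t i;

		for (i = 0; i < num_queues; i++)
			if (queue_id == queue_ids[i])
				return true;
		return false;
	}

	/* Mirrors suspend_single_queue(): mark the queue suspended and, if it
	 * was active, drop it from the active-queue count so a later remap of
	 * the runlist leaves it unmapped.
	 */
	static void suspend_single_queue(struct dqm_model *dqm, struct queue_model *q)
	{
		q->is_suspended = true;
		if (q->is_active) {
			dqm->queue_count--;
			q->is_active = false;
		}
	}

	int main(void)
	{
		struct dqm_model dqm = { .queue_count = 3 };
		struct queue_model queues[] = {
			{ .queue_id = 1, .is_active = true },
			{ .queue_id = 2, .is_active = true },
			{ .queue_id = 7, .is_active = true },
		};
		uint32_t targets[] = { 2, 7 };	/* only these two are suspended */
		size_t i;

		for (i = 0; i < sizeof(queues) / sizeof(queues[0]); i++) {
			struct queue_model *q = &queues[i];

			if (!queue_id_in_array(q->queue_id, 2, targets))
				continue;	/* not in the caller's list: skip */
			if (q->is_suspended)
				continue;	/* already suspended: skip */
			suspend_single_queue(&dqm, q);
		}

		/* queue 1 is untouched, so one active queue remains. */
		printf("active queues remaining: %d\n", dqm.queue_count);
		return 0;
	}

In the real driver, the per-device pass then calls execute_queues_cpsch() under the dqm lock to remap the runlist, and the extended QUEUE_IS_ACTIVE() macro keeps a suspended queue from being counted active again until resume_single_queue() clears is_suspended.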