From 9d9fd29ad62d5df64d38933b8668b35187ca6d25 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan
Date: Thu, 15 Jun 2017 14:22:14 -0400
Subject: [PATCH 1719/4131] drm/amdkfd: Reset process queues if the process
 VM_FAULTs

Currently, queues are gracefully preempted during process termination
or when the process VM_FAULTs. Instead, reset the queues in those two
cases.

BUG: SWDEV-110763
Change-Id: Ib088fb8426f22948ec9641c88586c5107a9ba442
Signed-off-by: Harish Kasiviswanathan
---
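Note (kept below the "---" separator, so git am drops it): the new
reset flag threads from process_evict_queues() through
execute_queues_cpsch(), unmap_queues_cpsch() and unmap_sdma_queues()
down to pm_send_unmap_queue(). The standalone C sketch below is
illustrative only (simplified userspace stand-ins, not kernel code)
and models how each caller picks graceful preemption versus reset.

/*
 * Illustrative, compile-and-run sketch (userspace, not kernel code):
 * the stand-in functions model how the "reset" flag is forwarded
 * unchanged down the call chain and selects the unmap behaviour.
 */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for pm_send_unmap_queue(): with reset, the CP resets the
 * queues instead of draining them with a graceful preempt. */
static int pm_send_unmap_queue(bool reset)
{
	printf("unmap queues: %s\n", reset ? "reset" : "graceful preempt");
	return 0;
}

/* Stand-in for execute_queues_cpsch()/unmap_queues_cpsch(): the flag
 * is simply forwarded. */
static int execute_queues_cpsch(bool static_queues_included, bool reset)
{
	(void)static_queues_included;
	return pm_send_unmap_queue(reset);
}

/* Stand-in for process_evict_queues(): each caller picks the policy. */
static int process_evict_queues(bool reset)
{
	return execute_queues_cpsch(false, reset);
}

int main(void)
{
	process_evict_queues(false); /* suspend/eviction: graceful preempt */
	process_evict_queues(true);  /* VM fault or termination: reset */
	return 0;
}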
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 38 ++++++++++++----------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  3 +-
 3 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 6249c49..9ffadc0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -733,7 +733,7 @@ static int quiesce_process_mm(struct kfd_process *p)
 	unsigned int n_evicted = 0;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		r = process_evict_queues(pdd->dev->dqm, &pdd->qpd);
+		r = process_evict_queues(pdd->dev->dqm, &pdd->qpd, false);
 		if (r != 0) {
 			pr_err("Failed to evict process queues\n");
 			goto fail;
@@ -805,7 +805,7 @@ int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm)
 		r = -ENODEV;
 		pdd = kfd_get_process_device_data(kfd, p);
 		if (pdd)
-			r = process_evict_queues(kfd->dqm, &pdd->qpd);
+			r = process_evict_queues(kfd->dqm, &pdd->qpd, false);
 	} else {
 		r = quiesce_process_mm(p);
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1c1535e..052a059 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
-				bool static_queues_included);
+				bool static_queues_included,
+				bool reset);
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
 				uint32_t filter_param, bool reset);
@@ -549,7 +550,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
 }
 
 int process_evict_queues(struct device_queue_manager *dqm,
-			 struct qcm_process_device *qpd)
+			 struct qcm_process_device *qpd,
+			 bool reset)
 {
 	struct queue *q, *next;
 	struct mqd_manager *mqd;
@@ -588,7 +590,7 @@ int process_evict_queues(struct device_queue_manager *dqm,
 			dqm->queue_count--;
 	}
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = execute_queues_cpsch(dqm, qpd->is_debug);
+		retval = execute_queues_cpsch(dqm, qpd->is_debug, reset);
 
 out:
 	mutex_unlock(&dqm->lock);
@@ -658,7 +660,7 @@ int process_restore_queues(struct device_queue_manager *dqm,
 		}
 	}
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = execute_queues_cpsch(dqm, false);
+		retval = execute_queues_cpsch(dqm, false, false);
 
 	if (retval == 0)
 		qpd->evicted = 0;
@@ -974,7 +976,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	init_interrupts(dqm);
 
 	mutex_lock(&dqm->lock);
-	execute_queues_cpsch(dqm, false);
+	execute_queues_cpsch(dqm, false, false);
 	mutex_unlock(&dqm->lock);
 
 	return 0;
@@ -1022,7 +1024,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
-	execute_queues_cpsch(dqm, false);
+	execute_queues_cpsch(dqm, false, false);
 	mutex_unlock(&dqm->lock);
 
 	return 0;
@@ -1037,7 +1039,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	list_del(&kq->list);
 	dqm->queue_count--;
 	qpd->is_debug = false;
-	execute_queues_cpsch(dqm, true);
+	execute_queues_cpsch(dqm, true, false);
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
 	 */
@@ -1110,7 +1112,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	list_add(&q->list, &qpd->queues_list);
 	if (q->properties.is_active) {
 		dqm->queue_count++;
-		retval = execute_queues_cpsch(dqm, false);
+		retval = execute_queues_cpsch(dqm, false, false);
 	}
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -1158,10 +1160,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 }
 
 static int unmap_sdma_queues(struct device_queue_manager *dqm,
-				unsigned int sdma_engine)
+				unsigned int sdma_engine,
+				bool reset)
 {
 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, reset,
 			sdma_engine);
 }
 
@@ -1206,8 +1209,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 			dqm->sdma_queue_count);
 
 	if (dqm->sdma_queue_count > 0) {
-		unmap_sdma_queues(dqm, 0);
-		unmap_sdma_queues(dqm, 1);
+		unmap_sdma_queues(dqm, 0, reset);
+		unmap_sdma_queues(dqm, 1, reset);
 	}
 
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
@@ -1234,7 +1237,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 
 /* dqm->lock mutex has to be locked before calling this function */
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
-				bool static_queues_included)
+				bool static_queues_included,
+				bool reset)
 {
 	int retval;
 	enum kfd_unmap_queues_filter filter;
@@ -1243,7 +1247,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
 		KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
 
-	retval = unmap_queues_cpsch(dqm, filter, 0, false);
+	retval = unmap_queues_cpsch(dqm, filter, 0, reset);
 	if (retval != 0) {
 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		return retval;
@@ -1297,7 +1301,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	retval = execute_queues_cpsch(dqm, false);
+	retval = execute_queues_cpsch(dqm, false, false);
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -1521,7 +1525,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		}
 	}
 
-	retval = execute_queues_cpsch(dqm, true);
+	retval = execute_queues_cpsch(dqm, true, true);
 
 	/* lastly, free mqd resources */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
@@ -1654,7 +1658,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm,
 		return -EINVAL;
 	pdd = kfd_get_process_device_data(dqm->dev, p);
 	if (pdd)
-		ret = process_evict_queues(dqm, &pdd->qpd);
+		ret = process_evict_queues(dqm, &pdd->qpd, true);
 	kfd_unref_process(p);
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 841283a..a492307 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -216,7 +216,8 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 
 int process_evict_queues(struct device_queue_manager *dqm,
-			 struct qcm_process_device *qpd);
+			 struct qcm_process_device *qpd,
+			 bool reset);
 int process_restore_queues(struct device_queue_manager *dqm,
 			   struct qcm_process_device *qpd);
 
-- 
2.7.4