diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1298-drm-amdkfd-Reset-process-queues-if-it-VM_FAULTs.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1298-drm-amdkfd-Reset-process-queues-if-it-VM_FAULTs.patch | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1298-drm-amdkfd-Reset-process-queues-if-it-VM_FAULTs.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1298-drm-amdkfd-Reset-process-queues-if-it-VM_FAULTs.patch new file mode 100644 index 00000000..efccde19 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1298-drm-amdkfd-Reset-process-queues-if-it-VM_FAULTs.patch @@ -0,0 +1,206 @@ +From a93c77f2b3512cbe67090fdce034827adff9ef76 Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Date: Thu, 15 Jun 2017 14:22:14 -0400 +Subject: [PATCH 1298/4131] drm/amdkfd: Reset process queues if it VM_FAULTs + +Currently, queues are preempt during process termination or if it +VM_FAULTs. Instead reset the queues. + +BUG: SWDEV-110763 + +Change-Id: Ib088fb8426f22948ec9641c88586c5107a9ba442 +Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +-- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 38 ++++++++++++---------- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +- + 3 files changed, 25 insertions(+), 20 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 6249c49..9ffadc0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -733,7 +733,7 @@ static int quiesce_process_mm(struct kfd_process *p) + unsigned int n_evicted = 0; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { +- r = process_evict_queues(pdd->dev->dqm, &pdd->qpd); ++ r = process_evict_queues(pdd->dev->dqm, &pdd->qpd, false); + if (r != 0) { + pr_err("Failed to evict process queues\n"); + goto fail; +@@ -805,7 +805,7 @@ int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm) + r = -ENODEV; + pdd = kfd_get_process_device_data(kfd, p); + if (pdd) +- r = process_evict_queues(kfd->dqm, &pdd->qpd); ++ r = process_evict_queues(kfd->dqm, &pdd->qpd, false); + } else { + r = quiesce_process_mm(p); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 1c1535e..052a059 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + + static int execute_queues_cpsch(struct device_queue_manager *dqm, +- bool static_queues_included); ++ bool static_queues_included, ++ bool reset); + static int unmap_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset); +@@ -549,7 +550,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch( + } + + int process_evict_queues(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd) ++ struct qcm_process_device *qpd, ++ bool reset) + { + struct queue *q, *next; + struct mqd_manager *mqd; +@@ -588,7 +590,7 @@ int process_evict_queues(struct device_queue_manager *dqm, + dqm->queue_count--; + } + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) +- retval = execute_queues_cpsch(dqm, qpd->is_debug); ++ retval = execute_queues_cpsch(dqm, qpd->is_debug, reset); + + out: + mutex_unlock(&dqm->lock); +@@ -658,7 +660,7 @@ int process_restore_queues(struct device_queue_manager *dqm, + } + } + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) +- retval = execute_queues_cpsch(dqm, false); ++ retval = execute_queues_cpsch(dqm, false, false); + + if (retval == 0) + qpd->evicted = 0; +@@ -974,7 +976,7 @@ static int start_cpsch(struct device_queue_manager *dqm) + init_interrupts(dqm); + + mutex_lock(&dqm->lock); +- execute_queues_cpsch(dqm, false); ++ execute_queues_cpsch(dqm, false, false); + mutex_unlock(&dqm->lock); + + return 0; +@@ -1022,7 +1024,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, + list_add(&kq->list, &qpd->priv_queue_list); + dqm->queue_count++; + qpd->is_debug = true; +- execute_queues_cpsch(dqm, false); ++ execute_queues_cpsch(dqm, false, false); + mutex_unlock(&dqm->lock); + + return 0; +@@ -1037,7 +1039,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, + list_del(&kq->list); + dqm->queue_count--; + qpd->is_debug = false; +- execute_queues_cpsch(dqm, true); ++ execute_queues_cpsch(dqm, true, false); + /* + * Unconditionally decrement this counter, regardless of the queue's + * type. +@@ -1110,7 +1112,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + list_add(&q->list, &qpd->queues_list); + if (q->properties.is_active) { + dqm->queue_count++; +- retval = execute_queues_cpsch(dqm, false); ++ retval = execute_queues_cpsch(dqm, false, false); + } + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) +@@ -1158,10 +1160,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + } + + static int unmap_sdma_queues(struct device_queue_manager *dqm, +- unsigned int sdma_engine) ++ unsigned int sdma_engine, ++ bool reset) + { + return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, +- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, ++ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, reset, + sdma_engine); + } + +@@ -1206,8 +1209,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, + dqm->sdma_queue_count); + + if (dqm->sdma_queue_count > 0) { +- unmap_sdma_queues(dqm, 0); +- unmap_sdma_queues(dqm, 1); ++ unmap_sdma_queues(dqm, 0, reset); ++ unmap_sdma_queues(dqm, 1, reset); + } + + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, +@@ -1234,7 +1237,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, + + /* dqm->lock mutex has to be locked before calling this function */ + static int execute_queues_cpsch(struct device_queue_manager *dqm, +- bool static_queues_included) ++ bool static_queues_included, ++ bool reset) + { + int retval; + enum kfd_unmap_queues_filter filter; +@@ -1243,7 +1247,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + +- retval = unmap_queues_cpsch(dqm, filter, 0, false); ++ retval = unmap_queues_cpsch(dqm, filter, 0, reset); + if (retval != 0) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + return retval; +@@ -1297,7 +1301,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + if (q->properties.is_active) + dqm->queue_count--; + +- retval = execute_queues_cpsch(dqm, false); ++ retval = execute_queues_cpsch(dqm, false, false); + + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + +@@ -1521,7 +1525,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + } + } + +- retval = execute_queues_cpsch(dqm, true); ++ retval = execute_queues_cpsch(dqm, true, true); + + /* lastly, free mqd resources */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { +@@ -1654,7 +1658,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, + return -EINVAL; + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) +- ret = process_evict_queues(dqm, &pdd->qpd); ++ ret = process_evict_queues(dqm, &pdd->qpd, true); + kfd_unref_process(p); + + return ret; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +index 841283a..a492307 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +@@ -216,7 +216,8 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); + unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); + + int process_evict_queues(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd); ++ struct qcm_process_device *qpd, ++ bool reset); + int process_restore_queues(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + +-- +2.7.4 + |