Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1344-drm-amdkfd-Remove-indiscriminate-resetting-of-queues.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1344-drm-amdkfd-Remove-indiscriminate-resetting-of-queues.patch | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1344-drm-amdkfd-Remove-indiscriminate-resetting-of-queues.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1344-drm-amdkfd-Remove-indiscriminate-resetting-of-queues.patch
new file mode 100644
index 00000000..a3d6474b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1344-drm-amdkfd-Remove-indiscriminate-resetting-of-queues.patch
@@ -0,0 +1,336 @@
+From e44332781979018efc12a102ff6943a5c1d340c1 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Sun, 24 Sep 2017 00:54:33 -0400
+Subject: [PATCH 1344/4131] drm/amdkfd: Remove indiscriminate resetting of
+ queues
+
+Resetting queues affects all processes. We can't allow any action
+triggered by a user mode process to affect other processes. Therefore
+process termination and VM faults cannot be allowed to reset queues
+indiscriminately for all processes.
+
+Change-Id: I41f0a7426ac0825041548e0718cb236be417d75d
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 11 +---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +-
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 65 +++++++++-------------
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +-
+ 6 files changed, 33 insertions(+), 55 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+index 5f122a1..00536a1 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
++++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+@@ -95,17 +95,10 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
+ ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+ struct kfd_vm_fault_info info;
+
++ kfd_process_vm_fault(dev->dqm, ihre->pasid);
++
+ memset(&info, 0, sizeof(info));
+ dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info);
+- /* When CWSR is disabled, we choose to reset the device, which
+- * will reset the queues from other processes on this device.
+- * This is a bug that we accept given by-pasid reset does not
+- * work well.
+- */
+- if (dev->cwsr_enabled)
+- kfd_process_vm_fault(dev->dqm, ihre->pasid, false);
+- else
+- kfd_process_vm_fault(dev->dqm, ihre->pasid, true);
+ if (!info.page_addr && !info.status)
+ return;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index acee0aa..af2424e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -800,7 +800,7 @@ static int quiesce_process_mm(struct kfd_process *p)
+ unsigned int n_evicted = 0;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+- r = process_evict_queues(pdd->dev->dqm, &pdd->qpd, false);
++ r = process_evict_queues(pdd->dev->dqm, &pdd->qpd);
+ if (r != 0) {
+ pr_err("Failed to evict process queues\n");
+ goto fail;
+@@ -872,7 +872,7 @@ int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm)
+ r = -ENODEV;
+ pdd = kfd_get_process_device_data(kfd, p);
+ if (pdd)
+- r = process_evict_queues(kfd->dqm, &pdd->qpd, false);
++ r = process_evict_queues(kfd->dqm, &pdd->qpd);
+ } else {
+ r = quiesce_process_mm(p);
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 3a09cbc..2d8c238 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -45,11 +45,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+
+ static int execute_queues_cpsch(struct device_queue_manager *dqm,
+- bool static_queues_included,
+- bool reset);
++ bool static_queues_included);
+ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+ enum kfd_unmap_queues_filter filter,
+- uint32_t filter_param, bool reset);
++ uint32_t filter_param);
+
+ static int map_queues_cpsch(struct device_queue_manager *dqm);
+
+@@ -503,8 +502,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ /* HWS mode, unmap first to own mqd */
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+ retval = unmap_queues_cpsch(dqm,
+- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+- false);
++ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (retval) {
+ pr_err("unmap queue failed");
+ goto out_unlock;
+@@ -567,8 +565,7 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
+ }
+
+ int process_evict_queues(struct device_queue_manager *dqm,
+- struct qcm_process_device *qpd,
+- bool reset)
++ struct qcm_process_device *qpd)
+ {
+ struct queue *q, *next;
+ struct mqd_manager *mqd;
+@@ -607,7 +604,7 @@ int process_evict_queues(struct device_queue_manager *dqm,
+ dqm->queue_count--;
+ }
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
+- retval = execute_queues_cpsch(dqm, qpd->is_debug, reset);
++ retval = execute_queues_cpsch(dqm, qpd->is_debug);
+
+ out:
+ mutex_unlock(&dqm->lock);
+
+@@ -677,7 +674,7 @@ int process_restore_queues(struct device_queue_manager *dqm,
+ }
+ }
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
+- retval = execute_queues_cpsch(dqm, false, false);
++ retval = execute_queues_cpsch(dqm, false);
+
+ if (retval == 0)
+ qpd->evicted = 0;
+@@ -998,7 +995,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
+ init_interrupts(dqm);
+
+ mutex_lock(&dqm->lock);
+- execute_queues_cpsch(dqm, false, false);
++ execute_queues_cpsch(dqm, false);
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+@@ -1013,7 +1010,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
+ {
+ mutex_lock(&dqm->lock);
+
+- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
++ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+
+ mutex_unlock(&dqm->lock);
+
+@@ -1046,7 +1043,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ list_add(&kq->list, &qpd->priv_queue_list);
+ dqm->queue_count++;
+ qpd->is_debug = true;
+- execute_queues_cpsch(dqm, false, false);
++ execute_queues_cpsch(dqm, false);
+ mutex_unlock(&dqm->lock);
+
+ return 0;
+@@ -1061,7 +1058,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ list_del(&kq->list);
+ dqm->queue_count--;
+ qpd->is_debug = false;
+- execute_queues_cpsch(dqm, true, false);
++ execute_queues_cpsch(dqm, true);
+ /*
+ * Unconditionally decrement this counter, regardless of the queue's
+ * type.
+@@ -1135,7 +1132,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ qpd->queue_count++;
+ if (q->properties.is_active) {
+ dqm->queue_count++;
+- retval = execute_queues_cpsch(dqm, false, false);
++ retval = execute_queues_cpsch(dqm, false);
+ }
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+@@ -1183,11 +1180,10 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+ }
+
+ static int unmap_sdma_queues(struct device_queue_manager *dqm,
+- unsigned int sdma_engine,
+- bool reset)
++ unsigned int sdma_engine)
+ {
+ return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, reset,
++ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
+ sdma_engine);
+ }
+
+@@ -1208,7 +1204,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
+
+ retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+ if (retval) {
+- pr_err("failed to execute runlist");
++ pr_err("failed to execute runlist\n");
+ return retval;
+ }
+ dqm->active_runlist = true;
+@@ -1219,7 +1215,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
+ /* dqm->lock mutex has to be locked before calling this function */
+ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+ enum kfd_unmap_queues_filter filter,
+- uint32_t filter_param, bool reset)
++ uint32_t filter_param)
+ {
+ int retval;
+
+@@ -1232,12 +1228,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+ dqm->sdma_queue_count);
+
+ if (dqm->sdma_queue_count > 0) {
+- unmap_sdma_queues(dqm, 0, reset);
+- unmap_sdma_queues(dqm, 1, reset);
++ unmap_sdma_queues(dqm, 0);
++ unmap_sdma_queues(dqm, 1);
+ }
+
+ retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
+- filter, filter_param, reset, 0);
++ filter, filter_param, false, 0);
+ if (retval)
+ return retval;
+
+@@ -1248,7 +1244,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+ QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ if (retval) {
+- pr_err("%s queues failed.", reset ? "Resetting" : "Unmapping");
++ pr_err("Unmapping queues failed.\n");
+ return retval;
+ }
+
+@@ -1260,8 +1256,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+
+ /* dqm->lock mutex has to be locked before calling this function */
+ static int execute_queues_cpsch(struct device_queue_manager *dqm,
+- bool static_queues_included,
+- bool reset)
++ bool static_queues_included)
+ {
+ int retval;
+ enum kfd_unmap_queues_filter filter;
+@@ -1270,9 +1265,9 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+
+- retval = unmap_queues_cpsch(dqm, filter, 0, reset);
++ retval = unmap_queues_cpsch(dqm, filter, 0);
+ if (retval) {
+- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
++ pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+ return retval;
+ }
+
+@@ -1325,7 +1320,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ if (q->properties.is_active)
+ dqm->queue_count--;
+
+- retval = execute_queues_cpsch(dqm, false, false);
++ retval = execute_queues_cpsch(dqm, false);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
+
+@@ -1552,15 +1547,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ }
+ }
+
+- /* When CWSR is disabled, we choose to reset the device, which will
+- * reset the queues from other processes on this device. This is
+- * a bug that we accept given by-pasid reset does not work well.
+- */
+- if (dqm->dev->cwsr_enabled)
+- retval = execute_queues_cpsch(dqm, true, false);
+- else
+- retval = execute_queues_cpsch(dqm, true, true);
+-
++ retval = execute_queues_cpsch(dqm, true);
+ if (retval || qpd->reset_wavefronts) {
+ pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
+ dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
+@@ -1692,7 +1679,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
+ }
+
+ int kfd_process_vm_fault(struct device_queue_manager *dqm,
+- unsigned int pasid, bool reset)
++ unsigned int pasid)
+ {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+@@ -1702,7 +1689,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm,
+ return -EINVAL;
+ pdd = kfd_get_process_device_data(dqm->dev, p);
+ if (pdd)
+- ret = process_evict_queues(dqm, &pdd->qpd, reset);
++ ret = process_evict_queues(dqm, &pdd->qpd);
+ kfd_unref_process(p);
+
+ return ret;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+index a492307..841283a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+@@ -216,8 +216,7 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
+
+ int process_evict_queues(struct device_queue_manager *dqm,
+- struct qcm_process_device *qpd,
+- bool reset);
++ struct qcm_process_device *qpd);
+ int process_restore_queues(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+index ccfc89a..b2c6b52 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+@@ -122,7 +122,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
+ info.prot_read = ring_id & 0x10;
+ info.prot_write = ring_id & 0x20;
+
+- kfd_process_vm_fault(dev->dqm, pasid, false);
++ kfd_process_vm_fault(dev->dqm, pasid);
+ kfd_signal_vm_fault_event(dev, pasid, &info);
+ }
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index c853956..43a8838 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -892,8 +892,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
+ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+ enum kfd_queue_type type);
+ void kernel_queue_uninit(struct kernel_queue *kq);
+-int kfd_process_vm_fault(struct device_queue_manager *dqm,
+- unsigned int pasid, bool reset);
++int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
+
+ /* Process Queue Manager */
+ struct process_queue_node {
+--
+2.7.4
+
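
The sketch below is not part of the patch or the kernel tree; it is a minimal, stand-alone C illustration (stub types, hypothetical PASID value) of the calling convention the patch moves to: a VM fault now only evicts the faulting process's own queues, and the per-call reset flag that could escalate into resetting other processes' queues is gone.

/* Minimal sketch only -- stub types, not the kernel's real definitions. */
#include <stdio.h>

struct device_queue_manager { int num_queues; };

/* Stand-in for the reworked helper: it acts on one PASID (one process)
 * and no longer takes a "reset" argument. */
static int kfd_process_vm_fault(struct device_queue_manager *dqm,
                                unsigned int pasid)
{
        printf("evicting queues of faulting process (pasid %u) only; dqm has %d queues\n",
               pasid, dqm->num_queues);
        return 0;
}

/* Stand-in for the interrupt bottom half: one unconditional per-process
 * call, with no cwsr_enabled branch choosing between "evict" and
 * "reset everything". */
static void handle_gfx_mem_prot_fault(struct device_queue_manager *dqm,
                                      unsigned int pasid)
{
        kfd_process_vm_fault(dqm, pasid);
}

int main(void)
{
        struct device_queue_manager dqm = { .num_queues = 8 };

        handle_gfx_mem_prot_fault(&dqm, 42);    /* 42: hypothetical PASID */
        return 0;
}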