From e2a8565998cbdc24d717bed98cb69beaa9aad4f0 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 19 Apr 2017 13:11:30 -0500 Subject: [PATCH 1660/4131] drm/amdkfd: Fix MQD modification race during eviction The eviction path invokes update_mqd before preempting the scheduler which may be using the MQD concurrently. This leads to a race in which the scheduler sees partial values of MQD registers when mapping them to the HQD. Do not modify the MQD during eviction (or restore). Instead just mark q->is_active directly. Fixes CP hangs under stress testing. Change-Id: I6a2395cf8ec307048894e32d3a14851f136ed510 Signed-off-by: Jay Cornwall --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dcdc380..24ef621 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -629,10 +629,11 @@ int process_evict_queues(struct device_queue_manager *dqm, continue; } /* if the queue is not active anyway, it is not evicted */ - if (q->properties.is_active == true) + if (q->properties.is_active == true) { q->properties.is_evicted = true; + q->properties.is_active = false; + } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if (is_queue_nocpsch(dqm, q) && q->properties.is_evicted) retval = mqd->destroy_mqd(mqd, q->mqd, @@ -700,7 +701,8 @@ int process_restore_queues(struct device_queue_manager *dqm, } if (q->properties.is_evicted) { q->properties.is_evicted = false; - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + q->properties.is_active = true; + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) -- 2.7.4