diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4156-drm-amdkfd-GPU-recovery-support-from-KFD-step-1.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/4156-drm-amdkfd-GPU-recovery-support-from-KFD-step-1.patch | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4156-drm-amdkfd-GPU-recovery-support-from-KFD-step-1.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4156-drm-amdkfd-GPU-recovery-support-from-KFD-step-1.patch new file mode 100644 index 00000000..e08063f5 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4156-drm-amdkfd-GPU-recovery-support-from-KFD-step-1.patch @@ -0,0 +1,150 @@ +From 60aacf851af2af7e39ff8782112d59093dbbc11b Mon Sep 17 00:00:00 2001 +From: Shaoyun Liu <Shaoyun.Liu@amd.com> +Date: Wed, 28 Feb 2018 11:46:32 -0500 +Subject: [PATCH 4156/5725] drm/amdkfd: GPU recovery support from KFD (step 1) + +Lock KFD and evict existing queues on reset + +Change-Id: I0f0526b5beac68bd7a96ead58b95a57d4f7f8b13 +Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com> +Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 ++++ + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 43 +++++++++++++++++++++++++++++--- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 5 ++++ + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +++ + 4 files changed, 54 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 73aec76..fd62468 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -136,6 +136,11 @@ static int kfd_open(struct inode *inode, struct file *filep) + if (IS_ERR(process)) + return PTR_ERR(process); + ++ if (kfd_is_locked()) { ++ kfd_unref_process(process); ++ return -EAGAIN; ++ } ++ + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", + process->pasid, process->is_32bit_user_mode); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index a9ad2a8..768373f 100755 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -32,7 +32,13 @@ + #include "kfd_iommu.h" + + #define MQD_SIZE_ALIGNED 768 +-static atomic_t kfd_device_suspended = ATOMIC_INIT(0); ++ ++/* ++ * kfd_locked is used to lock the kfd driver during suspend or reset ++ * once locked, kfd driver will stop any further GPU execution. ++ * create process (open) will return -EAGAIN. ++ */ ++static atomic_t kfd_locked = ATOMIC_INIT(0); + + #ifdef KFD_SUPPORT_IOMMU_V2 + static const struct kfd_device_info kaveri_device_info = { +@@ -549,21 +555,52 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) + + int kgd2kfd_pre_reset(struct kfd_dev *kfd) + { ++ if (!kfd->init_complete) ++ return 0; ++ kgd2kfd_suspend(kfd); ++ ++ /* hold dqm->lock to prevent further execution*/ ++ mutex_lock(&kfd->dqm->lock); ++ ++ kfd_signal_reset_event(kfd); + return 0; + } + ++/* ++ * Fix me. KFD won't be able to resume existing process for now. ++ * We will keep all existing process in a evicted state and ++ * wait the process to be terminated. ++ */ ++ + int kgd2kfd_post_reset(struct kfd_dev *kfd) + { ++ int ret, count; ++ ++ if (!kfd->init_complete) ++ return 0; ++ ++ mutex_unlock(&kfd->dqm->lock); ++ ++ ret = kfd_resume(kfd); ++ if (ret) ++ return ret; ++ count = atomic_dec_return(&kfd_locked); ++ WARN_ONCE(count != 0, "KFD reset ref. error"); + return 0; + } + ++bool kfd_is_locked(void) ++{ ++ return (atomic_read(&kfd_locked) > 0); ++} ++ + void kgd2kfd_suspend(struct kfd_dev *kfd) + { + if (!kfd->init_complete) + return; + + /* For first KFD device suspend all the KFD processes */ +- if (atomic_inc_return(&kfd_device_suspended) == 1) ++ if (atomic_inc_return(&kfd_locked) == 1) + kfd_suspend_all_processes(); + + kfd->dqm->ops.stop(kfd->dqm); +@@ -582,7 +619,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) + if (ret) + return ret; + +- count = atomic_dec_return(&kfd_device_suspended); ++ count = atomic_dec_return(&kfd_locked); + WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); + if (count == 0) + ret = kfd_resume_all_processes(); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 644ce9d..09c1c31 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -1009,3 +1009,8 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + mutex_unlock(&p->event_mutex); + kfd_unref_process(p); + } ++ ++void kfd_signal_reset_event(struct kfd_dev *dev) ++{ ++ /*todo*/ ++} +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 0c2fa89..0a019a6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -1044,10 +1044,14 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); + void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + struct kfd_vm_fault_info *info); + ++void kfd_signal_reset_event(struct kfd_dev *dev); ++ + void kfd_flush_tlb(struct kfd_process_device *pdd); + + int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); + ++bool kfd_is_locked(void); ++ + #define KFD_SCRATCH_KV_FW_VER 413 + + /* PeerDirect support */ +-- +2.7.4 + |