Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1516-drm-amdkfd-Add-restore-work-item-function.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.14.71/1516-drm-amdkfd-Add-restore-work-item-function.patch  164
1 file changed, 164 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1516-drm-amdkfd-Add-restore-work-item-function.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1516-drm-amdkfd-Add-restore-work-item-function.patch
new file mode 100644
index 00000000..759b0fbc
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1516-drm-amdkfd-Add-restore-work-item-function.patch
@@ -0,0 +1,164 @@
+From 4797dfd92f43f3cc68e5fa1e1d280f86c5d2128f Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Tue, 5 Jul 2016 17:32:48 -0400
+Subject: [PATCH 1516/4131] drm/amdkfd: Add restore work item function
+
+If any BO belonging to a process is evicted, a delayed restore work item
+is scheduled to run after a timeout. The worker restores all of the
+process's BOs and then resumes its user queues.
+
+Change-Id: I5591e2665bd8e7617243ad1772d0b057c0ce9f5e
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 82 ++++++++++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 +++++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 3 ++
+ 3 files changed, 97 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index a47d7f1..0ce9572 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -745,6 +745,88 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm)
+ return r;
+ }
+
++/* resume_process_mm -
++ * Resume all user queues that belong to the given process p. The caller
++ * must ensure that the context of process p remains valid.
++ */
++static int resume_process_mm(struct kfd_process *p)
++{
++ struct kfd_process_device *pdd;
++ struct mm_struct *mm = (struct mm_struct *)p->mm;
++ int r, ret = 0;
++
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ if (pdd->dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
++ down_read(&mm->mmap_sem);
++
++ r = process_restore_queues(pdd->dev->dqm, &pdd->qpd);
++ if (r != 0) {
++ pr_err("Failed to restore process queues\n");
++ if (ret == 0)
++ ret = r;
++ }
++
++ if (pdd->dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
++ up_read(&mm->mmap_sem);
++ }
++
++ return ret;
++}
++
++/** kfd_schedule_restore_bos_and_queues - Schedule the delayed work item
++ * that restores all BOs belonging to the given process and then restores
++ * its user queues
++ *
++ * @p: kfd_process whose BOs and queues will be restored
++ */
++static int kfd_schedule_restore_bos_and_queues(struct kfd_process *p)
++{
++ if (delayed_work_pending(&p->restore_work)) {
++ WARN(1, "Trying to evict an unrestored process\n");
++ cancel_delayed_work_sync(&p->restore_work);
++ }
++
++ /* During process initialization restore_work is initialized
++ * to kfd_restore_bo_worker
++ */
++ schedule_delayed_work(&p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
++ return 0;
++}
++
++void kfd_restore_bo_worker(struct work_struct *work)
++{
++ struct delayed_work *dwork;
++ struct kfd_process *p;
++ struct kfd_process_device *pdd;
++ int ret = 0;
++
++ dwork = to_delayed_work(work);
++
++ /* Process termination cancels this work item synchronously, so
++ * kfd_process p remains valid for as long as the worker runs
++ */
++ p = container_of(dwork, struct kfd_process, restore_work);
++
++ /* Call restore_process_bos on the first KGD device. This function
++ * takes care of restoring the whole process including other devices.
++ * Restore can fail if not enough memory is available; in that case
++ * the work item is rescheduled.
++ */
++ pdd = list_first_entry(&p->per_device_data,
++ struct kfd_process_device,
++ per_device_list);
++
++ ret = pdd->dev->kfd2kgd->restore_process_bos(p->master_vm);
++ if (ret) {
++ kfd_schedule_restore_bos_and_queues(p);
++ return;
++ }
++
++ ret = resume_process_mm(p);
++ if (ret)
++ pr_err("Failed to resume user queues\n");
++}
++
+ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+ unsigned int chunk_size)
+ {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a672a72..eb9541f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -541,6 +541,15 @@ struct qcm_process_device {
+ void *ib_kaddr;
+ };
+
++/* KFD Memory Eviction */
++/* Approx. wait time before attempting to restore evicted BOs */
++#define PROCESS_RESTORE_TIME_MS 2000
++/* Approx. back off time if restore fails due to lack of memory */
++#define PROCESS_BACK_OFF_TIME_MS 1000
++
++void kfd_restore_bo_worker(struct work_struct *work);
++
++
+ /*8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes*/
+ #define MAKE_HANDLE(gpu_id, idr_handle) (((uint64_t)(gpu_id) << 32) + idr_handle)
+@@ -671,6 +680,9 @@ struct kfd_process {
+ struct rb_root bo_interval_tree;
+
+ void *master_vm;
++
++ /* For restoring BOs after eviction */
++ struct delayed_work restore_work;
+ };
+
+ /**
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 19e3af9..542f0df 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -509,6 +509,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ p = container_of(mn, struct kfd_process, mmu_notifier);
+ BUG_ON(p->mm != mm);
+
++ cancel_delayed_work_sync(&p->restore_work);
++
+ mutex_lock(&kfd_processes_mutex);
+ hash_del_rcu(&p->kfd_processes);
+ mutex_unlock(&kfd_processes_mutex);
+@@ -677,6 +679,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
+ if (err)
+ goto err_init_cwsr;
+
++ INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker);
+ return process;
+
+ err_init_cwsr:
+--
+2.7.4
+
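
For context, the sketch below shows the generic delayed-work pattern the patch relies on, outside of any KFD code: initialize a delayed work item, schedule it after a millisecond timeout converted to jiffies, and cancel it synchronously on teardown, as kfd_process_notifier_release() does above. It is a minimal illustrative module, not part of the patch; the demo_* names are assumptions, and DEMO_RESTORE_TIME_MS merely mirrors PROCESS_RESTORE_TIME_MS.

/* Minimal sketch of the delayed-work pattern used above (assumed names). */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define DEMO_RESTORE_TIME_MS 2000	/* mirrors PROCESS_RESTORE_TIME_MS */

static struct delayed_work demo_restore_work;

/* Runs DEMO_RESTORE_TIME_MS after scheduling; in the patch this is where
 * BOs are restored and user queues resumed (kfd_restore_bo_worker).
 */
static void demo_restore_worker(struct work_struct *work)
{
	pr_info("demo: restore worker ran\n");
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_restore_work, demo_restore_worker);
	/* schedule_delayed_work() takes jiffies, hence msecs_to_jiffies() */
	schedule_delayed_work(&demo_restore_work,
			      msecs_to_jiffies(DEMO_RESTORE_TIME_MS));
	return 0;
}

static void __exit demo_exit(void)
{
	/* Wait for a pending or running worker before tearing down, as the
	 * mmu notifier release path does with cancel_delayed_work_sync().
	 */
	cancel_delayed_work_sync(&demo_restore_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");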