aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch262
1 files changed, 262 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch
new file mode 100644
index 00000000..a48c5d45
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch
@@ -0,0 +1,262 @@
+From e79bd47c809916d7671512b2bcfbaefe00557375 Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Fri, 2 Sep 2016 15:01:41 -0400
+Subject: [PATCH 1517/4131] drm/amdkfd: Add kgd2kfd
+ schedule_evict_and_restore_process
+
+Change-Id: I27860af58c54449a9ba1fc0a04e0436edb7fae8b
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+
+ Conflicts:
+ drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 116 ++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 +
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 ++-
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +
+ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 6 ++
+ 5 files changed, 136 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 0ce9572..6acc5fc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -26,6 +26,7 @@
+ #include <linux/slab.h>
+ #include <linux/highmem.h>
+ #include <linux/debugfs.h>
++#include <linux/fence.h>
+ #include "kfd_priv.h"
+ #include "kfd_device_queue_manager.h"
+ #include "kfd_pm4_headers.h"
+@@ -745,6 +746,42 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm)
+ return r;
+ }
+
++/* quiesce_process_mm -
++ * Quiesce all user queues that belongs to given process p
++ */
++static int quiesce_process_mm(struct kfd_process *p)
++{
++ struct kfd_process_device *pdd;
++ int r = 0;
++ unsigned int n_evicted = 0;
++
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ r = process_evict_queues(pdd->dev->dqm, &pdd->qpd);
++ if (r != 0) {
++ pr_err("Failed to evict process queues\n");
++ goto fail;
++ }
++ n_evicted++;
++ }
++
++ return r;
++
++fail:
++ /* To keep state consistent, roll back partial eviction by
++ * restoring queues
++ */
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ if (n_evicted == 0)
++ break;
++ if (process_restore_queues(pdd->dev->dqm, &pdd->qpd))
++ pr_err("Failed to restore queues\n");
++
++ n_evicted--;
++ }
++
++ return r;
++}
++
+ /* resume_process_mm -
+ * Resume all user queues that belongs to given process p. The caller must
+ * ensure that process p context is valid.
+@@ -827,6 +864,85 @@ void kfd_restore_bo_worker(struct work_struct *work)
+ pr_err("Failed to resume user queues\n");
+ }
+
++/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
++ * prepare for safe eviction of KFD BOs that belong to the specified
++ * process.
++ *
++ * @mm: mm_struct that identifies the specified KFD process
++ * @fence: eviction fence attached to KFD process BOs
++ *
++ */
++int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
++ struct fence *fence)
++{
++ struct kfd_process *p;
++
++ if (!fence)
++ return -EINVAL;
++
++ if (fence_is_signaled(fence))
++ return 0;
++
++ p = kfd_lookup_process_by_mm(mm);
++ if (!p)
++ return -ENODEV;
++
++ if (work_pending(&p->eviction_work.work)) {
++ /* It is possible has TTM has lined up couple of BOs of the same
++ * process to be evicted. Check if the fence is same which
++ * indicates that previous work item scheduled is not complted
++ */
++ if (p->eviction_work.eviction_fence == fence)
++ goto out;
++ else {
++ WARN(1, "Starting new evict with previous evict is not completed\n");
++ cancel_work_sync(&p->eviction_work.work);
++ }
++ }
++
++ /* During process initialization eviction_work.work is initialized
++ * to kfd_evict_bo_worker
++ */
++ p->eviction_work.eviction_fence = fence_get(fence);
++ schedule_work(&p->eviction_work.work);
++out:
++ kfd_unref_process(p);
++ return 0;
++}
++
++void kfd_evict_bo_worker(struct work_struct *work)
++{
++ int ret;
++ struct kfd_process *p;
++ struct kfd_eviction_work *eviction_work;
++
++ eviction_work = container_of(work, struct kfd_eviction_work,
++ work);
++
++ /* Process termination destroys this worker thread. So during the
++ * lifetime of this thread, kfd_process p will be valid
++ */
++ p = container_of(eviction_work, struct kfd_process, eviction_work);
++
++ /* Narrow window of overlap between restore and evict work item is
++ * possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos unreserves
++ * KFD BOs, it is possible to evicted again. But restore has few more
++ * steps of finish. So lets wait for the restore work to complete
++ */
++ if (delayed_work_pending(&p->restore_work))
++ flush_delayed_work(&p->restore_work);
++
++ ret = quiesce_process_mm(p);
++ if (!ret) {
++ fence_signal(eviction_work->eviction_fence);
++ fence_put(eviction_work->eviction_fence);
++ kfd_schedule_restore_bos_and_queues(p);
++ } else {
++ pr_err("Failed to quiesce user queues. Cannot evict BOs\n");
++ }
++
++}
++
+ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+ unsigned int chunk_size)
+ {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index 914a0cd..42c559b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
+ .restore = kgd2kfd_restore,
+ .quiesce_mm = kgd2kfd_quiesce_mm,
+ .resume_mm = kgd2kfd_resume_mm,
++ .schedule_evict_and_restore_process =
++ kgd2kfd_schedule_evict_and_restore_process,
+ };
+
+ int sched_policy = KFD_SCHED_POLICY_HWS;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index eb9541f..182d065 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -542,12 +542,20 @@ struct qcm_process_device {
+ };
+
+ /* KFD Memory Eviction */
++struct kfd_eviction_work {
++ struct work_struct work;
++ struct fence *eviction_fence;
++};
++
+ /* Appox. wait time before attempting to restore evicted BOs */
+ #define PROCESS_RESTORE_TIME_MS 2000
+ /* Approx. back off time if restore fails due to lack of memory */
+ #define PROCESS_BACK_OFF_TIME_MS 1000
+
++void kfd_evict_bo_worker(struct work_struct *work);
+ void kfd_restore_bo_worker(struct work_struct *work);
++int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
++ struct fence *fence);
+
+
+ /*8 byte handle containing GPU ID in the most significant 4 bytes and
+@@ -681,7 +689,8 @@ struct kfd_process {
+
+ void *master_vm;
+
+- /* For restoring BOs after eviction */
++ /* Work items for evicting and restoring BOs */
++ struct kfd_eviction_work eviction_work;
+ struct delayed_work restore_work;
+ };
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 542f0df..54ed2a1 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -509,6 +509,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ p = container_of(mn, struct kfd_process, mmu_notifier);
+ BUG_ON(p->mm != mm);
+
++ cancel_work_sync(&p->eviction_work.work);
+ cancel_delayed_work_sync(&p->restore_work);
+
+ mutex_lock(&kfd_processes_mutex);
+@@ -679,6 +680,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
+ if (err)
+ goto err_init_cwsr;
+
++ INIT_WORK(&process->eviction_work.work, kfd_evict_bo_worker);
+ INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker);
+ return process;
+
+diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+index c38e707..d344496 100644
+--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+@@ -30,6 +30,7 @@
+
+ #include <linux/types.h>
+ #include <linux/bitmap.h>
++#include <linux/fence.h>
+
+ struct pci_dev;
+
+@@ -388,6 +389,9 @@ struct kfd2kgd_calls {
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
++ * @schedule_evict_and_restore_process: Schedules work queue that will prepare
++ * for safe eviction of KFD BOs that belong to the specified process.
++ *
+ * This structure contains function callback pointers so the kgd driver
+ * will notify to the amdkfd about certain status changes.
+ *
+@@ -406,6 +410,8 @@ struct kgd2kfd_calls {
+ int (*restore)(struct kfd_dev *kfd);
+ int (*quiesce_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
+ int (*resume_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
++ int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
++ struct fence *fence);
+ };
+
+ int kgd2kfd_init(unsigned interface_version,
+--
+2.7.4
+