diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch | 262 |
1 file changed, 262 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch new file mode 100644 index 00000000..a48c5d45 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1517-drm-amdkfd-Add-kgd2kfd-schedule_evict_and_restore_pr.patch @@ -0,0 +1,262 @@ +From e79bd47c809916d7671512b2bcfbaefe00557375 Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Date: Fri, 2 Sep 2016 15:01:41 -0400 +Subject: [PATCH 1517/4131] drm/amdkfd: Add kgd2kfd + schedule_evict_and_restore_process + +Change-Id: I27860af58c54449a9ba1fc0a04e0436edb7fae8b +Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> + + Conflicts: + drivers/gpu/drm/amd/include/kgd_kfd_interface.h +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 116 ++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 + + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 ++- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 + + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 6 ++ + 5 files changed, 136 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 0ce9572..6acc5fc 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -26,6 +26,7 @@ + #include <linux/slab.h> + #include <linux/highmem.h> + #include <linux/debugfs.h> ++#include <linux/fence.h> + #include "kfd_priv.h" + #include "kfd_device_queue_manager.h" + #include "kfd_pm4_headers.h" +@@ -745,6 +746,42 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm) + return r; + } + ++/* quiesce_process_mm - ++ * Quiesce all user queues that belongs to given process p ++ */ ++static int quiesce_process_mm(struct kfd_process *p) ++{ ++ struct kfd_process_device *pdd; ++ int r = 0; ++ unsigned int 
n_evicted = 0; ++ ++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { ++ r = process_evict_queues(pdd->dev->dqm, &pdd->qpd); ++ if (r != 0) { ++ pr_err("Failed to evict process queues\n"); ++ goto fail; ++ } ++ n_evicted++; ++ } ++ ++ return r; ++ ++fail: ++ /* To keep state consistent, roll back partial eviction by ++ * restoring queues ++ */ ++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { ++ if (n_evicted == 0) ++ break; ++ if (process_restore_queues(pdd->dev->dqm, &pdd->qpd)) ++ pr_err("Failed to restore queues\n"); ++ ++ n_evicted--; ++ } ++ ++ return r; ++} ++ + /* resume_process_mm - + * Resume all user queues that belongs to given process p. The caller must + * ensure that process p context is valid. +@@ -827,6 +864,85 @@ void kfd_restore_bo_worker(struct work_struct *work) + pr_err("Failed to resume user queues\n"); + } + ++/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will ++ * prepare for safe eviction of KFD BOs that belong to the specified ++ * process. ++ * ++ * @mm: mm_struct that identifies the specified KFD process ++ * @fence: eviction fence attached to KFD process BOs ++ * ++ */ ++int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, ++ struct fence *fence) ++{ ++ struct kfd_process *p; ++ ++ if (!fence) ++ return -EINVAL; ++ ++ if (fence_is_signaled(fence)) ++ return 0; ++ ++ p = kfd_lookup_process_by_mm(mm); ++ if (!p) ++ return -ENODEV; ++ ++ if (work_pending(&p->eviction_work.work)) { ++ /* It is possible has TTM has lined up couple of BOs of the same ++ * process to be evicted. 
Check if the fence is same which ++ * indicates that previous work item scheduled is not complted ++ */ ++ if (p->eviction_work.eviction_fence == fence) ++ goto out; ++ else { ++ WARN(1, "Starting new evict with previous evict is not completed\n"); ++ cancel_work_sync(&p->eviction_work.work); ++ } ++ } ++ ++ /* During process initialization eviction_work.work is initialized ++ * to kfd_evict_bo_worker ++ */ ++ p->eviction_work.eviction_fence = fence_get(fence); ++ schedule_work(&p->eviction_work.work); ++out: ++ kfd_unref_process(p); ++ return 0; ++} ++ ++void kfd_evict_bo_worker(struct work_struct *work) ++{ ++ int ret; ++ struct kfd_process *p; ++ struct kfd_eviction_work *eviction_work; ++ ++ eviction_work = container_of(work, struct kfd_eviction_work, ++ work); ++ ++ /* Process termination destroys this worker thread. So during the ++ * lifetime of this thread, kfd_process p will be valid ++ */ ++ p = container_of(eviction_work, struct kfd_process, eviction_work); ++ ++ /* Narrow window of overlap between restore and evict work item is ++ * possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos unreserves ++ * KFD BOs, it is possible to evicted again. But restore has few more ++ * steps of finish. So lets wait for the restore work to complete ++ */ ++ if (delayed_work_pending(&p->restore_work)) ++ flush_delayed_work(&p->restore_work); ++ ++ ret = quiesce_process_mm(p); ++ if (!ret) { ++ fence_signal(eviction_work->eviction_fence); ++ fence_put(eviction_work->eviction_fence); ++ kfd_schedule_restore_bos_and_queues(p); ++ } else { ++ pr_err("Failed to quiesce user queues. 
Cannot evict BOs\n"); ++ } ++ ++} ++ + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size) + { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index 914a0cd..42c559b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = { + .restore = kgd2kfd_restore, + .quiesce_mm = kgd2kfd_quiesce_mm, + .resume_mm = kgd2kfd_resume_mm, ++ .schedule_evict_and_restore_process = ++ kgd2kfd_schedule_evict_and_restore_process, + }; + + int sched_policy = KFD_SCHED_POLICY_HWS; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index eb9541f..182d065 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -542,12 +542,20 @@ struct qcm_process_device { + }; + + /* KFD Memory Eviction */ ++struct kfd_eviction_work { ++ struct work_struct work; ++ struct fence *eviction_fence; ++}; ++ + /* Appox. wait time before attempting to restore evicted BOs */ + #define PROCESS_RESTORE_TIME_MS 2000 + /* Approx. 
back off time if restore fails due to lack of memory */ + #define PROCESS_BACK_OFF_TIME_MS 1000 + ++void kfd_evict_bo_worker(struct work_struct *work); + void kfd_restore_bo_worker(struct work_struct *work); ++int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, ++ struct fence *fence); + + + /*8 byte handle containing GPU ID in the most significant 4 bytes and +@@ -681,7 +689,8 @@ struct kfd_process { + + void *master_vm; + +- /* For restoring BOs after eviction */ ++ /* Work items for evicting and restoring BOs */ ++ struct kfd_eviction_work eviction_work; + struct delayed_work restore_work; + }; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 542f0df..54ed2a1 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -509,6 +509,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, + p = container_of(mn, struct kfd_process, mmu_notifier); + BUG_ON(p->mm != mm); + ++ cancel_work_sync(&p->eviction_work.work); + cancel_delayed_work_sync(&p->restore_work); + + mutex_lock(&kfd_processes_mutex); +@@ -679,6 +680,7 @@ static struct kfd_process *create_process(const struct task_struct *thread, + if (err) + goto err_init_cwsr; + ++ INIT_WORK(&process->eviction_work.work, kfd_evict_bo_worker); + INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker); + return process; + +diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +index c38e707..d344496 100644 +--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +@@ -30,6 +30,7 @@ + + #include <linux/types.h> + #include <linux/bitmap.h> ++#include <linux/fence.h> + + struct pci_dev; + +@@ -388,6 +389,9 @@ struct kfd2kgd_calls { + * + * @resume_mm: Resume user queue access to specified MM address space + * ++ * @schedule_evict_and_restore_process: Schedules work queue 
that will prepare ++ * for safe eviction of KFD BOs that belong to the specified process. ++ * + * This structure contains function callback pointers so the kgd driver + * will notify to the amdkfd about certain status changes. + * +@@ -406,6 +410,8 @@ struct kgd2kfd_calls { + int (*restore)(struct kfd_dev *kfd); + int (*quiesce_mm)(struct kfd_dev *kfd, struct mm_struct *mm); + int (*resume_mm)(struct kfd_dev *kfd, struct mm_struct *mm); ++ int (*schedule_evict_and_restore_process)(struct mm_struct *mm, ++ struct fence *fence); + }; + + int kgd2kfd_init(unsigned interface_version, +-- +2.7.4 + |