Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch
new file mode 100644
index 00000000..04f86f2b
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch
@@ -0,0 +1,303 @@
+From 4e57129b464425b2fb9618d3fdaca75a22cbc577 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Tue, 15 Mar 2016 02:37:53 -0400
+Subject: [PATCH 1381/4131] drm/amdgpu: Add MMU notifier for HSA userptr
+ buffers
+
+Evicts userptr buffers temporarily when the kernel invalidates the
+user VM mapping. Buffer eviction calls into KFD for quiescing queues
+accessing the affected VM. After the invalidation, evicted buffers
+are restored, which pins them, presumably using an updated user
+mapping. A short delay avoids excessive repeated unpinning and
+repinning of the same buffer.
+
+Change-Id: I889ee367605dd0121fe4dab6a24e55441094588a
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+
+ Conflicts:
+        drivers/gpu/drm/amd/amdgpu/amdgpu.h
+        drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c     | 143 ++++++++++++++++++++++++++---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |   2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |   2 +-
+ 4 files changed, 132 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index d8c5c45..ab784c9 100755
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -391,6 +391,8 @@ struct amdgpu_gem_object {
+ 	struct amdgpu_bo	*bo;
+ };
+ 
++struct kgd_mem;
++
+ #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_gem_object, base)->bo
+ 
+ void amdgpu_gem_object_free(struct drm_gem_object *obj);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+index 430c622..87bb00d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+@@ -36,12 +36,19 @@
+ #include <drm/drm.h>
+ 
+ #include "amdgpu.h"
++#include "amdgpu_amdkfd.h"
++
++enum amdgpu_mn_type {
++	AMDGPU_MN_TYPE_GFX,
++	AMDGPU_MN_TYPE_HSA,
++};
+ 
+ struct amdgpu_mn {
+ 	/* constant after initialisation */
+ 	struct amdgpu_device	*adev;
+ 	struct mm_struct	*mm;
+ 	struct mmu_notifier	mn;
++	enum amdgpu_mn_type	type;
+ 
+ 	/* only used on destruction */
+ 	struct work_struct	work;
+@@ -81,6 +88,9 @@ static void amdgpu_mn_destroy(struct work_struct *work)
+ 		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
+ 			bo->mn = NULL;
+ 			list_del_init(&bo->mn_list);
++			if (rmn->type == AMDGPU_MN_TYPE_HSA)
++				amdgpu_amdkfd_cancel_restore_mem(
++					adev, bo->kfd_bo);
+ 		}
+ 		kfree(node);
+ 	}
+@@ -147,7 +157,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
+ }
+ 
+ /**
+- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
++ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
+  *
+  * @mn: our notifier
+  * @mn: the mm this callback is about
+  * @start: start of updated range
+  * @end: end of updated range
+  *
+@@ -157,10 +167,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
+  * We block for all BOs between start and end to be idle and
+  * unmap them by move them into system domain again.
+  */
+-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
+-					     struct mm_struct *mm,
+-					     unsigned long start,
+-					     unsigned long end)
++static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
++						 struct mm_struct *mm,
++						 unsigned long start,
++						 unsigned long end)
+ {
+ 	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ 	struct interval_tree_node *it;
+@@ -183,22 +193,122 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
+ 	mutex_unlock(&rmn->lock);
+ }
+ 
+-static const struct mmu_notifier_ops amdgpu_mn_ops = {
+-	.release = amdgpu_mn_release,
+-	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
++/**
++ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
++ *
++ * @mn: our notifier
++ * @mm: the mm this callback is about
++ * @start: start of updated range
++ * @end: end of updated range
++ *
++ * We temporarily evict all BOs between start and end. This
++ * necessitates evicting all user-mode queues of the process. The BOs
++ * are restored in amdgpu_mn_invalidate_range_end_hsa.
++ */
++static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
++						 struct mm_struct *mm,
++						 unsigned long start,
++						 unsigned long end)
++{
++	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
++	struct interval_tree_node *it;
++
++	/* notification is exclusive, but interval is inclusive */
++	end -= 1;
++
++	it = interval_tree_iter_first(&rmn->objects, start, end);
++	while (it) {
++		struct amdgpu_mn_node *node;
++		struct amdgpu_bo *bo;
++
++		node = container_of(it, struct amdgpu_mn_node, it);
++		it = interval_tree_iter_next(it, start, end);
++
++		list_for_each_entry(bo, &node->bos, mn_list) {
++			struct kgd_mem *mem = bo->kfd_bo;
++
++			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
++							 start, end))
++				amdgpu_amdkfd_evict_mem(bo->adev, mem, mm);
++		}
++	}
++}
++
++/**
++ * amdgpu_mn_invalidate_range_end_hsa - callback to notify about mm change
++ *
++ * @mn: our notifier
++ * @mm: the mm this callback is about
++ * @start: start of updated range
++ * @end: end of updated range
++ *
++ * Restore BOs between start and end. Once the last BO is restored,
++ * the queues can be reenabled. Restoring a BO can itself trigger
++ * another recursive MMU notifier. Therefore this needs to be
++ * scheduled in a worker thread. Adding a slight delay (1 jiffy)
++ * avoids excessive repeated evictions.
++ */
++static void amdgpu_mn_invalidate_range_end_hsa(struct mmu_notifier *mn,
++					       struct mm_struct *mm,
++					       unsigned long start,
++					       unsigned long end)
++{
++	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
++	struct interval_tree_node *it;
++
++	/* notification is exclusive, but interval is inclusive */
++	end -= 1;
++
++	it = interval_tree_iter_first(&rmn->objects, start, end);
++	while (it) {
++		struct amdgpu_mn_node *node;
++		struct amdgpu_bo *bo;
++
++		node = container_of(it, struct amdgpu_mn_node, it);
++		it = interval_tree_iter_next(it, start, end);
++
++		list_for_each_entry(bo, &node->bos, mn_list) {
++			struct kgd_mem *mem = bo->kfd_bo;
++
++			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
++							 start, end))
++				amdgpu_amdkfd_schedule_restore_mem(bo->adev,
++								   mem, mm, 1);
++		}
++	}
++}
++
++static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
++	[AMDGPU_MN_TYPE_GFX] = {
++		.release = amdgpu_mn_release,
++		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
++	},
++	[AMDGPU_MN_TYPE_HSA] = {
++		.release = amdgpu_mn_release,
++		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
++		.invalidate_range_end = amdgpu_mn_invalidate_range_end_hsa,
++	},
+ };
+ 
++/* Low bits of any reasonable mm pointer will be unused due to struct
++ * alignment. Use these bits to make a unique key from the mm pointer
++ * and notifier type. */
++#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
++
+ /**
+  * amdgpu_mn_get - create notifier context
+  *
+  * @adev: amdgpu device pointer
++ * @type: type of MMU notifier context
+  *
+  * Creates a notifier context for current->mm.
+  */
+-static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
++static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
++				       enum amdgpu_mn_type type)
+ {
+ 	struct mm_struct *mm = current->mm;
+ 	struct amdgpu_mn *rmn;
++	unsigned long key = AMDGPU_MN_KEY(mm, type);
+ 	int r;
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+ 	struct hlist_node *node;
+@@ -215,11 +325,11 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+ #endif
+ 
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+-	hash_for_each_possible(adev->mn_hash, rmn, node, node, (unsigned long)mm)
++	hash_for_each_possible(adev->mn_hash, rmn, node, node, key)
+ #else
+-	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
++	hash_for_each_possible(adev->mn_hash, rmn, node, key)
+ #endif
+-		if (rmn->mm == mm)
++		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
+ 			goto release_locks;
+ 
+ 	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
+@@ -230,7 +340,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+ 
+ 	rmn->adev = adev;
+ 	rmn->mm = mm;
+-	rmn->mn.ops = &amdgpu_mn_ops;
++	rmn->type = type;
++	rmn->mn.ops = &amdgpu_mn_ops[type];
+ 	mutex_init(&rmn->lock);
+ 	rmn->objects = RB_ROOT;
+ 
+@@ -238,7 +349,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+ 	if (r)
+ 		goto free_rmn;
+ 
+-	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
++	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
+ 
+ release_locks:
+ 	up_write(&mm->mmap_sem);
+@@ -267,12 +378,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+ {
+ 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
+ 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
++	enum amdgpu_mn_type type =
++		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
+ 	struct amdgpu_mn *rmn;
+ 	struct amdgpu_mn_node *node = NULL;
+ 	struct list_head bos;
+ 	struct interval_tree_node *it;
+ 
+-	rmn = amdgpu_mn_get(adev);
++	rmn = amdgpu_mn_get(adev, type);
+ 	if (IS_ERR(rmn))
+ 		return PTR_ERR(rmn);
+ 
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+index fabddbb..e53b70a 100755
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+@@ -83,7 +83,7 @@ struct amdgpu_bo {
+ 
+ 	struct ttm_bo_kmap_obj dma_buf_vmap;
+ 	struct amdgpu_mn *mn;
+-	bool is_kfd_bo;
++	struct kgd_mem *kfd_bo;
+ 	struct kfd_process_device *pdd;
+ 
+ 	union {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index cb6aaea..2b866be 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -286,7 +286,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
+ 	 * Don't verify access for KFD BO as it doesn't necessary has
+ 	 * KGD file pointer
+ 	 */
+-	if (!abo || abo->is_kfd_bo || !filp)
++	if (!abo || abo->kfd_bo || !filp)
+ 		return 0;
+ 	file_priv = filp->private_data;
+ 
+-- 
+2.7.4
+
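
A note on the subtlest detail in the patch above: AMDGPU_MN_KEY packs the notifier type into the unused low alignment bits of the mm_struct pointer, so a single per-device hash table (adev->mn_hash) can hold one GFX and one HSA notifier context for the same process. The standalone user-space sketch below (illustrative names MN_KEY and mn_type; this is not part of the patch or of any kernel API) demonstrates why keys built this way cannot collide:

    /* Why AMDGPU_MN_KEY(mm, type) yields unique keys: a heap-allocated
     * struct is at least pointer-aligned, so the low bits of its address
     * are zero and can carry a small type value without ever producing
     * another allocation's key. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    enum mn_type { MN_TYPE_GFX, MN_TYPE_HSA };

    /* Stand-in for the patch's AMDGPU_MN_KEY(mm, type). */
    #define MN_KEY(ptr, type) ((uintptr_t)(ptr) + (type))

    int main(void)
    {
            /* Two allocations playing the role of two mm_structs. */
            void *mm_a = malloc(64);
            void *mm_b = malloc(64);

            /* malloc memory is at least 8-byte aligned on mainstream
             * ABIs, so the low bits really are free to hold the type. */
            assert(((uintptr_t)mm_a & 7) == 0 && ((uintptr_t)mm_b & 7) == 0);

            uintptr_t keys[] = {
                    MN_KEY(mm_a, MN_TYPE_GFX), MN_KEY(mm_a, MN_TYPE_HSA),
                    MN_KEY(mm_b, MN_TYPE_GFX), MN_KEY(mm_b, MN_TYPE_HSA),
            };

            /* Same mm with different types, or different mms: all four
             * keys stay distinct, so one hash table can hold them all. */
            for (int i = 0; i < 4; i++)
                    for (int j = i + 1; j < 4; j++)
                            assert(keys[i] != keys[j]);

            printf("4 distinct keys from 2 pointers x 2 notifier types\n");
            free(mm_a);
            free(mm_b);
            return 0;
    }

This is also why the lookup in amdgpu_mn_get() compares AMDGPU_MN_KEY(rmn->mm, rmn->type) against the requested key rather than comparing rmn->mm alone: a process that registers both graphics and KFD userptr BOs keeps two independent notifier contexts in the same adev->mn_hash without one shadowing the other.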