From 86922491a22c1d248435465d25697e5cfaf144a4 Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Thu, 17 May 2018 17:11:08 +0530 Subject: [PATCH 3445/4131] Revert "compilation fix for raven rocm" This reverts commit b8ab947de73c88baaf2f1ae4aecdf420cdeb0181. --- drivers/gpu/drm/amd/amdgpu/Makefile | 0 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 152 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 69 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 43 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 39 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 33 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 44 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1188 ++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 21 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 28 +- .../drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 8 +- include/uapi/linux/kfd_ioctl.h | 28 +- 16 files changed, 920 insertions(+), 887 deletions(-) mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/Makefile mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu.h mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c mode change 100755 => 100644 drivers/gpu/drm/amd/amdkfd/kfd_device.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile old mode 100644 new mode 100755 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h old mode 100644 new mode 100755 index 18478d4..e8017ee --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -130,7 +130,6 @@ extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; -extern int amdgpu_emu_mode; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -193,8 +192,8 @@ struct amdgpu_cs_parser; struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; -struct amdgpu_bo_va_mapping; struct kfd_vm_fault_info; +struct amdgpu_bo_va_mapping; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -412,8 +411,6 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -//extern const struct dma_buf_ops amdgpu_dmabuf_ops; - /* sub-allocation manager, it has to be protected by another lock. * By conception this is an helper for other part of the driver * like the indirect buffer or semaphore, which both have their diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c old mode 100644 new mode 100755 index c24a2f4..fdaf5b3 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include "amdgpu_amdkfd.h" @@ -29,10 +30,12 @@ #include "amdgpu_gfx.h" #include +#define AMDKFD_SKIP_UNCOMPILED_CODE 1 + const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); -static unsigned int compute_vmid_bitmap = 0xFF00; +unsigned int global_compute_vmid_bitmap = 0xFF00; int amdgpu_amdkfd_init(void) { @@ -95,6 +98,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) break; case CHIP_VEGA10: case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) { + dev_dbg(adev->dev, "DKMS installed kfd does not support Raven for kernel < 4.16\n"); + return; + } kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; default: @@ -146,12 +153,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = compute_vmid_bitmap, + .compute_vmid_bitmap = global_compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, - .gpuvm_size = min(adev->vm_manager.max_pfn - << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_VA_HOLE_START), + .gpuvm_size = (uint64_t)amdgpu_vm_size << 30, .drm_render_minor = adev->ddev->render->index }; @@ -268,6 +273,61 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) amdgpu_device_gpu_recover(adev, NULL, false); } +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + u32 pool_to_domain(enum kgd_memory_pool p) { switch (p) { @@ -356,7 +416,8 @@ void get_local_mem_info(struct kgd_dev *kgd, aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; memset(mem_info, 0, sizeof(*mem_info)); - if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { + if (!(adev->gmc.aper_base & address_mask || + aper_limit & address_mask)) { mem_info->local_mem_size_public = adev->gmc.visible_vram_size; mem_info->local_mem_size_private = adev->gmc.real_vram_size - adev->gmc.visible_vram_size; @@ -371,11 +432,6 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->local_mem_size_public, mem_info->local_mem_size_private); - if (amdgpu_emu_mode == 1) { - mem_info->mem_clk_max = 100; - return; - } - if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; else @@ -396,9 +452,6 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in quantas of 10kHz */ - if (amdgpu_emu_mode == 1) - return 100; - if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; @@ -458,8 +511,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, adev = obj->dev->dev_private; bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) - /* Only VRAM and GTT BOs are supported */ + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_DGMA))) + /* Only VRAM, GTT and DGMA BOs are supported */ goto out_put; r = 0; @@ -473,9 +527,12 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, metadata_size, &metadata_flags); if (flags) { - *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; - + /* If the preferred domain is DGMA, set flags to VRAM because + * KFD doesn't support allocating DGMA memory + */ + *flags = (bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_DGMA)) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) *flags |= ALLOC_MEM_FLAGS_PUBLIC; } @@ -493,66 +550,11 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return usage; } -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct amdgpu_ring *ring; - struct dma_fence *f = NULL; - int ret; - - switch (engine) { - case KGD_ENGINE_MEC1: - ring = &adev->gfx.compute_ring[0]; - break; - case KGD_ENGINE_SDMA1: - ring = &adev->sdma.instance[0].ring; - break; - case KGD_ENGINE_SDMA2: - ring = &adev->sdma.instance[1].ring; - break; - default: - pr_err("Invalid engine in IB submission: %d\n", engine); - ret = -EINVAL; - goto err; - } - - ret = amdgpu_job_alloc(adev, 1, &job, NULL); - if (ret) - goto err; - - ib = &job->ibs[0]; - memset(ib, 0, sizeof(struct amdgpu_ib)); - - ib->gpu_addr = gpu_addr; - ib->ptr = ib_cmd; - ib->length_dw = ib_len; - /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ - job->vmid = vmid; - - ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); - if (ret) { - DRM_ERROR("amdgpu: failed to schedule IB.\n"); - goto err_ib_sched; - } - - ret = dma_fence_wait(f, false); - -err_ib_sched: - dma_fence_put(f); - amdgpu_job_free(job); -err: - return ret; -} - bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { if (adev->kfd) { - if ((1 << vmid) & compute_vmid_bitmap) + if ((1 << vmid) & global_compute_vmid_bitmap) return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f0efde7..1fb4915 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -55,6 +55,7 @@ struct kgd_mem { struct ttm_validate_buffer resv_list; uint32_t domain; unsigned int mapped_to_gpu_memory; + void *kptr; uint64_t va; uint32_t mapping_flags; @@ -65,21 +66,24 @@ struct kgd_mem { struct amdgpu_sync sync; - bool aql_queue; + /* flags bitfied */ + bool coherent : 1; + bool no_substitute : 1; + bool aql_queue : 1; }; /* KFD Memory Eviction */ struct amdgpu_amdkfd_fence { struct dma_fence base; - struct mm_struct *mm; + void *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; }; struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); -bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); + void *mm); +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); struct amdkfd_process_info { @@ -104,6 +108,27 @@ struct amdkfd_process_info { struct pid *pid; }; +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. 
+ */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; +}; + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); @@ -119,6 +144,8 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); @@ -137,6 +164,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); /* Shared API */ +int map_bo(struct amdgpu_device *rdev, uint64_t va, void *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va); int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); @@ -170,38 +199,31 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); }) /* GPUVM API */ -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef); -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem); + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); -int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size); -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr); int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t offset, @@ -216,9 +238,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm, struct kgd_mem *mem, struct dma_buf **dmabuf); +int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct 
mm_struct *mm); +int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm); void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); - #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index cf2f1e9..3961937 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2018 Advanced Micro Devices, Inc. + * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,18 +20,18 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include #include #include #include #include #include -#include #include "amdgpu_amdkfd.h" const struct dma_fence_ops amd_kfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); +static int amd_kfd_fence_signal(struct dma_fence *f); + /* Eviction Fence * Fence helper functions to deal with KFD memory eviction. * Big Idea - Since KFD submissions are done by user queues, a BO cannot be @@ -60,7 +60,7 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + void *mm) { struct amdgpu_amdkfd_fence *fence = NULL; @@ -68,8 +68,10 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, if (fence == NULL) return NULL; - /* This reference gets released in amd_kfd_fence_release */ - mmgrab(mm); + /* mm_struct mm is used as void pointer to identify the parent + * KFD process. Don't dereference it. Fence and any threads using + * mm is guranteed to be released before process termination. + */ fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); @@ -122,31 +124,45 @@ static bool amd_kfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + if (!kgd2kfd->schedule_evict_and_restore_process( + (struct mm_struct *)fence->mm, f)) return true; return false; } +static int amd_kfd_fence_signal(struct dma_fence *f) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(f->lock, flags); + /* Set enabled bit so cb will called */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &f->flags); + ret = dma_fence_signal_locked(f); + spin_unlock_irqrestore(f->lock, flags); + + return ret; +} + /** * amd_kfd_fence_release - callback that fence can be freed * * @fence: fence * * This function is called when the reference count becomes zero. - * Drops the mm_struct reference and RCU schedules freeing up the fence. - */ + * It just RCU schedules freeing up the fence. +*/ static void amd_kfd_fence_release(struct dma_fence *f) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); - /* Unconditionally signal the fence. The process is getting * terminated. 
*/ if (WARN_ON(!fence)) return; /* Not an amdgpu_amdkfd_fence */ - mmdrop(fence->mm); + amd_kfd_fence_signal(f); kfree_rcu(f, rcu); } @@ -156,8 +172,8 @@ static void amd_kfd_fence_release(struct dma_fence *f) * * @f: [IN] fence * @mm: [IN] mm that needs to be verified - */ -bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +*/ +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); @@ -177,3 +193,4 @@ const struct dma_fence_ops amd_kfd_fence_ops = { .wait = dma_fence_default_wait, .release = amd_kfd_fence_release, }; + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c541656..fcc1add 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -41,6 +42,8 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" +#define AMDKFD_SKIP_UNCOMPILED_CODE 1 + enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -89,6 +92,9 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -100,6 +106,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -140,6 +148,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -170,6 +179,7 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, config->num_macro_tile_configs = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + return 0; } @@ -180,13 +190,14 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -213,6 +224,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = 
amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -236,6 +248,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() return (struct kfd2kgd_calls *)&kfd2kgd; } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -319,6 +337,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -927,6 +952,18 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req) +{ + uint32_t value; + struct amdgpu_device *adev = get_amdgpu_device(dev); + + value = RREG32(mmATC_ATS_DEBUG); + value &= ~ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR_MASK; + value |= (num_of_req << ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR__SHIFT); + + WREG32(mmATC_ATS_DEBUG, value); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index dfd0026..ea8e948 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -56,10 +57,15 @@ static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { }; +struct vi_sdma_mqd; + static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -72,6 +78,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -111,6 +119,8 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -152,15 +162,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .create_process_gpumem = create_process_gpumem, .destroy_process_gpumem = destroy_process_gpumem, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -186,6 +197,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -221,6 +233,12 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -305,6 +323,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct 
amdgpu_device *adev = get_amdgpu_device(kgd); @@ -998,6 +1023,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests) +{ + pr_debug("This is a stub\n"); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index f044739..2b74a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,7 +19,7 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -80,9 +80,6 @@ #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 -#define V9_PIPE_PER_MEC (4) -#define V9_QUEUES_PER_PIPE_MEC (8) - enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -102,6 +99,9 @@ static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -114,6 +114,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -154,6 +156,8 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -202,15 +206,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .create_process_gpumem = create_process_gpumem, .destroy_process_gpumem = destroy_process_gpumem, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .program_sh_mem_settings = kgd_program_sh_mem_settings, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -236,6 +241,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = 
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -271,6 +277,12 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -307,7 +319,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, static uint32_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + unsigned int bit = (pipe_id * adev->gfx.mec.num_pipe_per_mec + queue_id) & 31; return ((uint32_t)1) << bit; @@ -392,6 +404,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + /* TODO - RING0 form of field is obsolete, seems to date back to SI * but still works */ @@ -908,7 +927,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; - mutex_lock(&adev->srbm_mutex); + spin_lock(&adev->tlb_invalidation_lock); /* Use legacy mode tlb invalidation. * @@ -950,9 +969,8 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & (1 << vmid))) cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); - + + spin_unlock(&adev->tlb_invalidation_lock); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -1181,6 +1199,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests) +{ + pr_debug("This is a stub\n"); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f42a891..8f0aa93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -20,14 +20,27 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt +#include +#include +#include +#include #include #include #include -#include "amdgpu_object.h" -#include "amdgpu_vm.h" +#include +#include #include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_d.h" +#include "gca/gfx_8_0_enum.h" +#include "oss/oss_3_0_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "gmc/gmc_8_1_d.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to * a HW bug. 
@@ -38,13 +51,15 @@ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) /* Impose limit on how much memory KFD can use */ -static struct { +struct kfd_mem_usage_limit { uint64_t max_system_mem_limit; uint64_t max_userptr_mem_limit; int64_t system_mem_used; int64_t userptr_mem_used; spinlock_t mem_limit_lock; -} kfd_mem_limit; +}; + +static struct kfd_mem_usage_limit kfd_mem_limit; /* Struct used for amdgpu_amdkfd_bo_validate */ struct amdgpu_vm_parser { @@ -167,8 +182,7 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT && - !bo->tbo.sg) { + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } @@ -255,6 +269,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, /* Alloc memory for count number of eviction fence pointers. Fill the * ef_list array and ef_count */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), GFP_KERNEL); if (!fence_list) @@ -321,7 +336,6 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, bool wait) { - struct ttm_operation_ctx ctx = { false, false }; int ret; if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), @@ -357,23 +371,6 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); } -static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) -{ - struct amdgpu_device *adev = - amdgpu_ttm_adev(vm->root.base.bo->tbo.bdev); - u64 offset; - uint64_t flags = AMDGPU_PTE_VALID; - - offset = amdgpu_bo_gpu_offset(vm->root.base.bo); - - /* On some ASICs the FB doesn't start at 0. Adjust FB offset - * to an actual MC address. - */ - adev->gmc.gmc_funcs->get_vm_pde(adev, -1, &offset, &flags); - - return offset; -} - /* vm_validate_pt_pd_bos - Validate page table and directory BOs * * Page directories are not updated here because huge page handling @@ -381,17 +378,18 @@ static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) * again. Page directories are only updated after updating page * tables. 
*/ -static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) { - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *pd = vm->base.root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; int ret; param.domain = AMDGPU_GEM_DOMAIN_VRAM; param.wait = false; - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, ¶m); if (ret) { pr_err("amdgpu: failed to validate PT BOs\n"); @@ -404,9 +402,11 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - vm->pd_phys_addr = get_vm_pd_gpu_offset(vm); + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; - if (vm->use_cpu_for_update) { + if (vm->base.use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); if (ret) { pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); @@ -417,6 +417,23 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return 0; } +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) { struct amdgpu_bo *pd = vm->root.base.bo; @@ -427,7 +444,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + return sync_vm_fence(adev, sync, vm->last_update); } /* add_bo_to_vm - Add a BO to a VM @@ -443,12 +460,14 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) * 4a. Validate new page tables and directories */ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_vm *vm, bool is_aql, + struct amdgpu_vm *avm, bool is_aql, struct kfd_bo_va_list **p_bo_va_entry) { int ret; struct kfd_bo_va_list *bo_va_entry; - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; struct amdgpu_bo *bo = mem->bo; uint64_t va = mem->va; struct list_head *list_bo_va = &mem->bo_va_list; @@ -467,11 +486,11 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, return -ENOMEM; pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, - va + bo_size, vm); + va + bo_size, avm); /* Add BO to VM internal data structures*/ - bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); - if (!bo_va_entry->bo_va) { + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (bo_va_entry->bo_va == NULL) { ret = -EINVAL; pr_err("Failed to add BO object to VM. ret == %d\n", ret); @@ -493,28 +512,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, * fence, so remove it temporarily. 
*/ amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, + kvm->process_info->eviction_fence, NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); if (ret) { pr_err("Failed to allocate pts, err=%d\n", ret); goto err_alloc_pts; } - ret = vm_validate_pt_pd_bos(vm); - if (ret) { + ret = vm_validate_pt_pd_bos(kvm); + if (ret != 0) { pr_err("validate_pt_pd_bos() failed\n"); goto err_alloc_pts; } /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); return 0; err_alloc_pts: - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); list_del(&bo_va_entry->bo_list); err_vmadd: @@ -568,7 +587,6 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; - struct ttm_operation_ctx ctx = { true, false }; int ret = 0; mutex_lock(&process_info->lock); @@ -633,25 +651,134 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, return ret; } +static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, + uint64_t size, void *vm, struct kgd_mem **mem, + uint64_t *offset, u32 domain, u64 flags, + struct sg_table *sg, bool aql_queue, + bool readonly, bool execute, bool coherent, bool no_sub, + bool userptr) +{ + struct amdgpu_device *adev; + int ret; + struct amdgpu_bo *bo; + uint64_t user_addr = 0; + int byte_align; + u32 alloc_domain; + uint32_t mapping_flags; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + + if (aql_queue) + size = size >> 1; + if (userptr) { + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; + } + + adev = get_amdgpu_device(kgd); + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? + VI_BO_SIZE_ALIGN : 1; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (*mem == NULL) { + ret = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->coherent = coherent; + (*mem)->no_substitute = no_sub; + (*mem)->aql_queue = aql_queue; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (!readonly) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (execute) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + (*mem)->mapping_flags = mapping_flags; + + alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_bo_create; + } + + pr_debug("\t create BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + /* Allocate buffer object. Userptr objects need to start out + * in the CPU domain, get moved to GTT when pinned. + */ + ret = amdgpu_bo_create(adev, size, byte_align, false, + alloc_domain, + flags, sg, NULL, &bo); + if (ret != 0) { + pr_debug("Failed to create BO on domain %s. 
ret %d\n", + domain_string(alloc_domain), ret); + unreserve_system_mem_limit(adev, size, alloc_domain); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + if (userptr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + + (*mem)->va = va; + (*mem)->domain = domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, userptr); + + if (userptr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&kfd_vm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&kfd_vm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); +err_bo_create: + kfree(*mem); +err: + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to - * avoid deadlocks. Some operations update multiple VMs at once. Track - * all the reservation info in a context structure. Optionally a sync - * object can track VM updates. + * avoid deadlocks. When updating userptrs we need to temporarily + * back-off the reservation and then reacquire it. Track all the + * reservation info in a context structure. Buffers can be mapped to + * multiple VMs simultaneously (buffers being restored on multiple + * GPUs). */ struct bo_vm_reservation_context { - struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ - unsigned int n_vms; /* Number of VMs reserved */ - struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ - struct ww_acquire_ctx ticket; /* Reservation ticket */ - struct list_head list, duplicates; /* BO lists */ - struct amdgpu_sync *sync; /* Pointer to sync object */ - bool reserved; /* Whether BOs are reserved */ -}; - -enum bo_vm_match { - BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ - BO_VM_MAPPED, /* Match VMs where a BO is mapped */ - BO_VM_ALL, /* Match all VMs a BO was added to */ + struct amdgpu_bo_list_entry kfd_bo; + unsigned int n_vms; + struct amdgpu_bo_list_entry *vm_pd; + struct ww_acquire_ctx ticket; + struct list_head list, duplicates; + struct amdgpu_sync *sync; + bool reserved; }; /** @@ -676,8 +803,9 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, INIT_LIST_HEAD(&ctx->list); INIT_LIST_HEAD(&ctx->duplicates); - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); - if (!ctx->vm_pd) + ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) + * ctx->n_vms, GFP_KERNEL); + if (ctx->vm_pd == NULL) return -ENOMEM; ctx->kfd_bo.robj = bo; @@ -693,8 +821,10 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, false, &ctx->duplicates); if (!ret) ctx->reserved = true; - else { + else pr_err("Failed to reserve buffers in ttm\n"); + + if (ret) { kfree(ctx->vm_pd); ctx->vm_pd = NULL; } @@ -702,19 +832,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, return ret; } +enum VA_TYPE { + VA_NOT_MAPPED = 0, + VA_MAPPED, + VA_DO_NOT_CARE, +}; + /** - * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * reserve_bo_and_vm - reserve a BO and some VMs that the BO has been added + * to, conditionally based on map_type. * @mem: KFD BO structure. * @vm: the VM to reserve. If NULL, then all VMs associated with the BO * is used. Otherwise, a single VM associated with the BO. * @map_type: the mapping status that will be used to filter the VMs. * @ctx: the struct that will be used in unreserve_bo_and_vms(). 
- * - * Returns 0 for success, negative for failure. */ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, - struct amdgpu_vm *vm, enum bo_vm_match map_type, - struct bo_vm_reservation_context *ctx) + struct amdgpu_vm *vm, enum VA_TYPE map_type, + struct bo_vm_reservation_context *ctx) { struct amdgpu_bo *bo = mem->bo; struct kfd_bo_va_list *entry; @@ -732,16 +867,16 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, list_for_each_entry(entry, &mem->bo_va_list, bo_list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) + && map_type != VA_DO_NOT_CARE)) continue; ctx->n_vms++; } if (ctx->n_vms != 0) { - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), - GFP_KERNEL); - if (!ctx->vm_pd) + ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) + * ctx->n_vms, GFP_KERNEL); + if (ctx->vm_pd == NULL) return -ENOMEM; } @@ -756,7 +891,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, list_for_each_entry(entry, &mem->bo_va_list, bo_list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) + && map_type != VA_DO_NOT_CARE)) continue; amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, @@ -779,16 +914,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, return ret; } -/** - * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context - * @ctx: Reservation context to unreserve - * @wait: Optionally wait for a sync object representing pending VM updates - * @intr: Whether the wait is interruptible - * - * Also frees any resources allocated in - * reserve_bo_and_(cond_)vm(s). Returns the status from - * amdgpu_sync_wait. - */ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, bool wait, bool intr) { @@ -815,25 +940,25 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); struct amdgpu_bo *pd = vm->root.base.bo; - /* Remove eviction fence from PD (and thereby from PTs too as - * they share the resv. object). Otherwise during PT update - * job (see amdgpu_vm_bo_update_mapping), eviction fence would - * get added to job->sync object and job execution would - * trigger the eviction fence. + /* Remove eviction fence from PD (and thereby from PTs too as they + * share the resv. object. 
Otherwise during PT update job (see + * amdgpu_vm_bo_update_mapping), eviction fence will get added to + * job->sync object */ amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, + kvm->process_info->eviction_fence, NULL, NULL); amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + sync_vm_fence(adev, sync, bo_va->last_pt_update); return 0; } @@ -853,12 +978,12 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, /* Update the page tables */ ret = amdgpu_vm_bo_update(adev, bo_va, false); - if (ret) { + if (ret != 0) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; } - return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + return sync_vm_fence(adev, sync, bo_va->last_pt_update); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -869,9 +994,8 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, /* Set virtual address for the allocation */ ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, - amdgpu_bo_size(entry->bo_va->base.bo), - entry->pte_flags); - if (ret) { + amdgpu_bo_size(entry->bo_va->base.bo), entry->pte_flags); + if (ret != 0) { pr_err("Failed to map VA 0x%llx in vm. ret %d\n", entry->va, ret); return ret; @@ -881,7 +1005,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return 0; ret = update_gpuvm_pte(adev, entry, sync); - if (ret) { + if (ret != 0) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; } @@ -911,424 +1035,116 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) return sg; } -static int process_validate_vms(struct amdkfd_process_info *process_info) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - ret = vm_validate_pt_pd_bos(peer_vm); - if (ret) - return ret; - } - - return 0; -} - -static int process_sync_pds_resv(struct amdkfd_process_info *process_info, - struct amdgpu_sync *sync) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - struct amdgpu_bo *pd = peer_vm->root.base.bo; - - ret = amdgpu_sync_resv(NULL, - sync, pd->tbo.resv, - AMDGPU_FENCE_OWNER_UNDEFINED, false); - if (ret) - return ret; - } - - return 0; -} - -static int process_update_pds(struct amdkfd_process_info *process_info, - struct amdgpu_sync *sync) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - ret = vm_update_pds(peer_vm, sync); - if (ret) - return ret; - } - - return 0; -} - -static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, - struct dma_fence **ef) -{ - struct amdkfd_process_info *info = NULL; - int ret; - - if (!*process_info) { - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - mutex_init(&info->lock); - INIT_LIST_HEAD(&info->vm_list_head); - INIT_LIST_HEAD(&info->kfd_bo_list); - INIT_LIST_HEAD(&info->userptr_valid_list); - INIT_LIST_HEAD(&info->userptr_inval_list); - - info->eviction_fence = - amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); - if (!info->eviction_fence) { - pr_err("Failed to create eviction fence\n"); - ret = -ENOMEM; - goto create_evict_fence_fail; - } - - info->pid = 
get_task_pid(current->group_leader, PIDTYPE_PID); - atomic_set(&info->evicted_bos, 0); - INIT_DELAYED_WORK(&info->work, - amdgpu_amdkfd_restore_userptr_worker); - - *process_info = info; - *ef = dma_fence_get(&info->eviction_fence->base); - } - - vm->process_info = *process_info; - - /* Validate page directory and attach eviction fence */ - ret = amdgpu_bo_reserve(vm->root.base.bo, true); - if (ret) - goto reserve_pd_fail; - ret = vm_validate_pt_pd_bos(vm); - if (ret) { - pr_err("validate_pt_pd_bos() failed\n"); - goto validate_pd_fail; - } - amdgpu_bo_fence(vm->root.base.bo, - &vm->process_info->eviction_fence->base, true); - amdgpu_bo_unreserve(vm->root.base.bo); - - /* Update process info */ - mutex_lock(&vm->process_info->lock); - list_add_tail(&vm->vm_list_node, - &(vm->process_info->vm_list_head)); - vm->process_info->n_vms++; - mutex_unlock(&vm->process_info->lock); - - return 0; - -validate_pd_fail: - amdgpu_bo_unreserve(vm->root.base.bo); -reserve_pd_fail: - vm->process_info = NULL; - if (info) { - /* Two fence references: one in info and one in *ef */ - dma_fence_put(&info->eviction_fence->base); - dma_fence_put(*ef); - *ef = NULL; - *process_info = NULL; -create_evict_fence_fail: - kfree(info); - } - return ret; -} - -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *new_vm; - int ret; - - new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); - if (!new_vm) - return -ENOMEM; - - /* Initialize AMDGPU part of the VM */ - ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); - if (ret) { - pr_err("Failed init vm ret %d\n", ret); - goto amdgpu_vm_init_fail; - } - - /* Initialize KFD part of the VM and process info */ - ret = init_kfd_vm(new_vm, process_info, ef); - if (ret) - goto init_kfd_vm_fail; + int ret = 0; + struct amdgpu_sync sync; + struct amdgpu_device *adev; - *vm = (void *) new_vm; + adev = get_amdgpu_device(kgd); + amdgpu_sync_create(&sync); - return 0; + mutex_lock(&mem->lock); + amdgpu_sync_clone(adev, &mem->sync, &sync); + mutex_unlock(&mem->lock); -init_kfd_vm_fail: - amdgpu_vm_fini(adev, new_vm); -amdgpu_vm_init_fail: - kfree(new_vm); + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); return ret; } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; - int ret; - - /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm); - if (ret) - return ret; - - /* Initialize KFD part of the VM and process info */ - ret = init_kfd_vm(avm, process_info, ef); - if (ret) - return ret; - - *vm = (void *)avm; - - return 0; -} - -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.base.bo; - - if (vm->vm_context != AMDGPU_VM_CONTEXT_COMPUTE) - return; - - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - - if (!process_info) - return; - - /* Update process info */ - mutex_lock(&process_info->lock); 
- process_info->n_vms--; - list_del(&vm->vm_list_node); - mutex_unlock(&process_info->lock); - - /* Release per-process resources when last compute VM is destroyed */ - if (!process_info->n_vms) { - WARN_ON(!list_empty(&process_info->kfd_bo_list)); - WARN_ON(!list_empty(&process_info->userptr_valid_list)); - WARN_ON(!list_empty(&process_info->userptr_inval_list)); - - dma_fence_put(&process_info->eviction_fence->base); - cancel_delayed_work_sync(&process_info->work); - put_pid(process_info->pid); - kfree(process_info); - } -} - -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - - if (WARN_ON(!kgd || !vm)) - return; - - pr_debug("Destroying process vm %p\n", vm); - - /* Release the VM context */ - amdgpu_vm_fini(adev, avm); - kfree(vm); -} - -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) -{ - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - - return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; -} +#define BOOL_TO_STR(b) (b == true) ? "true" : "false" int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - uint64_t user_addr = 0; + bool aql_queue, public, readonly, execute, coherent, no_sub, userptr; + u64 alloc_flag; + uint32_t domain; struct sg_table *sg = NULL; - enum ttm_bo_type bo_type = ttm_bo_type_device; - struct amdgpu_bo *bo; - int byte_align; - u32 domain, alloc_domain; - u64 alloc_flags; - uint32_t mapping_flags; - int ret; + + if (!(flags & ALLOC_MEM_FLAGS_NONPAGED)) { + pr_debug("current hw doesn't support paged memory\n"); + return -EINVAL; + } + + domain = 0; + alloc_flag = 0; + + aql_queue = (flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) ? true : false; + public = (flags & ALLOC_MEM_FLAGS_PUBLIC) ? true : false; + readonly = (flags & ALLOC_MEM_FLAGS_READONLY) ? true : false; + execute = (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ? true : false; + coherent = (flags & ALLOC_MEM_FLAGS_COHERENT) ? true : false; + no_sub = (flags & ALLOC_MEM_FLAGS_NO_SUBSTITUTE) ? true : false; + userptr = (flags & ALLOC_MEM_FLAGS_USERPTR) ? true : false; /* * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; - alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - } else if (flags & ALLOC_MEM_FLAGS_GTT) { - domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_flags = 0; - } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flag = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + if (public) { + alloc_flag = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + alloc_flag |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + } else if (flags & (ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_USERPTR)) { domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - alloc_flags = 0; - if (!offset || !*offset) - return -EINVAL; - user_addr = *offset; + alloc_flag = 0; } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - alloc_flags = 0; + alloc_flag = 0; if (size > UINT_MAX) return -EINVAL; sg = create_doorbell_sg(*offset, size); if (!sg) return -ENOMEM; - bo_type = ttm_bo_type_sg; - } else { - return -EINVAL; - } - - *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) { - ret = -ENOMEM; - goto err; } - INIT_LIST_HEAD(&(*mem)->bo_va_list); - mutex_init(&(*mem)->lock); - (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); - - /* Workaround for AQL queue wraparound bug. Map the same - * memory twice. That means we only actually allocate half - * the memory. - */ - if ((*mem)->aql_queue) - size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11) ? - VI_BO_SIZE_ALIGN : 1; + if (offset && !userptr) + *offset = 0; - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (!(flags & ALLOC_MEM_FLAGS_READONLY)) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + pr_debug("Allocate VA 0x%llx - 0x%llx domain %s aql %s\n", + va, va + size, domain_string(domain), + BOOL_TO_STR(aql_queue)); - amdgpu_sync_create(&(*mem)->sync); - - if (!sg) { - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, - alloc_domain); - if (ret) { - pr_debug("Insufficient system memory\n"); - goto err_reserve_limit; - } - } - - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", - va, size, domain_string(alloc_domain)); - - /* Allocate buffer object. Userptr objects need to start out - * in the CPU domain, get moved to GTT when pinned. - */ -#if 0 - ret = amdgpu_bo_create(adev, size, byte_align, alloc_domain, - alloc_flags, bo_type, NULL, &bo); -#else - ret = amdgpu_bo_create(adev, size, byte_align, false , alloc_domain, - alloc_flags, sg , NULL, &bo); -#endif - if (ret) { - pr_debug("Failed to create BO on domain %s. 
ret %d\n", - domain_string(alloc_domain), ret); - goto err_bo_create; - } - if (bo_type == ttm_bo_type_sg) { - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; - } - bo->kfd_bo = *mem; - (*mem)->bo = bo; - if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; - - (*mem)->va = va; - (*mem)->domain = domain; - (*mem)->mapped_to_gpu_memory = 0; - (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); - - if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); - goto allocate_init_user_pages_failed; - } - } - - if (offset) - *offset = amdgpu_bo_mmap_offset(bo); - - return 0; - -allocate_init_user_pages_failed: - amdgpu_bo_unref(&bo); -err_bo_create: - if (!sg) - unreserve_system_mem_limit(adev, size, alloc_domain); -err_reserve_limit: - kfree(*mem); -err: - if (sg) { - sg_free_table(sg); - kfree(sg); - } - return ret; + pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s coherent %s no_sub %s\n", + alloc_flag, BOOL_TO_STR(public), + BOOL_TO_STR(readonly), BOOL_TO_STR(execute), + BOOL_TO_STR(coherent), BOOL_TO_STR(no_sub)); + + return __alloc_memory_of_gpu(kgd, va, size, vm, mem, + offset, domain, + alloc_flag, sg, + aql_queue, readonly, execute, + coherent, no_sub, userptr); } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem) + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdkfd_process_info *process_info = mem->process_info; - unsigned long bo_size = mem->bo->tbo.mem.size; + struct amdgpu_device *adev; struct kfd_bo_va_list *entry, *tmp; struct bo_vm_reservation_context ctx; + int ret = 0; struct ttm_validate_buffer *bo_list_entry; - int ret; + struct amdkfd_process_info *process_info; + unsigned long bo_size; + + adev = get_amdgpu_device(kgd); + process_info = ((struct amdkfd_vm *)vm)->process_info; + + bo_size = mem->bo->tbo.mem.size; mutex_lock(&mem->lock); if (mem->mapped_to_gpu_memory > 0) { - pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", - mem->va, bo_size); + pr_debug("BO VA 0x%llx size 0x%lx is already mapped to vm %p.\n", + mem->va, bo_size, vm); mutex_unlock(&mem->lock); return -EBUSY; } @@ -1356,8 +1172,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( kvfree(mem->user_pages); } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); - if (unlikely(ret)) + ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx); + if (unlikely(ret != 0)) return ret; /* The eviction fence should be removed by the last unmap. 
@@ -1371,9 +1187,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mem->va + bo_size * (1 + mem->aql_queue)); /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) { remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, entry, bo_size); + } ret = unreserve_bo_and_vms(&ctx, false, false); @@ -1398,8 +1215,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_device *adev; int ret; struct amdgpu_bo *bo; uint32_t domain; @@ -1407,14 +1223,11 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct bo_vm_reservation_context ctx; struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; unsigned long bo_size; - bool is_invalid_userptr = false; + bool is_invalid_userptr; - bo = mem->bo; - if (!bo) { - pr_err("Invalid BO when mapping memory to GPU\n"); - return -EINVAL; - } + adev = get_amdgpu_device(kgd); /* Make sure restore is not running concurrently. Since we * don't map invalid userptr BOs, we rely on the next restore @@ -1426,14 +1239,20 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( * sure that the MMU notifier is no longer running * concurrently and the queues are actually stopped */ - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - down_write(&current->mm->mmap_sem); - is_invalid_userptr = atomic_read(&mem->invalid); - up_write(&current->mm->mmap_sem); - } + down_read(&current->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_read(&current->mm->mmap_sem); mutex_lock(&mem->lock); + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1443,7 +1262,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( vm, domain_string(domain)); ret = reserve_bo_and_vm(mem, vm, &ctx); - if (unlikely(ret)) + if (unlikely(ret != 0)) goto out; /* Userptr can be marked as "not invalid", but not actually be * if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - if (check_if_add_bo_to_vm(avm, mem)) { - ret = add_bo_to_vm(adev, mem, avm, false, + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, &bo_va_entry); - if (ret) + if (ret != 0) goto add_bo_to_vm_failed; if (mem->aql_queue) { - ret = add_bo_to_vm(adev, mem, avm, + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, true, &bo_va_entry_aql); - if (ret) + if (ret != 0) goto add_bo_to_vm_failed_aql; } } else { - ret = vm_validate_pt_pd_bos(avm); - if (unlikely(ret)) + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret != 0)) goto add_bo_to_vm_failed; } @@ -1492,7 +1311,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); - if (ret) { + if (ret != 0) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } @@ -1510,9 +1329,15 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + if (mem->domain & AMDGPU_GEM_DOMAIN_DGMA) { + ret = amdgpu_bo_pin(bo, mem->domain, NULL); + if (ret != 0) { + pr_err("Unable to pin DGMA BO\n"); +
goto map_bo_to_gpuvm_failed; + } + } else if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) amdgpu_bo_fence(bo, - &avm->process_info->eviction_fence->base, + &kfd_vm->process_info->eviction_fence->base, true); ret = unreserve_bo_and_vms(&ctx, false, false); @@ -1533,30 +1358,200 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( return ret; } +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (new_vm == NULL) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret != 0) { + pr_err("Failed init vm ret %d\n", ret); + /* Undo everything related to the new VM context */ + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + pr_err("Failed to create amdkfd_process_info"); + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (info->eviction_fence == NULL) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + info->pid = get_task_pid(current->group_leader, + PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->work, + amdgpu_amdkfd_restore_userptr_worker); + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); + + dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->work); + put_pid(process_info->pid); + kfree(process_info); + } + + /* Release the VM context */ + 
amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *mem) +{ + struct amdgpu_device *adev; + + adev = (struct amdgpu_device *) kgd; + if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + *mem = *adev->gmc.vm_fault_info; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + } + return 0; +} + +static bool is_mem_on_local_device(struct kgd_dev *kgd, + struct list_head *bo_va_list, void *vm) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, bo_va_list, bo_list) { + if (entry->kgd_dev == kgd && entry->bo_va->base.vm == vm) + return true; + } + + return false; +} + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdkfd_process_info *process_info = - ((struct amdgpu_vm *)vm)->process_info; - unsigned long bo_size = mem->bo->tbo.mem.size; struct kfd_bo_va_list *entry; + struct amdgpu_device *adev; + unsigned int mapped_before; + int ret = 0; struct bo_vm_reservation_context ctx; - int ret; + struct amdkfd_process_info *process_info; + unsigned long bo_size; + + adev = (struct amdgpu_device *) kgd; + process_info = ((struct amdkfd_vm *)vm)->process_info; + + bo_size = mem->bo->tbo.mem.size; mutex_lock(&mem->lock); - ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); - if (unlikely(ret)) + /* + * Make sure that this BO mapped on KGD before unmappping it + */ + if (!is_mem_on_local_device(kgd, &mem->bo_va_list, vm)) { + ret = -EINVAL; goto out; - /* If no VMs were reserved, it means the BO wasn't actually mapped */ - if (ctx.n_vms == 0) { + } + + if (mem->mapped_to_gpu_memory == 0) { + pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", + mem->va, bo_size, vm); ret = -EINVAL; - goto unreserve_out; + goto out; } + mapped_before = mem->mapped_to_gpu_memory; - ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); - if (unlikely(ret)) + ret = reserve_bo_and_cond_vms(mem, vm, VA_MAPPED, &ctx); + if (unlikely(ret != 0)) + goto out; + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret != 0)) goto unreserve_out; pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", @@ -1589,11 +1584,20 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( /* If BO is unmapped from all VMs, unfence it. It can be evicted if * required. 
*/ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, + if (mem->mapped_to_gpu_memory == 0) { + if (mem->domain & AMDGPU_GEM_DOMAIN_DGMA) + amdgpu_bo_unpin(mem->bo); + else if (!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence, - NULL, NULL); + NULL, NULL); + } + + if (mapped_before == mem->mapped_to_gpu_memory) { + pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", + mem->va, bo_size, vm); + ret = -EINVAL; + } unreserve_out: unreserve_bo_and_vms(&ctx, false, false); @@ -1602,28 +1606,8 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( return ret; } -int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) -{ - struct amdgpu_sync sync; - int ret; - struct amdgpu_device *adev; - - adev = get_amdgpu_device(kgd); - - amdgpu_sync_create(&sync); - - mutex_lock(&mem->lock); - amdgpu_sync_clone(adev , &mem->sync, &sync); - mutex_unlock(&mem->lock); - - ret = amdgpu_sync_wait(&sync, intr); - amdgpu_sync_free(&sync); - return ret; -} - int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size) + struct kgd_mem *mem, void **kptr) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1660,11 +1644,10 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, bo, mem->process_info->eviction_fence, NULL, NULL); list_del_init(&mem->validate_list.head); - if (size) - *size = amdgpu_bo_size(bo); - amdgpu_bo_unreserve(bo); + mem->kptr = *kptr; + mutex_unlock(&mem->process_info->lock); return 0; @@ -1678,27 +1661,13 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, return ret; } -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, - struct kfd_vm_fault_info *mem) -{ - struct amdgpu_device *adev; - - adev = (struct amdgpu_device *) kgd; - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { - *mem = *adev->gmc.vm_fault_info; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 0); - } - return 0; -} - static int pin_bo_wo_map(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; int ret = 0; ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret)) + if (unlikely(ret != 0)) return ret; ret = amdgpu_bo_pin(bo, mem->domain, NULL); @@ -1713,7 +1682,7 @@ static void unpin_bo_wo_map(struct kgd_mem *mem) int ret = 0; ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret)) + if (unlikely(ret != 0)) return; amdgpu_bo_unpin(bo); @@ -1758,8 +1727,7 @@ static int get_sg_table(struct amdgpu_device *adev, goto out; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) { - bus_addr = amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start - + adev->gmc.aper_base + offset; + bus_addr = bo->tbo.offset + adev->gmc.aper_base + offset; for_each_sg(sg->sgl, s, sg->orig_nents, i) { uint64_t chunk_size, length; @@ -1814,7 +1782,7 @@ int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, struct amdgpu_device *adev; ret = pin_bo_wo_map(mem); - if (unlikely(ret)) + if (unlikely(ret != 0)) return ret; adev = get_amdgpu_device(kgd); @@ -1844,7 +1812,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct drm_gem_object *obj; struct amdgpu_bo *bo; - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; if (dma_buf->ops != &drm_gem_prime_dmabuf_ops) /* Can't handle non-graphics 
buffers */ @@ -1857,12 +1825,13 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_DGMA))) /* Only VRAM and GTT BOs are supported */ return -EINVAL; *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) + if (*mem == NULL) return -ENOMEM; if (size) @@ -1879,11 +1848,15 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + (*mem)->domain = AMDGPU_GEM_DOMAIN_VRAM; + else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) + (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT; + else + (*mem)->domain = AMDGPU_GEM_DOMAIN_DGMA; (*mem)->mapped_to_gpu_memory = 0; - (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, false); amdgpu_sync_create(&(*mem)->sync); return 0; @@ -1913,6 +1886,37 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm, return 0; } +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. 
This means it @@ -1936,7 +1940,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, if (evicted_bos == 1) { /* First eviction, stop the queues */ r = kgd2kfd->quiesce_mm(NULL, mm); - if (r) + if (r != 0) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->work, 1); } @@ -1955,7 +1959,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, { struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; - struct ttm_operation_ctx ctx = { false, false }; int invalid, ret; /* Move all invalidated BOs to the userptr_inval_list and @@ -2002,8 +2005,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (!mem->user_pages) { mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); if (!mem->user_pages) { pr_err("%s: Failed to allocate pages array\n", __func__); @@ -2034,7 +2037,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) return -EAGAIN; } - return 0; } @@ -2051,10 +2053,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) struct ww_acquire_ctx ticket; struct amdgpu_sync sync; - struct amdgpu_vm *peer_vm; + struct amdkfd_vm *peer_vm; struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; - struct ttm_operation_ctx ctx = { false, false }; int i, ret; pd_bo_list_entries = kcalloc(process_info->n_vms, @@ -2072,7 +2073,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) i = 0; list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + amdgpu_vm_get_pd_bo(&peer_vm->base, &resv_list, &pd_bo_list_entries[i++]); /* Add the userptr_inval_list entries to resv_list */ list_for_each_entry(mem, &process_info->userptr_inval_list, @@ -2096,7 +2097,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + amdgpu_amdkfd_remove_eviction_fence(peer_vm->base.root.base.bo, process_info->eviction_fence, NULL, NULL); @@ -2162,7 +2163,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) unreserve_out: list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_bo_fence(peer_vm->root.base.bo, + amdgpu_bo_fence(peer_vm->base.root.base.bo, &process_info->eviction_fence->base, true); ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); @@ -2265,7 +2266,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) { struct amdgpu_bo_list_entry *pd_bo_list; struct amdkfd_process_info *process_info = info; - struct amdgpu_vm *peer_vm; + struct amdkfd_vm *peer_vm; struct kgd_mem *mem; struct bo_vm_reservation_context ctx; struct amdgpu_amdkfd_fence *new_fence; @@ -2280,14 +2281,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pd_bo_list = kcalloc(process_info->n_vms, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); - if (!pd_bo_list) + if (pd_bo_list == NULL) return -ENOMEM; i = 0; mutex_lock(&process_info->lock); list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); /* Reserve all BOs 
and page tables/directory. Add all BOs from * kfd_bo_list to ctx.list @@ -2308,16 +2310,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } amdgpu_sync_create(&sync_obj); + ctx.sync = &sync_obj; /* Validate PDs and PTs */ ret = process_validate_vms(process_info); if (ret) goto validate_map_fail; - ret = process_sync_pds_resv(process_info, &sync_obj); - if (ret) { - pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); - goto validate_map_fail; + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); } /* Validate BOs and map them to GPUVM (update VM page tables). */ @@ -2333,17 +2339,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; - } + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { ret = update_gpuvm_pte((struct amdgpu_device *) bo_va_entry->kgd_dev, bo_va_entry, - &sync_obj); + ctx.sync); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; @@ -2352,14 +2354,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } /* Update page directories */ - ret = process_update_pds(process_info, &sync_obj); + ret = process_update_pds(process_info, ctx.sync); if (ret) { pr_debug("Memory eviction: update PDs failed. Try again\n"); goto validate_map_fail; } - /* Wait for validate and PT updates to finish */ - amdgpu_sync_wait(&sync_obj, false); + amdgpu_sync_wait(ctx.sync, false); /* Release old eviction fence and create new one, because fence only * goes from unsignaled to signaled, fence cannot be reused. 
@@ -2377,7 +2378,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Attach new eviction fence to all BOs */ + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list.head) amdgpu_bo_fence(mem->bo, @@ -2386,7 +2390,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - struct amdgpu_bo *bo = peer_vm->root.base.bo; + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7ac07a3..6414b50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -132,7 +132,6 @@ int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ -int amdgpu_emu_mode = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -291,9 +290,6 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); -MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); -module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); - #ifdef CONFIG_DRM_AMDGPU_SI int amdgpu_si_support = 1; @@ -573,7 +569,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, { struct drm_device *dev; unsigned long flags = ent->driver_data; - int ret, retry = 0; + int ret; bool supports_atomic = false; if (!amdgpu_virtual_display && @@ -618,14 +614,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); -retry_init: ret = drm_dev_register(dev, ent->driver_data); - if (ret == -EAGAIN && ++retry <= 3) { - DRM_INFO("retry init %d\n", retry); - /* Don't request EX mode too frequently which is attacking */ - msleep(5000); - goto retry_init; - } else if (ret) + if (ret) goto err_pci; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ef9a24d..00477a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -32,7 +32,6 @@ #include #include "amdgpu.h" #include "amdgpu_trace.h" -#include "amdgpu_amdkfd.h" /* * GPUVM @@ -2336,22 +2335,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, adev->vm_manager.fragment_size); } -static void amdgpu_inc_compute_vms(struct amdgpu_device *adev) -{ - /* Temporary use only the first VM manager */ - unsigned int vmhub = 0; /*ring->funcs->vmhub;*/ - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - mutex_lock(&id_mgr->lock); - if ((adev->vm_manager.n_compute_vms++ == 0) && - (!amdgpu_sriov_vf(adev))) { - /* First Compute VM: enable compute power profile */ - if (adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev,PP_SMC_POWER_PROFILE_COMPUTE); - } - mutex_unlock(&id_mgr->lock); -} - /** * amdgpu_vm_init - initialize a vm 
instance * @@ -2456,8 +2439,21 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->fault_credit = 16; vm->vm_context = vm_context; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) - amdgpu_inc_compute_vms(adev); + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + + mutex_lock(&id_mgr->lock); + + if ((adev->vm_manager.n_compute_vms++ == 0) && + (!amdgpu_sriov_vf(adev))) { + /* First Compute VM: enable compute power profile */ + if (adev->powerplay.pp_funcs->switch_power_profile) + amdgpu_dpm_switch_power_profile(adev, + AMD_PP_COMPUTE_PROFILE); + } + mutex_unlock(&id_mgr->lock); + } return 0; @@ -2476,86 +2472,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** - * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM - * - * This only works on GFX VMs that don't have any BOs added and no - * page tables allocated yet. - * - * Changes the following VM parameters: - * - vm_context - * - use_cpu_for_update - * - pte_supports_ats - * - pasid (old PASID is released, because compute manages its own PASIDs) - * - * Reinitializes the page directory to reflect the changed ATS - * setting. May also switch to the compute power profile if this is - * the first compute VM. May leave behind an unused shadow BO for the - * page directory when switching from SDMA updates to CPU updates. - * - * Returns 0 for success, -errno for errors. - */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) -{ - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); - int r; - - r = amdgpu_bo_reserve(vm->root.base.bo, true); - if (r) - return r; - - /* Sanity checks */ - if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { - /* Can happen if ioctl is interrupted by a signal after - * this function already completed. Just return success. - */ - r = 0; - goto error; - } - if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { - r = -EINVAL; - goto error; - } - - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, - adev->vm_manager.root_level, - pte_support_ats); - if (r) - goto error; - } - - /* Update VM state */ - vm->vm_context = AMDGPU_VM_CONTEXT_COMPUTE; - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_COMPUTE); - vm->pte_support_ats = pte_support_ats; - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? 
"CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), - "CPU update of VM recommended only for large BAR system\n"); - - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - vm->pasid = 0; - } - - /* Count the new compute VM */ - amdgpu_inc_compute_vms(adev); - -error: - amdgpu_bo_unreserve(vm->root.base.bo); - return r; -} - -/** * amdgpu_vm_free_levels - free PD/PT levels * * @adev: amdgpu device structure @@ -2616,7 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { struct amdgpu_vmid_mgr *id_mgr = - &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + mutex_lock(&id_mgr->lock); WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs"); @@ -2737,9 +2654,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.vm_update_mode = 0; #endif + adev->vm_manager.n_compute_vms = 0; idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); - adev->vm_manager.n_compute_vms = 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index beba1a5..beee443 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -199,6 +199,9 @@ struct amdgpu_vm { /* dedicated to vm */ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; + /* Whether this is a Compute or GFX Context */ + int vm_context; + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -210,18 +213,6 @@ struct amdgpu_vm { /* Limit non-retry fault storms */ unsigned int fault_credit; - - /* Whether this is a Compute or GFX Context */ - int vm_context; - - /* Points to the KFD process VM info */ - struct amdkfd_process_info *process_info; - - /* List node in amdkfd_process_info.vm_list_head */ - struct list_head vm_list_node; - - /* Valid while the PD is reserved or fenced */ - uint64_t pd_phys_addr; }; struct amdgpu_vm_manager { @@ -254,22 +245,20 @@ struct amdgpu_vm_manager { * BIT1[= 0] Compute updated by SDMA [= 1] by CPU */ int vm_update_mode; + /* Number of Compute VMs, used for detecting Compute activity */ + unsigned n_compute_vms; /* PASID to VM mapping, will be used in interrupt context to * look up VM of a page fault */ struct idr pasid_idr; spinlock_t pasid_lock; - - /* Number of Compute VMs, used for detecting Compute activity */ - unsigned n_compute_vms; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, unsigned int pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c old mode 100755 new mode 100644 index 52f456e..47dfce9 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -366,14 +366,14 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, * 32 and 64-bit requests are possible and must be * supported. 
*/ - - if (pci_enable_atomic_ops_to_root(pdev) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } - + if (pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; + } } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index e164abb..4dcc7d0 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -29,11 +29,8 @@ #define KGD_KFD_INTERFACE_H_INCLUDED #include -#include -#include -#include -#include #include +#include struct pci_dev; @@ -200,6 +197,8 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * + * @init_pipeline: Initialized the compute pipelines. + * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * @@ -227,6 +226,9 @@ struct tile_config { * * @get_fw_version: Returns FW versions from the header * + * @set_num_of_requests: Sets number of Peripheral Page Request (PPR) sent to + * IOMMU when address translation failed + * * @get_cu_info: Retrieves activated cu info * * @get_dmabuf_info: Returns information about a dmabuf if it was @@ -261,15 +263,13 @@ struct kfd2kgd_calls { void(*get_local_mem_info)(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); + uint64_t (*get_vmem_size)(struct kgd_dev *kgd); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); int (*create_process_vm)(struct kgd_dev *kgd, void **vm, void **process_info, struct dma_fence **ef); - int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); int (*create_process_gpumem)(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); @@ -277,6 +277,8 @@ struct kfd2kgd_calls { uint32_t (*get_process_page_dir)(void *vm); + int (*open_graphic_handle)(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); + int (*alloc_pasid)(unsigned int bits); void (*free_pasid)(unsigned int pasid); @@ -288,6 +290,9 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); + int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); @@ -337,6 +342,8 @@ struct kfd2kgd_calls { uint16_t (*get_atc_vmid_pasid_mapping_pasid)( struct kgd_dev *kgd, uint8_t vmid); + void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, + uint8_t vmid); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); @@ -348,7 +355,8 @@ struct kfd2kgd_calls { uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); - int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); + int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, @@ 
-357,6 +365,8 @@ struct kfd2kgd_calls { uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); + void (*set_num_of_requests)(struct kgd_dev *kgd, + uint8_t num_of_requests); int (*alloc_memory_of_scratch)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable, @@ -364,7 +374,7 @@ struct kfd2kgd_calls { void (*get_cu_info)(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size); + struct kgd_mem *mem, void **kptr); void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index 416abeb..44de087 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -166,10 +166,10 @@ void cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) cz_dpm_powerup_uvd(hwmgr); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cz_dpm_update_uvd_dpm(hwmgr, false); } @@ -197,11 +197,11 @@ void cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cgs_set_powergating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cz_dpm_update_vce_dpm(hwmgr); cz_enable_disable_vce_dpm(hwmgr, true); } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index ec0574e..26e0abc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -208,7 +208,7 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_IOC_WAIT_RESULT_TIMEOUT 1 #define KFD_IOC_WAIT_RESULT_FAIL 2 -#define KFD_SIGNAL_EVENT_LIMIT 4096 +#define KFD_SIGNAL_EVENT_LIMIT (4096 + 512) struct kfd_ioctl_create_event_args { uint64_t event_page_offset; /* from KFD */ @@ -278,11 +278,6 @@ struct kfd_ioctl_alloc_memory_of_scratch_args { uint32_t pad; }; -struct kfd_ioctl_acquire_vm_args { - uint32_t drm_fd; /* to KFD */ - uint32_t gpu_id; /* to KFD */ -}; - /* Allocation flags: memory types */ #define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) @@ -366,22 +361,22 @@ struct kfd_ioctl_ipc_import_handle_args { struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; + uint64_t tile_config_ptr; /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; + uint64_t macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - __u32 num_tile_configs; + uint32_t num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - __u32 num_macro_tile_configs; + uint32_t num_macro_tile_configs; - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ @@ -522,10 +517,7 @@ struct 
kfd_ioctl_cross_memory_copy_args { #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ AMDKFD_IOWR(0x20, struct kfd_ioctl_get_queue_wave_state_args) -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x21, struct kfd_ioctl_acquire_vm_args) - #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x21 #endif -- 2.7.4