diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2932-drm-amdkfd-Implement-kfd2kgd_calls-for-Arcturus.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2932-drm-amdkfd-Implement-kfd2kgd_calls-for-Arcturus.patch | 829 |
1 files changed, 829 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2932-drm-amdkfd-Implement-kfd2kgd_calls-for-Arcturus.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2932-drm-amdkfd-Implement-kfd2kgd_calls-for-Arcturus.patch new file mode 100644 index 00000000..7062e3c1 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2932-drm-amdkfd-Implement-kfd2kgd_calls-for-Arcturus.patch @@ -0,0 +1,829 @@ +From c7d9d27b4c5fafada1b631f791231772bff40ecf Mon Sep 17 00:00:00 2001 +From: Oak Zeng <Oak.Zeng@amd.com> +Date: Tue, 9 Jul 2019 09:59:30 -0500 +Subject: [PATCH 2932/2940] drm/amdkfd: Implement kfd2kgd_calls for Arcturus + +Arcturus shares most of the kfd2kgd_calls with gfx9. But due to +SDMA register address change, it can't share SDMA related functions. +Export gfx9 kfd2kgd_calls and implement SDMA related functions +for Arcturus. + +Change-Id: I509342712ea3854587b9b0048b3cc55ddfaba300 +Signed-off-by: Oak Zeng <Oak.Zeng@amd.com> +Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 324 ++++++++++++++++++ + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 184 +++------- + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 83 +++++ + 6 files changed, 466 insertions(+), 133 deletions(-) + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h + +diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile +index 38a0a0e8677d..34de2df3ad9b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/Makefile ++++ b/drivers/gpu/drm/amd/amdgpu/Makefile +@@ -167,7 +167,8 @@ amdgpu-y += \ + amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ + amdgpu_amdkfd_gfx_v8.o \ +- amdgpu_amdkfd_gfx_v9.o ++ amdgpu_amdkfd_gfx_v9.o \ ++ amdgpu_amdkfd_arcturus.o + + ifneq ($(CONFIG_DRM_AMDGPU_CIK),) + amdgpu-y += amdgpu_amdkfd_gfx_v7.o +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index fed37feaff34..bb634e0985bd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -92,9 +92,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: +- case CHIP_ARCTURUS: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; ++ case CHIP_ARCTURUS: ++ kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); ++ break; + default: + dev_info(adev->dev, "kfd not supported on this ASIC\n"); + return; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index bdc550dcc2ca..cec816566833 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -141,6 +141,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); ++struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); + int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, + uint64_t src_offset, struct kgd_mem *dst_mem, + uint64_t dest_offset, uint64_t size, struct dma_fence **f, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +new file mode 100644 +index 000000000000..4d9101834ba7 +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +@@ -0,0 +1,324 @@ ++/* ++ * Copyright 2019 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#undef pr_fmt ++#define pr_fmt(fmt) "kfd2kgd: " fmt ++ ++#include <linux/module.h> ++#include <linux/fdtable.h> ++#include <linux/uaccess.h> ++#include <linux/mmu_context.h> ++#include <linux/firmware.h> ++#include <drm/drmP.h> ++#include "amdgpu.h" ++#include "amdgpu_amdkfd.h" ++#include "sdma0/sdma0_4_2_2_offset.h" ++#include "sdma0/sdma0_4_2_2_sh_mask.h" ++#include "sdma1/sdma1_4_2_2_offset.h" ++#include "sdma1/sdma1_4_2_2_sh_mask.h" ++#include "sdma2/sdma2_4_2_2_offset.h" ++#include "sdma2/sdma2_4_2_2_sh_mask.h" ++#include "sdma3/sdma3_4_2_2_offset.h" ++#include "sdma3/sdma3_4_2_2_sh_mask.h" ++#include "sdma4/sdma4_4_2_2_offset.h" ++#include "sdma4/sdma4_4_2_2_sh_mask.h" ++#include "sdma5/sdma5_4_2_2_offset.h" ++#include "sdma5/sdma5_4_2_2_sh_mask.h" ++#include "sdma6/sdma6_4_2_2_offset.h" ++#include "sdma6/sdma6_4_2_2_sh_mask.h" ++#include "sdma7/sdma7_4_2_2_offset.h" ++#include "sdma7/sdma7_4_2_2_sh_mask.h" ++#include "v9_structs.h" ++#include "soc15.h" ++#include "soc15d.h" ++#include "amdgpu_amdkfd_gfx_v9.h" ++ ++#define HQD_N_REGS 56 ++#define DUMP_REG(addr) do { \ ++ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ ++ break; \ ++ (*dump)[i][0] = (addr) << 2; \ ++ (*dump)[i++][1] = RREG32(addr); \ ++ } while (0) ++ ++static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) ++{ ++ return (struct amdgpu_device *)kgd; ++} ++ ++static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) ++{ ++ return (struct v9_sdma_mqd *)mqd; ++} ++ ++static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, ++ unsigned int engine_id, ++ unsigned int queue_id) ++{ ++ uint32_t base[8] = { ++ SOC15_REG_OFFSET(SDMA0, 0, ++ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA1, 0, ++ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA2, 0, ++ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA3, 0, ++ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA4, 0, ++ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA5, 0, ++ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA6, 0, ++ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, ++ SOC15_REG_OFFSET(SDMA7, 0, ++ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL ++ }; ++ uint32_t retval; ++ ++ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - ++ mmSDMA0_RLC0_RB_CNTL); ++ ++ pr_debug("sdma base address: 0x%x\n", retval); ++ ++ return retval; ++} ++ ++static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, ++ u32 instance, u32 offset) ++{ ++ switch (instance) { ++ case 0: ++ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); ++ case 1: ++ return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); ++ case 2: ++ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); ++ case 3: ++ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); ++ case 4: ++ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); ++ case 5: ++ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); ++ case 6: ++ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); ++ case 7: ++ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); ++ default: ++ break; ++ } ++ return 0; ++} ++ ++static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, ++ uint32_t __user *wptr, struct mm_struct *mm) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct v9_sdma_mqd *m; ++ uint32_t sdma_base_addr, sdmax_gfx_context_cntl; ++ unsigned long end_jiffies; ++ uint32_t data; ++ uint64_t data64; ++ uint64_t __user *wptr64 = (uint64_t __user *)wptr; ++ ++ m = get_sdma_mqd(mqd); ++ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, ++ m->sdma_queue_id); ++ sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, ++ m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); ++ ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, ++ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); ++ ++ end_jiffies = msecs_to_jiffies(2000) + jiffies; ++ while (true) { ++ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); ++ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) ++ break; ++ if (time_after(jiffies, end_jiffies)) ++ return -ETIME; ++ usleep_range(500, 1000); ++ } ++ data = RREG32(sdmax_gfx_context_cntl); ++ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, ++ RESUME_CTX, 0); ++ WREG32(sdmax_gfx_context_cntl, data); ++ ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, ++ m->sdmax_rlcx_doorbell_offset); ++ ++ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ++ ENABLE, 1); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, ++ m->sdmax_rlcx_rb_rptr_hi); ++ ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); ++ if (read_user_wptr(mm, wptr64, data64)) { ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, ++ lower_32_bits(data64)); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, ++ upper_32_bits(data64)); ++ } else { ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, ++ m->sdmax_rlcx_rb_rptr); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, ++ m->sdmax_rlcx_rb_rptr_hi); ++ } ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); ++ ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, ++ m->sdmax_rlcx_rb_base_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, ++ m->sdmax_rlcx_rb_rptr_addr_lo); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, ++ m->sdmax_rlcx_rb_rptr_addr_hi); ++ ++ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, ++ RB_ENABLE, 1); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); ++ ++ return 0; ++} ++ ++static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, ++ uint32_t engine_id, uint32_t queue_id, ++ uint32_t (**dump)[2], uint32_t *n_regs) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); ++ uint32_t i = 0, reg; ++#undef HQD_N_REGS ++#define HQD_N_REGS (19+6+7+10) ++ ++ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); ++ if (*dump == NULL) ++ return -ENOMEM; ++ ++ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) ++ DUMP_REG(sdma_base_addr + reg); ++ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) ++ DUMP_REG(sdma_base_addr + reg); ++ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; ++ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) ++ DUMP_REG(sdma_base_addr + reg); ++ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; ++ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) ++ DUMP_REG(sdma_base_addr + reg); ++ ++ WARN_ON_ONCE(i != HQD_N_REGS); ++ *n_regs = i; ++ ++ return 0; ++} ++ ++static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct v9_sdma_mqd *m; ++ uint32_t sdma_base_addr; ++ uint32_t sdma_rlc_rb_cntl; ++ ++ m = get_sdma_mqd(mqd); ++ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, ++ m->sdma_queue_id); ++ ++ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); ++ ++ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) ++ return true; ++ ++ return false; ++} ++ ++static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, ++ unsigned int utimeout) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct v9_sdma_mqd *m; ++ uint32_t sdma_base_addr; ++ uint32_t temp; ++ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; ++ ++ m = get_sdma_mqd(mqd); ++ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, ++ m->sdma_queue_id); ++ ++ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); ++ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); ++ ++ while (true) { ++ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); ++ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) ++ break; ++ if (time_after(jiffies, end_jiffies)) ++ return -ETIME; ++ usleep_range(500, 1000); ++ } ++ ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, ++ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | ++ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); ++ ++ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); ++ m->sdmax_rlcx_rb_rptr_hi = ++ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); ++ ++ return 0; ++} ++ ++static const struct kfd2kgd_calls kfd2kgd = { ++ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, ++ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, ++ .init_interrupts = kgd_gfx_v9_init_interrupts, ++ .hqd_load = kgd_gfx_v9_hqd_load, ++ .hqd_sdma_load = kgd_hqd_sdma_load, ++ .hqd_dump = kgd_gfx_v9_hqd_dump, ++ .hqd_sdma_dump = kgd_hqd_sdma_dump, ++ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, ++ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, ++ .hqd_destroy = kgd_gfx_v9_hqd_destroy, ++ .hqd_sdma_destroy = kgd_hqd_sdma_destroy, ++ .address_watch_disable = kgd_gfx_v9_address_watch_disable, ++ .address_watch_execute = kgd_gfx_v9_address_watch_execute, ++ .wave_control_execute = kgd_gfx_v9_wave_control_execute, ++ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, ++ .get_atc_vmid_pasid_mapping_pasid = ++ kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, ++ .get_atc_vmid_pasid_mapping_valid = ++ kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, ++ .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, ++ .get_tile_config = kgd_gfx_v9_get_tile_config, ++ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, ++ .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, ++ .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, ++ .get_hive_id = amdgpu_amdkfd_get_hive_id, ++}; ++ ++struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) ++{ ++ return (struct kfd2kgd_calls *)&kfd2kgd; ++} ++ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +index 821951993314..35f845a6b1c3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +@@ -68,77 +68,6 @@ static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL + }; + +-/* +- * Register access functions +- */ +- +-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +- uint32_t sh_mem_config, +- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, +- uint32_t sh_mem_bases); +-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +- unsigned int vmid); +-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr, +- uint32_t wptr_shift, uint32_t wptr_mask, +- struct mm_struct *mm); +-static int kgd_hqd_dump(struct kgd_dev *kgd, +- uint32_t pipe_id, uint32_t queue_id, +- uint32_t (**dump)[2], uint32_t *n_regs); +-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +- uint32_t __user *wptr, struct mm_struct *mm); +-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +- uint32_t engine_id, uint32_t queue_id, +- uint32_t (**dump)[2], uint32_t *n_regs); +-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +- uint32_t pipe_id, uint32_t queue_id); +-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +- enum kfd_preempt_type reset_type, +- unsigned int utimeout, uint32_t pipe_id, +- uint32_t queue_id); +-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +- unsigned int utimeout); +-static int kgd_address_watch_disable(struct kgd_dev *kgd); +-static int kgd_address_watch_execute(struct kgd_dev *kgd, +- unsigned int watch_point_id, +- uint32_t cntl_val, +- uint32_t addr_hi, +- uint32_t addr_lo); +-static int kgd_wave_control_execute(struct kgd_dev *kgd, +- uint32_t gfx_index_val, +- uint32_t sq_cmd); +-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +- unsigned int watch_point_id, +- unsigned int reg_offset); +- +-static uint32_t kgd_enable_debug_trap(struct kgd_dev *kgd, +- uint32_t trap_debug_wave_launch_mode, +- uint32_t vmid); +-static uint32_t kgd_disable_debug_trap(struct kgd_dev *kgd); +-static uint32_t kgd_set_debug_trap_data(struct kgd_dev *kgd, +- int trap_data0, +- int trap_data1); +-static uint32_t kgd_set_wave_launch_trap_override(struct kgd_dev *kgd, +- uint32_t trap_override, +- uint32_t trap_mask); +-static uint32_t kgd_set_wave_launch_mode(struct kgd_dev *kgd, +- uint8_t wave_launch_mode, +- uint32_t vmid); +-static void kgd_get_iq_wait_times(struct kgd_dev *kgd, +- uint32_t *wait_times); +-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, +- uint8_t vmid); +-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, +- uint8_t vmid); +-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +- uint64_t page_table_base); +-static void set_scratch_backing_va(struct kgd_dev *kgd, +- uint64_t va, uint32_t vmid); +-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +- + static void kgd_build_grace_period_packet_info(struct kgd_dev *kgd, + uint32_t wait_times, + uint32_t grace_period, +@@ -148,7 +77,7 @@ static void kgd_build_grace_period_packet_info(struct kgd_dev *kgd, + /* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +-static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, ++int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) + { + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +@@ -166,46 +95,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + return 0; + } + +-static const struct kfd2kgd_calls kfd2kgd = { +- .program_sh_mem_settings = kgd_program_sh_mem_settings, +- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, +- .init_interrupts = kgd_init_interrupts, +- .hqd_load = kgd_hqd_load, +- .hqd_sdma_load = kgd_hqd_sdma_load, +- .hqd_dump = kgd_hqd_dump, +- .hqd_sdma_dump = kgd_hqd_sdma_dump, +- .hqd_is_occupied = kgd_hqd_is_occupied, +- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, +- .hqd_destroy = kgd_hqd_destroy, +- .hqd_sdma_destroy = kgd_hqd_sdma_destroy, +- .address_watch_disable = kgd_address_watch_disable, +- .address_watch_execute = kgd_address_watch_execute, +- .wave_control_execute = kgd_wave_control_execute, +- .address_watch_get_offset = kgd_address_watch_get_offset, +- .get_atc_vmid_pasid_mapping_pasid = +- get_atc_vmid_pasid_mapping_pasid, +- .get_atc_vmid_pasid_mapping_valid = +- get_atc_vmid_pasid_mapping_valid, +- .set_scratch_backing_va = set_scratch_backing_va, +- .get_tile_config = amdgpu_amdkfd_get_tile_config, +- .set_vm_context_page_table_base = set_vm_context_page_table_base, +- .invalidate_tlbs = invalidate_tlbs, +- .invalidate_tlbs_vmid = invalidate_tlbs_vmid, +- .get_hive_id = amdgpu_amdkfd_get_hive_id, +- .enable_debug_trap = kgd_enable_debug_trap, +- .disable_debug_trap = kgd_disable_debug_trap, +- .set_debug_trap_data = kgd_set_debug_trap_data, +- .set_wave_launch_trap_override = kgd_set_wave_launch_trap_override, +- .set_wave_launch_mode = kgd_set_wave_launch_mode, +- .get_iq_wait_times = kgd_get_iq_wait_times, +- .build_grace_period_packet_info = kgd_build_grace_period_packet_info, +-}; +- +-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +-{ +- return (struct kfd2kgd_calls *)&kfd2kgd; +-} +- + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) + { + return (struct amdgpu_device *)kgd; +@@ -253,7 +142,7 @@ static void release_queue(struct kgd_dev *kgd) + unlock_srbm(kgd); + } + +-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, ++void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, +@@ -270,7 +159,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + unlock_srbm(kgd); + } + +-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ++int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); +@@ -331,7 +220,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + * but still works + */ + +-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) ++int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; +@@ -391,7 +280,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) + return (struct v9_sdma_mqd *)mqd; + } + +-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, ++int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +@@ -486,7 +375,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + return 0; + } + +-static int kgd_hqd_dump(struct kgd_dev *kgd, ++int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) + { +@@ -623,7 +512,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + return 0; + } + +-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, ++bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); +@@ -664,7 +553,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) + return false; + } + +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, ++int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +@@ -752,7 +641,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + return 0; + } + +-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, ++bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) + { + uint32_t reg; +@@ -763,7 +652,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; + } + +-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, ++uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) + { + uint32_t reg; +@@ -802,7 +691,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, + return 0; + } + +-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) ++int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) + { + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; +@@ -821,8 +710,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; +- if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { +- if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) ++ if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { ++ if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, flush_type); + break; +@@ -833,7 +722,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) + return 0; + } + +-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) ++int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) + { + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + +@@ -861,7 +750,7 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) + return 0; + } + +-static int kgd_address_watch_disable(struct kgd_dev *kgd) ++int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + union TCP_WATCH_CNTL_BITS cntl; +@@ -885,7 +774,7 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) + return 0; + } + +-static int kgd_address_watch_execute(struct kgd_dev *kgd, ++int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, +@@ -920,7 +809,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, + return 0; + } + +-static int kgd_wave_control_execute(struct kgd_dev *kgd, ++int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) + { +@@ -945,7 +834,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, + return 0; + } + +-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, ++uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) + { +@@ -1109,7 +998,7 @@ static void kgd_get_iq_wait_times(struct kgd_dev *kgd, + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); + } + +-static void set_scratch_backing_va(struct kgd_dev *kgd, ++void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) + { + /* No longer needed on GFXv9. The scratch base address is +@@ -1118,7 +1007,7 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, + */ + } + +-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, ++void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); +@@ -1158,3 +1047,36 @@ static void kgd_build_grace_period_packet_info(struct kgd_dev *kgd, + + *reg_offset = mmCP_IQ_WAIT_TIME2; + } ++ ++static const struct kfd2kgd_calls kfd2kgd = { ++ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, ++ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, ++ .init_interrupts = kgd_gfx_v9_init_interrupts, ++ .hqd_load = kgd_gfx_v9_hqd_load, ++ .hqd_sdma_load = kgd_hqd_sdma_load, ++ .hqd_dump = kgd_gfx_v9_hqd_dump, ++ .hqd_sdma_dump = kgd_hqd_sdma_dump, ++ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, ++ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, ++ .hqd_destroy = kgd_gfx_v9_hqd_destroy, ++ .hqd_sdma_destroy = kgd_hqd_sdma_destroy, ++ .address_watch_disable = kgd_gfx_v9_address_watch_disable, ++ .address_watch_execute = kgd_gfx_v9_address_watch_execute, ++ .wave_control_execute = kgd_gfx_v9_wave_control_execute, ++ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, ++ .get_atc_vmid_pasid_mapping_pasid = ++ kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, ++ .get_atc_vmid_pasid_mapping_valid = ++ kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, ++ .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, ++ .get_tile_config = kgd_gfx_v9_get_tile_config, ++ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, ++ .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, ++ .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, ++ .get_hive_id = amdgpu_amdkfd_get_hive_id, ++}; ++ ++struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) ++{ ++ return (struct kfd2kgd_calls *)&kfd2kgd; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +new file mode 100644 +index 000000000000..b79d2a629768 +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +@@ -0,0 +1,83 @@ ++/* ++ * Copyright 2019 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++ ++ ++void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, ++ uint32_t sh_mem_config, ++ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, ++ uint32_t sh_mem_bases); ++int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ++ unsigned int vmid); ++int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); ++int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, ++ uint32_t queue_id, uint32_t __user *wptr, ++ uint32_t wptr_shift, uint32_t wptr_mask, ++ struct mm_struct *mm); ++int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, ++ uint32_t pipe_id, uint32_t queue_id, ++ uint32_t (**dump)[2], uint32_t *n_regs); ++bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, ++ uint32_t pipe_id, uint32_t queue_id); ++int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, ++ enum kfd_preempt_type reset_type, ++ unsigned int utimeout, uint32_t pipe_id, ++ uint32_t queue_id); ++int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); ++int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, ++ unsigned int watch_point_id, ++ uint32_t cntl_val, ++ uint32_t addr_hi, ++ uint32_t addr_lo); ++int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, ++ uint32_t gfx_index_val, ++ uint32_t sq_cmd); ++uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, ++ unsigned int watch_point_id, ++ unsigned int reg_offset); ++ ++uint32_t kgd_gfx_v9_enable_debug_trap(struct kgd_dev *kgd, ++ uint32_t trap_debug_wave_launch_mode, ++ uint32_t vmid); ++uint32_t kgd_gfx_v9_disable_debug_trap(struct kgd_dev *kgd); ++uint32_t kgd_gfx_v9_set_debug_trap_data(struct kgd_dev *kgd, ++ int trap_data0, ++ int trap_data1); ++uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct kgd_dev *kgd, ++ uint32_t trap_override, ++ uint32_t trap_mask); ++uint32_t kgd_gfx_v9_set_wave_launch_mode(struct kgd_dev *kgd, ++ uint8_t wave_launch_mode, ++ uint32_t vmid); ++ ++bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, ++ uint8_t vmid); ++uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, ++ uint8_t vmid); ++void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, ++ uint64_t page_table_base); ++void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, ++ uint64_t va, uint32_t vmid); ++int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); ++int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); ++int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, ++ struct tile_config *config); +-- +2.17.1 + |