diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1692-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1692-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch | 4587 |
1 files changed, 4587 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1692-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1692-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch new file mode 100644 index 00000000..5ba0cb31 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1692-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch @@ -0,0 +1,4587 @@ +From fc0872ed7666e177af85412cd69d22774fc8340f Mon Sep 17 00:00:00 2001 +From: Kent Russell <kent.russell@amd.com> +Date: Tue, 6 Dec 2016 13:10:34 -0500 +Subject: [PATCH 1692/4131] drm/amdkfd: Clean up KFD style errors and warnings + +Using checkpatch.pl -f <file> showed a number of style issues. This +patch addresses as many of them as possible. Some long lines have been +left for readability, but attempts to minimize them have been made. +Also clean up usage of do..while(0) loops, which are mostly for +debugging anyways + +Change-Id: Ie8511447981a051f01b16a06833a70d9df0a85df +Signed-off-by: Kent Russell <kent.russell@amd.com> + + Conflicts: + drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 81 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 47 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 84 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +- + drivers/gpu/drm/amd/amdkfd/cik_int.h | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 280 +++---- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 187 ++--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 +- + drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 803 +++++++++++---------- + drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 12 +- + drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 288 ++++---- + drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 292 ++++---- + drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 6 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 +- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 6 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 6 +- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 137 ++-- + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 8 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 5 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 1 - + drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 16 +- + drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 10 +- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 58 +- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 28 +- + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_rdma.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 71 +- + 36 files changed, 1312 insertions(+), 1182 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index 30e5893..d41cebf 100755 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -34,7 +34,7 @@ + + const struct kfd2kgd_calls *kfd2kgd; + const struct kgd2kfd_calls *kgd2kfd; +-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); ++bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); + + unsigned int global_compute_vmid_bitmap = 0xFF00; + +@@ -43,7 +43,7 @@ int amdgpu_amdkfd_init(void) + int ret; + + #if defined(CONFIG_HSA_AMD_MODULE) +- int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); ++ int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); + + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + +@@ -404,7 +404,8 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) + + cu_info->cu_active_number = acu_info.number; + cu_info->cu_ao_mask = acu_info.ao_cu_mask; +- memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], sizeof(acu_info.bitmap)); ++ memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], ++ sizeof(acu_info.bitmap)); + cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; + cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +index ac167c8..0c797bc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +@@ -153,7 +153,7 @@ static int amd_kfd_fence_signal(struct fence *f) + * + * This function is called when the reference count becomes zero. + * It just RCU schedules freeing up the fence. +-*/ ++ */ + static void amd_kfd_fence_release(struct fence *f) + { + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); +@@ -173,7 +173,7 @@ static void amd_kfd_fence_release(struct fence *f) + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified +-*/ ++ */ + bool amd_kfd_fence_check_mm(struct fence *f, void *mm) + { + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index 5387fca..4adbf0b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -68,7 +68,7 @@ enum { + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENTION = 0x03000000, +- /* extend the mask to 26 bits in order to match the low address field. */ ++ /* extend the mask to 26 bits in order to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF + }; +@@ -93,7 +93,8 @@ union TCP_WATCH_CNTL_BITS { + float f32All; + }; + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); ++static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, ++ int fd, uint32_t handle, struct kgd_mem **mem); + + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + +@@ -101,10 +102,13 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + * Register access functions + */ + +-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, +- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); +-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); ++static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, ++ uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, ++ uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); ++static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ++ unsigned int vmid); ++static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, ++ uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, +@@ -312,11 +316,12 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + + /* + * We have to assume that there is no outstanding mapping. +- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping +- * is in progress or because a mapping finished and the SW cleared it. +- * So the protocol is to always wait & clear. ++ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a ++ * mapping is in progress or because a mapping finished and the SW ++ * cleared it. So the protocol is to always wait & clear. + */ +- uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID0_PASID_MAPPING__VALID_MASK; ++ uint32_t pasid_mapping = (pasid == 0) ? 0 : ++ (uint32_t)pasid | ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); + +@@ -502,12 +507,15 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdma_rlc_rb_rptr); + +- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, ++ m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); +- ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, ++ m->sdma_rlc_rb_base_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, ++ m->sdma_rlc_rb_rptr_addr_lo); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, ++ m->sdma_rlc_rb_rptr_addr_hi); + data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); +@@ -638,7 +646,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + pr_debug("IQ timer is active\n"); + } else + break; +- loop: ++loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; +@@ -728,8 +736,9 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) +- WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], +- cntl.u32All); ++ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], ++ cntl.u32All); + + return 0; + } +@@ -747,19 +756,23 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_ADDR_HI], + addr_hi); + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_ADDR_LO], + addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +@@ -883,43 +896,35 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + + switch (type) { + case KGD_ENGINE_PFP: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.pfp_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.me_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.ce_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec2_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.rlc_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: +- hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[0].fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: +- hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[1].fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index 833eba2..9f80b1e5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -517,11 +517,15 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + +- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, ++ m->sdmax_rlcx_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, ++ m->sdmax_rlcx_rb_base_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, ++ m->sdmax_rlcx_rb_rptr_addr_lo); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, ++ m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); +@@ -660,7 +664,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + pr_debug("IQ timer is active\n"); + } else + break; +- loop: ++loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; +@@ -840,8 +844,9 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) +- WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], +- cntl.u32All); ++ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], ++ cntl.u32All); + + return 0; + } +@@ -859,19 +864,23 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_ADDR_HI], + addr_hi); + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_ADDR_LO], + addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + +- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], ++ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX ++ + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +@@ -945,42 +954,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.pfp_fw->data; ++ adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.me_fw->data; ++ adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.ce_fw->data; ++ adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec_fw->data; ++ adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec2_fw->data; ++ adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.rlc_fw->data; ++ adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[0].fw->data; ++ adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[1].fw->data; ++ adev->sdma.instance[1].fw->data; + break; + + default: +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +index e315dc7..53a0e52 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +@@ -644,7 +644,8 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, ++ m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { +@@ -661,9 +662,12 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); +- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, ++ m->sdmax_rlcx_rb_base_hi); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, ++ m->sdmax_rlcx_rb_rptr_addr_lo); ++ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, ++ m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); +@@ -718,7 +722,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && +- high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) ++ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); +@@ -927,22 +931,30 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ +- WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff); +- WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f); +- +- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff); +- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), ++ 0xffffffff); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), ++ 0x0000001f); ++ ++ WREG32(SOC15_REG_OFFSET(MMHUB, 0, ++ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), ++ 0xffffffff); ++ WREG32(SOC15_REG_OFFSET(MMHUB, 0, ++ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), ++ 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + +- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), req); ++ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), ++ req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & +- (1 << vmid))) ++ (1 << vmid))) + cpu_relax(); + +- while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & +- (1 << vmid))) ++ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, ++ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & ++ (1 << vmid))) + cpu_relax(); + } + +@@ -1034,19 +1046,13 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; +- WREG32(watch_base_addr + +- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + +- ADDRESS_WATCH_REG_CNTL], ++ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + +- WREG32(watch_base_addr + +- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + +- ADDRESS_WATCH_REG_ADDR_HI], ++ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], + addr_hi); + +- WREG32(watch_base_addr + +- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + +- ADDRESS_WATCH_REG_ADDR_LO], ++ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], + addr_lo); + + /* Enable the watch point */ +@@ -1121,43 +1127,35 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + + switch (type) { + case KGD_ENGINE_PFP: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.pfp_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.me_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.ce_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec2_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: +- hdr = (const union amdgpu_firmware_header *) +- adev->gfx.rlc_fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: +- hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[0].fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: +- hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[1].fw->data; ++ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: +@@ -1199,9 +1197,9 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), +- lower_32_bits(adev->vm_manager.max_pfn - 1)); ++ lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), +- upper_32_bits(adev->vm_manager.max_pfn - 1)); ++ upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +@@ -1210,9 +1208,9 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), +- lower_32_bits(adev->vm_manager.max_pfn - 1)); ++ lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), +- upper_32_bits(adev->vm_manager.max_pfn - 1)); ++ upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 3129383..1b84fc9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -42,7 +42,8 @@ + #include "gmc/gmc_8_1_d.h" + + /* Special VM and GART address alignment needed for VI pre-Fiji due to +- * a HW bug. */ ++ * a HW bug. ++ */ + #define VI_BO_SIZE_ALIGN (0x8000) + + /* BO flag to indicate a KFD userptr BO */ +@@ -663,7 +664,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, + va, size, domain_string(alloc_domain)); + + /* Allocate buffer object. Userptr objects need to start out +- * in the CPU domain, get moved to GTT when pinned. */ ++ * in the CPU domain, get moved to GTT when pinned. ++ */ + ret = amdgpu_bo_create(adev, size, byte_align, false, + alloc_domain, + flags, sg, NULL, &bo); +@@ -719,7 +721,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, + * back-off the reservation and then reacquire it. Track all the + * reservation info in a context structure. Buffers can be mapped to + * multiple VMs simultaneously (buffers being restored on multiple +- * GPUs). */ ++ * GPUs). ++ */ + struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; + unsigned int n_vms; +@@ -802,7 +805,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + { + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; +- unsigned i; ++ unsigned int i; + int ret; + + ctx->reserved = false; +@@ -1158,7 +1161,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + + mutex_unlock(&mem->lock); + /* lock is not needed after this, since mem is unused and will +- * be freed anyway */ ++ * be freed anyway ++ */ + + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); +@@ -1527,7 +1531,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + { + struct kfd_bo_va_list *entry; + struct amdgpu_device *adev; +- unsigned mapped_before; ++ unsigned int mapped_before; + int ret = 0; + struct bo_vm_reservation_context ctx; + struct amdkfd_process_info *process_info; +diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h +index 9054068..ff8255d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h ++++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h +@@ -34,7 +34,8 @@ struct cik_ih_ring_entry { + uint32_t reserved3:4; + + /* pipeid, meid and unused3 are officially called RINGID, +- * but for our purposes, they always decode into pipe and ME. */ ++ * but for our purposes, they always decode into pipe and ME. ++ */ + uint32_t pipeid:2; + uint32_t meid:2; + uint32_t reserved4:4; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 0c4ea11..8b35b70 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -557,14 +557,17 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { +- dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__); ++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", ++ __func__); + return status; + } + + down_write(&p->lock); + mutex_lock(get_dbgmgr_mutex()); + +- /* make sure that we have pdd, if this the first queue created for this process */ ++ /* make sure that we have pdd, if this the first queue created for ++ * this process ++ */ + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + mutex_unlock(get_dbgmgr_mutex()); +@@ -599,12 +602,10 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); +- if (dev == NULL) +- return -EINVAL; +- +- if (dev->device_info->asic_family == CHIP_CARRIZO) { +- pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); +- return -EINVAL; ++ if (!dev) { ++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", ++ __func__); ++ return status; + } + + mutex_lock(get_dbgmgr_mutex()); +@@ -646,92 +647,90 @@ kfd_ioctl_dbg_address_watch(struct file *filep, + + memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); + +- do { +- dev = kfd_device_by_id(args->gpu_id); +- if (!dev) { +- dev_info(NULL, +- "Error! kfd: In func %s >> get device by id failed\n", +- __func__); +- break; +- } +- +- cmd_from_user = (void __user *) args->content_ptr; +- +- if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) { +- status = -EINVAL; +- break; +- } +- +- if (args->buf_size_in_bytes <= sizeof(*args)) { +- status = -EINVAL; +- break; +- } ++ dev = kfd_device_by_id(args->gpu_id); ++ if (!dev) { ++ dev_info(NULL, ++ "Error! kfd: In func %s >> get device by id failed\n", ++ __func__); ++ return -EFAULT; ++ } + +- /* this is the actual buffer to work with */ ++ cmd_from_user = (void __user *) args->content_ptr; + +- args_buff = memdup_user(cmd_from_user, +- args->buf_size_in_bytes - sizeof(*args)); +- if (IS_ERR(args_buff)) +- return PTR_ERR(args_buff); ++ if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE || ++ (args->buf_size_in_bytes <= sizeof(*args))) ++ return -EINVAL; + +- aw_info.process = p; ++ /* this is the actual buffer to work with */ ++ args_buff = memdup_user(cmd_from_user, ++ args->buf_size_in_bytes - sizeof(*args)); + +- aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); +- args_idx += sizeof(aw_info.num_watch_points); ++ if (IS_ERR(args_buff)) ++ return PTR_ERR(args_buff); + +- aw_info.watch_mode = (HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; +- args_idx += sizeof(HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; ++ aw_info.process = p; + +- /* set watch address base pointer to point on the array base within args_buff */ ++ aw_info.num_watch_points = ++ *((uint32_t *)(&args_buff[args_idx])); ++ args_idx += sizeof(aw_info.num_watch_points); + +- aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; ++ aw_info.watch_mode = ++ (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; ++ args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * ++ aw_info.num_watch_points; + +- /*skip over the addresses buffer */ +- args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; ++ /* set watch address base pointer to point on the array base ++ * within args_buff ++ */ + +- if (args_idx >= args->buf_size_in_bytes) { +- status = -EINVAL; +- break; +- } ++ aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; + +- watch_mask_value = (uint64_t) args_buff[args_idx]; ++ /* skip over the addresses buffer */ ++ args_idx += sizeof(aw_info.watch_address) * ++ aw_info.num_watch_points; + +- if (watch_mask_value > 0) { +- /* there is an array of masks */ ++ if (args_idx >= args->buf_size_in_bytes) { ++ status = -EINVAL; ++ goto out; ++ } + +- /* set watch mask base pointer to point on the array base within args_buff */ +- aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; ++ watch_mask_value = (uint64_t) args_buff[args_idx]; + +- /*skip over the masks buffer */ +- args_idx += sizeof(aw_info.watch_mask) * aw_info.num_watch_points; +- } ++ if (watch_mask_value > 0) { ++ /* there is an array of masks */ + +- else +- /* just the NULL mask, set to NULL and skip over it */ +- { +- aw_info.watch_mask = NULL; +- args_idx += sizeof(aw_info.watch_mask); +- } ++ /* set watch mask base pointer to point on the array ++ * base within args_buff ++ */ ++ aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; + +- if (args_idx > args->buf_size_in_bytes) { +- status = -EINVAL; +- break; +- } ++ /* skip over the masks buffer */ ++ args_idx += sizeof(aw_info.watch_mask) * ++ aw_info.num_watch_points; ++ } + +- aw_info.watch_event = NULL; /* Currently HSA Event is not supported for DBG */ +- status = 0; ++ else ++ /* just the NULL mask, set to NULL and skip over it */ ++ { ++ aw_info.watch_mask = NULL; ++ args_idx += sizeof(aw_info.watch_mask); ++ } + +- } while (0); ++ if (args_idx > args->buf_size_in_bytes) { ++ status = -EINVAL; ++ goto out; ++ } + +- if (status == 0) { +- mutex_lock(get_dbgmgr_mutex()); ++ /* Currently HSA Event is not supported for DBG */ ++ aw_info.watch_event = NULL; + +- status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); ++ mutex_lock(get_dbgmgr_mutex()); + +- mutex_unlock(get_dbgmgr_mutex()); ++ status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); + +- } ++ mutex_unlock(get_dbgmgr_mutex()); + ++out: + kfree(args_buff); + + return status; +@@ -739,11 +738,13 @@ kfd_ioctl_dbg_address_watch(struct file *filep, + + /* + * Parse and generate fixed size data structure for wave control. +- * Buffer is generated in a "packed" form, for avoiding structure packing/pending dependencies. ++ * Buffer is generated in a "packed" form, for avoiding structure ++ * packing/pending dependencies. + */ + + static int +-kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data) ++kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, ++ void *data) + { + long status = -EFAULT; + struct kfd_ioctl_dbg_wave_control_args *args = data; +@@ -768,78 +769,76 @@ kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data + + dev_info(NULL, "kfd: In func %s - start\n", __func__); + +- do { +- dev = kfd_device_by_id(args->gpu_id); +- if (!dev) { +- dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__); +- break; +- } +- +- /* input size must match the computed "compact" size */ +- +- if (args->buf_size_in_bytes != computed_buff_size) { +- dev_info(NULL, +- "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n", +- __func__, args->buf_size_in_bytes, computed_buff_size); +- status = -EINVAL; +- break; +- } +- +- cmd_from_user = (void __user *) args->content_ptr; +- +- /* copy the entire buffer from user */ ++ dev = kfd_device_by_id(args->gpu_id); ++ if (!dev) { ++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", ++ __func__); ++ return -EFAULT; ++ } + +- args_buff = memdup_user(cmd_from_user, +- args->buf_size_in_bytes - sizeof(*args)); +- if (IS_ERR(args_buff)) +- return PTR_ERR(args_buff); ++ /* input size must match the computed "compact" size */ + +- if (copy_from_user(args_buff, +- (void __user *) args->content_ptr, +- args->buf_size_in_bytes - sizeof(*args))) { +- dev_info(NULL, +- "Error! kfd: In func %s >> copy_from_user failed\n", +- __func__); +- break; +- } ++ if (args->buf_size_in_bytes != computed_buff_size) { ++ dev_info(NULL, ++ "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n", ++ __func__, args->buf_size_in_bytes, ++ computed_buff_size); ++ return -EINVAL; ++ } + +- /* move ptr to the start of the "pay-load" area */ ++ cmd_from_user = (void __user *) args->content_ptr; + ++ /* copy the entire buffer from user */ + +- wac_info.process = p; ++ args_buff = memdup_user(cmd_from_user, ++ args->buf_size_in_bytes - sizeof(*args)); ++ if (IS_ERR(args_buff)) ++ return PTR_ERR(args_buff); + +- wac_info.operand = (HSA_DBG_WAVEOP) *((HSA_DBG_WAVEOP *)(&args_buff[args_idx])); +- args_idx += sizeof(wac_info.operand); ++ if (copy_from_user(args_buff, ++ (void __user *) args->content_ptr, ++ args->buf_size_in_bytes - sizeof(*args))) { ++ dev_info(NULL, ++ "Error! kfd: In func %s >> copy_from_user failed\n", ++ __func__); ++ goto out; ++ } + +- wac_info.mode = (HSA_DBG_WAVEMODE) *((HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); +- args_idx += sizeof(wac_info.mode); ++ /* move ptr to the start of the "pay-load" area */ + +- wac_info.trapId = (uint32_t) *((uint32_t *)(&args_buff[args_idx])); +- args_idx += sizeof(wac_info.trapId); ++ wac_info.process = p; + +- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = *((uint32_t *)(&args_buff[args_idx])); +- wac_info.dbgWave_msg.MemoryVA = NULL; ++ wac_info.operand = ++ *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); ++ args_idx += sizeof(wac_info.operand); + ++ wac_info.mode = ++ *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); ++ args_idx += sizeof(wac_info.mode); + +- status = 0; ++ wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); ++ args_idx += sizeof(wac_info.trapId); + +- } while (0); +- if (status == 0) { +- mutex_lock(get_dbgmgr_mutex()); ++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = ++ *((uint32_t *)(&args_buff[args_idx])); ++ wac_info.dbgWave_msg.MemoryVA = NULL; + +- dev_info(NULL, +- "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", +- __func__, wac_info.process, wac_info.operand, wac_info.mode, wac_info.trapId, +- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); ++ mutex_lock(get_dbgmgr_mutex()); + +- status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); ++ dev_info(NULL, ++ "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", ++ __func__, wac_info.process, wac_info.operand, ++ wac_info.mode, wac_info.trapId, ++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + +- dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", __func__, status); ++ status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); + +- mutex_unlock(get_dbgmgr_mutex()); ++ dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", ++ __func__, status); + +- } ++ mutex_unlock(get_dbgmgr_mutex()); + ++out: + kfree(args_buff); + + return status; +@@ -935,7 +934,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, + "scratch_limit %llX\n", pdd->scratch_limit); + + args->num_of_nodes++; +- } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && ++ } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != ++ NULL && + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + } + +@@ -957,7 +957,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + + if (args->num_of_nodes == 0) { + /* Return number of nodes, so that user space can alloacate +- * sufficient memory */ ++ * sufficient memory ++ */ + down_write(&p->lock); + + if (!kfd_has_process_device_data(p)) { +@@ -978,7 +979,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + + /* Fill in process-aperture information for all available + * nodes, but not more than args->num_of_nodes as that is +- * the amount of memory allocated by user */ ++ * the amount of memory allocated by user ++ */ + pa = kzalloc((sizeof(struct kfd_process_device_apertures) * + args->num_of_nodes), GFP_KERNEL); + if (!pa) +@@ -1290,7 +1292,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, + pdd->vm); + + /* If freeing the buffer failed, leave the handle in place for +- * clean-up during process tear-down. */ ++ * clean-up during process tear-down. ++ */ + if (ret == 0) { + down_write(&p->lock); + kfd_process_device_remove_obj_handle( +@@ -1553,14 +1556,16 @@ static int kfd_ioctl_open_graphic_handle(struct file *filep, + + down_write(&p->lock); + /*TODO: When open_graphic_handle is implemented, we need to create +- * the corresponding interval tree. We need to know the size of +- * the buffer through open_graphic_handle(). We use 1 for now.*/ ++ * the corresponding interval tree. We need to know the size of ++ * the buffer through open_graphic_handle(). We use 1 for now. ++ */ + idr_handle = kfd_process_device_create_obj_handle(pdd, mem, + args->va_addr, 1, NULL); + up_write(&p->lock); + if (idr_handle < 0) { + /* FIXME: destroy_process_gpumem doesn't seem to be +- * implemented anywhere */ ++ * implemented anywhere ++ */ + dev->kfd2kgd->destroy_process_gpumem(dev->kgd, mem); + return -EFAULT; + } +@@ -1606,7 +1611,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, + struct kgd_dev *dma_buf_kgd; + void *metadata_buffer = NULL; + uint32_t flags; +- unsigned i; ++ unsigned int i; + int r; + + /* Find a KFD GPU device that supports the get_dmabuf_info query */ +@@ -2003,7 +2008,8 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep, + #endif + + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ +- [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} ++ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ ++ .cmd_drv = 0, .name = #ioctl} + + /** Ioctl table */ + static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +index ab35190..55c5e4e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -12,11 +12,13 @@ + /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. + * GPU processor ID are expressed with Bit[31]=1. + * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs +- * used in the CRAT. */ ++ * used in the CRAT. ++ */ + static uint32_t gpu_processor_id_low = 0x80001000; + + /* Return the next available gpu_processor_id and increment it for next GPU +- * @total_cu_count - Total CUs present in the GPU including ones masked off ++ * @total_cu_count - Total CUs present in the GPU including ones ++ * masked off + */ + static inline unsigned int get_and_inc_gpu_processor_id( + unsigned int total_cu_count) +@@ -33,7 +35,8 @@ struct kfd_gpu_cache_info { + uint32_t cache_level; + uint32_t flags; + /* Indicates how many Compute Units share this cache +- * Value = 1 indicates the cache is not shared */ ++ * Value = 1 indicates the cache is not shared ++ */ + uint32_t num_cu_shared; + }; + +@@ -104,7 +107,8 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { + }; + + /* NOTE: In future if more information is added to struct kfd_gpu_cache_info +- * the following ASICs may need a separate table. */ ++ * the following ASICs may need a separate table. ++ */ + #define hawaii_cache_info kaveri_cache_info + #define tonga_cache_info carrizo_cache_info + #define fiji_cache_info carrizo_cache_info +@@ -151,7 +155,7 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, + } + + /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct +- * topology device present in the device_list ++ * topology device present in the device_list + */ + static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, + struct list_head *device_list) +@@ -177,7 +181,7 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, + } + + /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct +- * topology device present in the device_list ++ * topology device present in the device_list + */ + static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + struct list_head *device_list) +@@ -195,9 +199,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + if (props == NULL) + return -ENOMEM; + +- /* +- * We're on GPU node +- */ ++ /* We're on GPU node */ + if (dev->node_props.cpu_cores_count == 0) { + /* APU */ + if (mem->visibility_type == 0) +@@ -206,8 +208,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + /* dGPU */ + else + props->heap_type = mem->visibility_type; +- } +- else ++ } else + props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + + if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) +@@ -231,7 +232,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + } + + /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct +- * topology device present in the device_list ++ * topology device present in the device_list + */ + static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + struct list_head *device_list) +@@ -254,8 +255,8 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + * Compute Unit. So map the cache using CPU core Id or SIMD + * (GPU) ID. + * TODO: This works because currently we can safely assume that +- * Compute Units are parsed before caches are parsed. In future +- * remove this dependency ++ * Compute Units are parsed before caches are parsed. In ++ * future, remove this dependency + */ + if ((id >= dev->node_props.cpu_core_id_base && + id <= dev->node_props.cpu_core_id_base + +@@ -298,7 +299,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + } + + /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct +- * topology device present in the device_list ++ * topology device present in the device_list + */ + static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, + struct list_head *device_list) +@@ -313,7 +314,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, + id_from = iolink->proximity_domain_from; + id_to = iolink->proximity_domain_to; + +- pr_debug("Found IO link entry in CRAT table with id_from=%d\n", id_from); ++ pr_debug("Found IO link entry in CRAT table with id_from=%d\n", ++ id_from); + list_for_each_entry(dev, device_list, list) { + if (id_from == dev->proximity_domain) { + props = kfd_alloc_struct(props); +@@ -368,7 +370,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, + } + + /* kfd_parse_subtype - parse subtypes and attach it to correct topology device +- * present in the device_list ++ * present in the device_list + * @sub_type_hdr - subtype section of crat_image + * @device_list - list of topology devices present in this crat_image + */ +@@ -397,15 +399,11 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + ret = kfd_parse_subtype_cache(cache, device_list); + break; + case CRAT_SUBTYPE_TLB_AFFINITY: +- /* +- * For now, nothing to do here +- */ ++ /* For now, nothing to do here */ + pr_debug("Found TLB entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: +- /* +- * For now, nothing to do here +- */ ++ /* For now, nothing to do here */ + pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_IOLINK_AFFINITY: +@@ -421,12 +419,13 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + } + + /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT +- * create a kfd_topology_device and add in to device_list. Also parse +- * CRAT subtypes and attach it to appropriate kfd_topology_device ++ * create a kfd_topology_device and add in to device_list. Also parse ++ * CRAT subtypes and attach it to appropriate kfd_topology_device + * @crat_image - input image containing CRAT +- * @device_list - [OUT] list of kfd_topology_device generated after parsing +- * crat_image ++ * @device_list - [OUT] list of kfd_topology_device generated after ++ * parsing crat_image + * @proximity_domain - Proximity domain of the first device in the table ++ * + * Return - 0 if successful else -ve value + */ + int kfd_parse_crat_table(void *crat_image, +@@ -445,9 +444,8 @@ int kfd_parse_crat_table(void *crat_image, + if (!crat_image) + return -EINVAL; + +- if (!list_empty(device_list)) { ++ if (!list_empty(device_list)) + pr_warn("Error device list should be empty\n"); +- } + + num_nodes = crat_table->num_domains; + image_len = crat_table->length; +@@ -465,7 +463,8 @@ int kfd_parse_crat_table(void *crat_image, + return -ENOMEM; + + memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); +- memcpy(top_dev->oem_table_id, crat_table->oem_table_id, CRAT_OEMTABLEID_LENGTH); ++ memcpy(top_dev->oem_table_id, crat_table->oem_table_id, ++ CRAT_OEMTABLEID_LENGTH); + top_dev->oem_revision = crat_table->oem_revision; + + last_header_type = last_header_length = 0; +@@ -527,7 +526,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. If +- * inactive active skip it*/ ++ * inactive active skip it ++ */ + if (first_active_cu) { + memset(pcache, 0, sizeof(struct crat_subtype_cache)); + pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; +@@ -539,7 +539,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, + pcache->cache_size = pcache_info[cache_type].cache_size; + + /* Sibling map is w.r.t processor_id_low, so shift out +- * inactive CU */ ++ * inactive CU ++ */ + cu_sibling_map_mask = + cu_sibling_map_mask >> (first_active_cu - 1); + +@@ -555,9 +556,12 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, + return 1; + } + +-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info tables ++/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info ++ * tables ++ * + * @kdev - [IN] GPU device +- * @gpu_processor_id - [IN] GPU processor ID to which these caches associate ++ * @gpu_processor_id - [IN] GPU processor ID to which these caches ++ * associate + * @available_size - [IN] Amount of memory available in pcache + * @cu_info - [IN] Compute Unit info obtained from KGD + * @pcache - [OUT] memory into which cache data is to be filled in. +@@ -674,15 +678,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, + + /* + * kfd_create_crat_image_acpi - Allocates memory for CRAT image and +- * copies CRAT from ACPI (if available). +- * ++ * copies CRAT from ACPI (if available). + * NOTE: Call kfd_destroy_crat_image to free CRAT image memory + * +- * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then +- * *crat_image will be NULL +- * @size: [OUT] size of crat_image ++ * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then ++ * crat_image will be NULL ++ * @size: [OUT] size of crat_image + * +- * Return 0 if successful else return -ve value ++ * Return 0 if successful else return -ve value + */ + #ifdef CONFIG_ACPI + int kfd_create_crat_image_acpi(void **crat_image, size_t *size) +@@ -696,15 +699,14 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) + + *crat_image = NULL; + +- /* +- * Fetch the CRAT table from ACPI +- */ ++ /* Fetch the CRAT table from ACPI */ + status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); + if (status == AE_NOT_FOUND) { + pr_warn("CRAT table not found\n"); + return -ENODATA; + } else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); ++ + pr_err("CRAT table error: %s\n", err); + return -EINVAL; + } +@@ -740,11 +742,11 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) + + /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node + * +- * @numa_node_id: CPU NUMA node id +- * @avail_size: Available size in the memory +- * @sub_type_hdr: Memory into which compute info will be filled in ++ * @numa_node_id: CPU NUMA node id ++ * @avail_size: Available size in the memory ++ * @sub_type_hdr: Memory into which compute info will be filled in + * +- * Return 0 if successful else return -ve value ++ * Return 0 if successful else return -ve value + */ + static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, +@@ -779,11 +781,11 @@ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, + + /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node + * +- * @numa_node_id: CPU NUMA node id +- * @avail_size: Available size in the memory +- * @sub_type_hdr: Memory into which compute info will be filled in ++ * @numa_node_id: CPU NUMA node id ++ * @avail_size: Available size in the memory ++ * @sub_type_hdr: Memory into which compute info will be filled in + * +- * Return 0 if successful else return -ve value ++ * Return 0 if successful else return -ve value + */ + static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, +@@ -808,7 +810,8 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, + + /* Unlike si_meminfo, si_meminfo_node is not exported. So + * the following lines are duplicated from si_meminfo_node +- * function */ ++ * function ++ */ + pgdat = NODE_DATA(numa_node_id); + for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) +@@ -874,7 +877,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, + * + * @pcrat_image: Fill in VCRAT for CPU + * @size: [IN] allocated size of crat_image. +- * [OUT] actual size of data filled in crat_image ++ * [OUT] actual size of data filled in crat_image + */ + static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) + { +@@ -902,7 +905,8 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) + return -ENOMEM; + + memset(crat_table, 0, sizeof(struct crat_header)); +- memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature)); ++ memcpy(&crat_table->signature, CRAT_SIGNATURE, ++ sizeof(crat_table->signature)); + crat_table->length = sizeof(struct crat_header); + + #ifdef CONFIG_ACPI +@@ -911,8 +915,10 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) + pr_warn("DSDT table not found for OEM information\n"); + else { + crat_table->oem_revision = acpi_table->revision; +- memcpy(crat_table->oem_id, acpi_table->oem_id, CRAT_OEMID_LENGTH); +- memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH); ++ memcpy(crat_table->oem_id, acpi_table->oem_id, ++ CRAT_OEMID_LENGTH); ++ memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, ++ CRAT_OEMTABLEID_LENGTH); + } + #else + crat_table->oem_revision = 0; +@@ -974,8 +980,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) + /* TODO: Add cache Subtype for CPU. + * Currently, CPU cache information is available in function + * detect_cache_attributes(cpu) defined in the file +- * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not exported +- * and to get the same information the code needs to be duplicated. ++ * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not ++ * exported and to get the same information the code needs to be ++ * duplicated. + */ + + *size = crat_table->length; +@@ -1014,14 +1021,13 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, + } + + /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU +- * to its NUMA node +- * +- * @avail_size: Available size in the memory +- * @kdev - [IN] GPU device +- * @sub_type_hdr: Memory into which io link info will be filled in +- * @proximity_domain - proximity domain of the GPU node ++ * to its NUMA node ++ * @avail_size: Available size in the memory ++ * @kdev - [IN] GPU device ++ * @sub_type_hdr: Memory into which io link info will be filled in ++ * @proximity_domain - proximity domain of the GPU node + * +- * Return 0 if successful else return -ve value ++ * Return 0 if successful else return -ve value + */ + static int kfd_fill_gpu_direct_io_link(int *avail_size, + struct kfd_dev *kdev, +@@ -1040,7 +1046,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, + sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in IOLINK subtype. +- * TODO: Fill-in other fields of iolink subtype */ ++ * TODO: Fill-in other fields of iolink subtype ++ */ + sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; + sub_type_hdr->proximity_domain_from = proximity_domain; + #ifdef CONFIG_NUMA +@@ -1076,8 +1083,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + struct amd_iommu_device_info iommu_info; + const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | +- AMD_IOMMU_DEVICE_FLAG_PRI_SUP | +- AMD_IOMMU_DEVICE_FLAG_PASID_SUP; ++ AMD_IOMMU_DEVICE_FLAG_PRI_SUP | ++ AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + #endif + struct kfd_local_mem_info local_mem_info; + +@@ -1093,8 +1100,10 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + + memset(crat_table, 0, sizeof(struct crat_header)); + +- memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature)); +- crat_table->length = sizeof(struct crat_header); /* Change length as we add more subtypes*/ ++ memcpy(&crat_table->signature, CRAT_SIGNATURE, ++ sizeof(crat_table->signature)); ++ /* Change length as we add more subtypes*/ ++ crat_table->length = sizeof(struct crat_header); + crat_table->num_domains = 1; + crat_table->total_entries = 0; + +@@ -1135,11 +1144,13 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + cu->hsa_capability = 0; + + /* Check if this node supports IOMMU. During parsing this flag will +- * translate to HSA_CAP_ATS_PRESENT */ ++ * translate to HSA_CAP_ATS_PRESENT ++ */ + #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + iommu_info.flags = 0; +- if (0 == amd_iommu_device_info(kdev->pdev, &iommu_info)) { +- if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags) ++ if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { ++ if ((iommu_info.flags & required_iommu_flags) == ++ required_iommu_flags) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; + } + #endif +@@ -1150,7 +1161,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + /* Fill in Subtype: Memory. Only on systems with large BAR (no + * private FB), report memory as public. On other systems + * report the total FB size (public+private) as a single +- * private heap. */ ++ * private heap. ++ */ + kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); +@@ -1180,7 +1192,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + crat_table->total_entries++; + + /* TODO: Fill in cache information. This information is NOT readily +- * available in KGD */ ++ * available in KGD ++ */ + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, +@@ -1224,17 +1237,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + * NOTE: Call kfd_destroy_crat_image to free CRAT image memory + * + * @crat_image: VCRAT image created because ACPI does not have a +- * CRAT for this device ++ * CRAT for this device + * @size: [OUT] size of virtual crat_image + * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device +- * COMPUTE_UNIT_GPU - Create VCRAT for GPU +- * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU +- * -- this option is not currently implemented. The assumption +- * is that all AMD APUs will have CRAT ++ * COMPUTE_UNIT_GPU - Create VCRAT for GPU ++ * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU ++ * -- this option is not currently implemented. ++ * The assumption is that all AMD APUs will have CRAT + * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU + * +- * Return 0 if successful else return -ve value +-*/ ++ * Return 0 if successful else return -ve value ++ */ + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + int flags, struct kfd_dev *kdev, uint32_t proximity_domain) + { +@@ -1269,8 +1282,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + ret = kfd_create_vcrat_image_gpu(pcrat_image, size, + kdev, proximity_domain); + break; +- case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) : +- /*TODO:*/ ++ case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): ++ /* TODO: */ + ret = -EINVAL; + pr_err("VCRAT not implemented for APU\n"); + break; +@@ -1287,12 +1300,10 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + + /* kfd_destroy_crat_image + * +- * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) ++ * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) + * + */ + void kfd_destroy_crat_image(void *crat_image) + { +- if (crat_image) +- kfree(crat_image); +- return; ++ kfree(crat_image); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +index f01aea2..00de41f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +@@ -46,8 +46,8 @@ + #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) + + /* Compute Unit flags */ +-#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ +-#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ ++#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ ++#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ + + struct crat_header { + uint32_t signature; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +index 5fea0d3..0fdc147 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +@@ -64,104 +64,112 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + union ULARGE_INTEGER *largep; + union ULARGE_INTEGER addr; + +- do { +- if ((kq == NULL) || (packet_buff == NULL) || (size_in_bytes == 0)) { +- pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n", __func__); +- status = -EINVAL; +- break; +- } +- /* todo - enter proper locking to be multithreaded safe */ +- +- /* We acquire a buffer from DIQ +- * The receive packet buff will be sitting on the Indirect Buffer +- * and in the PQ we put the IB packet + sync packet(s). +- */ +- if (sync) +- pq_packets_size_in_bytes += +- sizeof(struct pm4_mec_release_mem); +- status = kq->ops.acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff); +- if (status != 0) { +- pr_debug("Error! kfd: In func %s >> acquire_packet_buffer failed\n", __func__); +- break; +- } +- +- memset(ib_packet_buff, 0, pq_packets_size_in_bytes); ++ if ((kq == NULL) || (packet_buff == NULL) || ++ (size_in_bytes == 0)) { ++ pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n", ++ __func__); ++ return -EINVAL; ++ } ++ /* todo - enter proper locking to be multithreaded safe */ ++ ++ /* We acquire a buffer from DIQ ++ * The receive packet buff will be sitting on the Indirect ++ * Buffer and in the PQ we put the IB packet + sync packet(s). ++ */ ++ if (sync) ++ pq_packets_size_in_bytes += ++ sizeof(struct pm4_mec_release_mem); ++ status = kq->ops.acquire_packet_buffer(kq, ++ pq_packets_size_in_bytes / sizeof(uint32_t), ++ &ib_packet_buff); ++ if (status != 0) { ++ pr_debug("Error! kfd: In func %s >> acquire_packet_buffer failed\n", ++ __func__); ++ return status; ++ } + +- ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); ++ memset(ib_packet_buff, 0, pq_packets_size_in_bytes); + +- ib_packet->header.count = 3; +- ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; +- ib_packet->header.type = PM4_TYPE_3; ++ ib_packet = ++ (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); + +- largep = (union ULARGE_INTEGER *) &vmid0_address; ++ ib_packet->header.count = 3; ++ ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; ++ ib_packet->header.type = PM4_TYPE_3; + +- ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; +- ib_packet->bitfields3.ib_base_hi = largep->u.high_part; ++ largep = (union ULARGE_INTEGER *) &vmid0_address; + +- ib_packet->control = (1 << 23) | (1 << 31) | +- ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); ++ ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; ++ ib_packet->bitfields3.ib_base_hi = largep->u.high_part; + +- ib_packet->bitfields5.pasid = pasid; ++ ib_packet->control = (1 << 23) | (1 << 31) | ++ ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + +- if (!sync) { +- kq->ops.submit_packet(kq); +- break; +- } ++ ib_packet->bitfields5.pasid = pasid; + +- /* +- * for now we use release mem for GPU-CPU synchronization +- * Consider WaitRegMem + WriteData as a better alternative +- * we get a GART allocations ( gpu/cpu mapping), +- * for the sync variable, and wait until: +- * (a) Sync with HW +- * (b) Sync var is written by CP to mem. +- */ +- rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff + +- (sizeof(struct pm4__indirect_buffer_pasid) / sizeof(unsigned int))); +- +- status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), +- &mem_obj); ++ if (!sync) { ++ kq->ops.submit_packet(kq); ++ return status; ++ } + +- if (status == 0) { ++ /* ++ * for now we use release mem for GPU-CPU synchronization ++ * Consider WaitRegMem + WriteData as a better alternative ++ * we get a GART allocations ( gpu/cpu mapping), ++ * for the sync variable, and wait until: ++ * (a) Sync with HW ++ * (b) Sync var is written by CP to mem. ++ */ ++ rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff + ++ (sizeof(struct pm4__indirect_buffer_pasid) / ++ sizeof(unsigned int))); ++ ++ status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), ++ &mem_obj); ++ ++ if (status != 0) { ++ pr_debug("Error! kfd: In func %s >> failed to allocate GART memory\n", ++ __func__); ++ return status; ++ } + +- rm_state = (uint64_t *) mem_obj->cpu_ptr; ++ rm_state = (uint64_t *) mem_obj->cpu_ptr; + +- *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; ++ *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; + +- rm_packet->header.opcode = IT_RELEASE_MEM; +- rm_packet->header.type = PM4_TYPE_3; +- rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int) - 2; ++ rm_packet->header.opcode = IT_RELEASE_MEM; ++ rm_packet->header.type = PM4_TYPE_3; ++ rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / ++ sizeof(unsigned int) - 2; + +- rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; +- rm_packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; +- rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; +- rm_packet->bitfields2.atc = 0; +- rm_packet->bitfields2.tc_wb_action_ena = 1; ++ rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; ++ rm_packet->bitfields2.event_index = ++ event_index___release_mem__end_of_pipe; ++ rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; ++ rm_packet->bitfields2.atc = 0; ++ rm_packet->bitfields2.tc_wb_action_ena = 1; + +- addr.quad_part = mem_obj->gpu_addr; ++ addr.quad_part = mem_obj->gpu_addr; + +- rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; +- rm_packet->address_hi = addr.u.high_part; ++ rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; ++ rm_packet->address_hi = addr.u.high_part; + +- rm_packet->bitfields3.data_sel = data_sel___release_mem__send_64_bit_data; +- rm_packet->bitfields3.int_sel = int_sel___release_mem__send_data_after_write_confirm; +- rm_packet->bitfields3.dst_sel = dst_sel___release_mem__memory_controller; ++ rm_packet->bitfields3.data_sel = ++ data_sel___release_mem__send_64_bit_data; ++ rm_packet->bitfields3.int_sel = ++ int_sel___release_mem__send_data_after_write_confirm; ++ rm_packet->bitfields3.dst_sel = ++ dst_sel___release_mem__memory_controller; + +- rm_packet->data_lo = QUEUESTATE__ACTIVE; ++ rm_packet->data_lo = QUEUESTATE__ACTIVE; + +- kq->ops.submit_packet(kq); ++ kq->ops.submit_packet(kq); + +- /* Wait till CP writes sync code: */ ++ /* Wait till CP writes sync code: */ + +- status = amdkfd_fence_wait_timeout( +- (unsigned int *) rm_state, ++ status = amdkfd_fence_wait_timeout((unsigned int *) rm_state, + QUEUESTATE__ACTIVE, 1500); + +- } else { +- pr_debug("Error! kfd: In func %s >> failed to allocate GART memory\n", __func__); +- } +- } while (false); +- + if (rm_state != NULL) + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + +@@ -170,7 +178,9 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + + static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) + { +- /* no action is needed in this case, just make sure diq will not be used */ ++ /* no action is needed in this case, just make sure diq will not ++ * be used ++ */ + + dbgdev->kq = NULL; + +@@ -186,37 +196,33 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) + unsigned int qid; + struct process_queue_manager *pqm = dbgdev->pqm; + +- do { +- +- if (!pqm) { +- pr_debug("Error! kfd: In func %s >> No PQM\n", __func__); +- status = -EFAULT; +- break; +- } +- +- properties.type = KFD_QUEUE_TYPE_DIQ; ++ if (!pqm) { ++ pr_debug("Error! kfd: In func %s >> No PQM\n", ++ __func__); ++ return -EFAULT; ++ } + +- status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, +- &properties, &qid); ++ properties.type = KFD_QUEUE_TYPE_DIQ; + +- if (status != 0) { +- pr_debug("Error! kfd: In func %s >> Create Queue failed\n", __func__); +- break; +- } ++ status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, ++ &properties, &qid); + +- pr_debug("kfd: DIQ Created with queue id: %d\n", qid); ++ if (status != 0) { ++ pr_debug("Error! kfd: In func %s >> Create Queue failed\n", ++ __func__); ++ return status; ++ } + +- kq = pqm_get_kernel_queue(dbgdev->pqm, qid); ++ pr_debug("kfd: DIQ Created with queue id: %d\n", qid); + +- if (kq == NULL) { +- pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n", __func__); +- status = -ENOMEM; +- break; +- } ++ kq = pqm_get_kernel_queue(dbgdev->pqm, qid); + +- dbgdev->kq = kq; +- +- } while (false); ++ if (kq == NULL) { ++ pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n", ++ __func__); ++ return -ENOMEM; ++ } ++ dbgdev->kq = kq; + + return status; + } +@@ -233,7 +239,9 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) + { + /* todo - if needed, kill wavefronts and disable watch */ + int status = 0; +- if ((dbgdev == NULL) || (dbgdev->pqm == NULL) || (dbgdev->kq == NULL)) { ++ ++ if ((dbgdev == NULL) || (dbgdev->pqm == NULL) || ++ (dbgdev->kq == NULL)) { + pr_debug("kfd Err:In func %s >> can't destroy diq\n", __func__); + status = -EFAULT; + } else { +@@ -260,13 +268,16 @@ static void dbgdev_address_watch_set_registers( + cntl->u32All = 0; + + if (adw_info->watch_mask != NULL) +- cntl->bitfields.mask = (uint32_t) (adw_info->watch_mask[index] & ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); ++ cntl->bitfields.mask = ++ (uint32_t) (adw_info->watch_mask[index] & ++ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); + else + cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + + addr.quad_part = (unsigned long long) adw_info->watch_address[index]; + +- addrHi->bitfields.addr = addr.u.high_part & ADDRESS_WATCH_REG_ADDHIGH_MASK; ++ addrHi->bitfields.addr = addr.u.high_part & ++ ADDRESS_WATCH_REG_ADDHIGH_MASK; + addrLo->bitfields.addr = + (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); + +@@ -276,8 +287,10 @@ static void dbgdev_address_watch_set_registers( + if (KFD_IS_DGPU(asic_family) == false) + cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; + pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); +- pr_debug("\t\t%20s %08x\n", "set reg add high :", addrHi->bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "set reg add low :", addrLo->bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "set reg add high :", ++ addrHi->bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "set reg add low :", ++ addrLo->bitfields.addr); + + } + +@@ -285,8 +298,6 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info) + { + +- int status = 0; +- + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; + union TCP_WATCH_CNTL_BITS cntl; +@@ -296,68 +307,67 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + + struct kfd_process_device *pdd; + +- do { +- /* taking the vmid for that process on the safe way using pdd */ +- pdd = kfd_get_process_device_data(dbgdev->dev, +- adw_info->process); +- if (!pdd) { +- pr_debug("Error! kfd: In func %s >> no PDD available\n", __func__); +- status = -EFAULT; +- break; +- } +- +- addrHi.u32All = 0; +- addrLo.u32All = 0; +- cntl.u32All = 0; +- +- vmid = pdd->qpd.vmid; +- +- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) +- || (adw_info->num_watch_points == 0)) { +- status = -EINVAL; +- break; +- } +- +- if ((adw_info->watch_mode == NULL) || (adw_info->watch_address == NULL)) { +- status = -EINVAL; +- break; +- } ++ /* taking the vmid for that process on the safe way ++ * using pdd ++ */ ++ pdd = kfd_get_process_device_data(dbgdev->dev, ++ adw_info->process); ++ if (!pdd) { ++ pr_debug("Error! kfd: In func %s >> no PDD available\n", ++ __func__); ++ return -EFAULT; ++ } + +- for (i = 0; i < adw_info->num_watch_points; i++) { ++ addrHi.u32All = 0; ++ addrLo.u32All = 0; ++ cntl.u32All = 0; + +- dbgdev_address_watch_set_registers( +- adw_info, +- &addrHi, +- &addrLo, +- &cntl, +- i, +- vmid, +- dbgdev->dev->device_info->asic_family +- ); ++ vmid = pdd->qpd.vmid; + +- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); +- pr_debug("\t\t%20s %08x\n", "register index :", i); +- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); +- pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask); +- pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode); +- pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid); +- pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc); +- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); +- +- pdd->dev->kfd2kgd->address_watch_execute( +- dbgdev->dev->kgd, +- i, +- cntl.u32All, +- addrHi.u32All, +- addrLo.u32All); +- } ++ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || ++ (adw_info->num_watch_points == 0) || (adw_info->watch_mode == NULL)) ++ return -EINVAL; + +- } while (false); ++ for (i = 0; i < adw_info->num_watch_points; i++) { ++ ++ dbgdev_address_watch_set_registers( ++ adw_info, ++ &addrHi, ++ &addrLo, ++ &cntl, ++ i, ++ vmid, ++ dbgdev->dev->device_info->asic_family ++ ); ++ ++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); ++ pr_debug("\t\t%20s %08x\n", "register index :", i); ++ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); ++ pr_debug("\t\t%20s %08x\n", "Address Low is :", ++ addrLo.bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "Address high is :", ++ addrHi.bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "Address high is :", ++ addrHi.bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "Control Mask is :", ++ cntl.bitfields.mask); ++ pr_debug("\t\t%20s %08x\n", "Control Mode is :", ++ cntl.bitfields.mode); ++ pr_debug("\t\t%20s %08x\n", "Control Vmid is :", ++ cntl.bitfields.vmid); ++ pr_debug("\t\t%20s %08x\n", "Control atc is :", ++ cntl.bitfields.atc); ++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); ++ ++ pdd->dev->kfd2kgd->address_watch_execute( ++ dbgdev->dev->kgd, ++ i, ++ cntl.u32All, ++ addrHi.u32All, ++ addrLo.u32All); ++ } + +- return status; ++ return 0; + } + + static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, +@@ -386,131 +396,135 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + addrLo.u32All = 0; + cntl.u32All = 0; + +- do { +- +- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { +- status = -EINVAL; +- break; +- } ++ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || ++ (adw_info->num_watch_points == 0) || ++ !adw_info->watch_mode || !adw_info->watch_address) ++ return -EINVAL; + +- if ((NULL == adw_info->watch_mode) || (NULL == adw_info->watch_address)) { +- status = -EINVAL; +- break; +- } ++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ++ ib_size/sizeof(uint32_t), ++ &packet_buff_uint, &packet_buff_gpu_addr); ++ if (status != 0) ++ return status; ++ memset(packet_buff_uint, 0, ib_size); ++ ++ packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); ++ ++ packets_vec[0].header.count = 1; ++ packets_vec[0].header.opcode = IT_SET_CONFIG_REG; ++ packets_vec[0].header.type = PM4_TYPE_3; ++ packets_vec[0].bitfields2.vmid_shift = ++ ADDRESS_WATCH_CNTL_OFFSET; ++ packets_vec[0].bitfields2.insert_vmid = 1; ++ packets_vec[1].ordinal1 = packets_vec[0].ordinal1; ++ packets_vec[1].bitfields2.insert_vmid = 0; ++ packets_vec[2].ordinal1 = packets_vec[0].ordinal1; ++ packets_vec[2].bitfields2.insert_vmid = 0; ++ packets_vec[3].ordinal1 = packets_vec[0].ordinal1; ++ packets_vec[3].bitfields2.vmid_shift = ++ ADDRESS_WATCH_CNTL_OFFSET; ++ packets_vec[3].bitfields2.insert_vmid = 1; ++ ++ for (i = 0; i < adw_info->num_watch_points; i++) { ++ ++ dbgdev_address_watch_set_registers( ++ adw_info, ++ &addrHi, ++ &addrLo, ++ &cntl, ++ i, ++ vmid, ++ dbgdev->dev->device_info->asic_family ++ ); + +- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, +- ib_size/sizeof(uint32_t), +- &packet_buff_uint, &packet_buff_gpu_addr); +- +- if (status != 0) +- break; +- +- memset(packet_buff_uint, 0, ib_size); +- +- packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); +- +- packets_vec[0].header.count = 1; +- packets_vec[0].header.opcode = IT_SET_CONFIG_REG; +- packets_vec[0].header.type = PM4_TYPE_3; +- packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; +- packets_vec[0].bitfields2.insert_vmid = 1; +- packets_vec[1].ordinal1 = packets_vec[0].ordinal1; +- packets_vec[1].bitfields2.insert_vmid = 0; +- packets_vec[2].ordinal1 = packets_vec[0].ordinal1; +- packets_vec[2].bitfields2.insert_vmid = 0; +- packets_vec[3].ordinal1 = packets_vec[0].ordinal1; +- packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; +- packets_vec[3].bitfields2.insert_vmid = 1; +- +- for (i = 0; i < adw_info->num_watch_points; i++) { +- +- dbgdev_address_watch_set_registers( +- adw_info, +- &addrHi, +- &addrLo, +- &cntl, ++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); ++ pr_debug("\t\t%20s %08x\n", "register index :", i); ++ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); ++ pr_debug("\t\t%20s %p\n", "Add ptr is :", ++ adw_info->watch_address); ++ pr_debug("\t\t%20s %08llx\n", "Add is :", ++ adw_info->watch_address[i]); ++ pr_debug("\t\t%20s %08x\n", "Address Low is :", ++ addrLo.bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "Address high is :", ++ addrHi.bitfields.addr); ++ pr_debug("\t\t%20s %08x\n", "Control Mask is :", ++ cntl.bitfields.mask); ++ pr_debug("\t\t%20s %08x\n", "Control Mode is :", ++ cntl.bitfields.mode); ++ pr_debug("\t\t%20s %08x\n", "Control Vmid is :", ++ cntl.bitfields.vmid); ++ pr_debug("\t\t%20s %08x\n", "Control atc is :", ++ cntl.bitfields.atc); ++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); ++ ++ aw_reg_add_dword = ++ dbgdev->dev->kfd2kgd ++ ->address_watch_get_offset( ++ dbgdev->dev->kgd, + i, +- vmid, +- dbgdev->dev->device_info->asic_family +- ); +- +- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); +- pr_debug("\t\t%20s %08x\n", "register index :", i); +- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); +- pr_debug("\t\t%20s %p\n", "Add ptr is :", adw_info->watch_address); +- pr_debug("\t\t%20s %08llx\n", "Add is :", adw_info->watch_address[i]); +- pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); +- pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask); +- pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode); +- pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid); +- pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc); +- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); +- +- aw_reg_add_dword = +- dbgdev->dev->kfd2kgd +- ->address_watch_get_offset( +- dbgdev->dev->kgd, +- i, +- ADDRESS_WATCH_REG_CNTL); ++ ADDRESS_WATCH_REG_CNTL); + +- packets_vec[0].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; +- packets_vec[0].reg_data[0] = cntl.u32All; +- +- aw_reg_add_dword = +- dbgdev->dev->kfd2kgd +- ->address_watch_get_offset( +- dbgdev->dev->kgd, +- i, +- ADDRESS_WATCH_REG_ADDR_HI); ++ packets_vec[0].bitfields2.reg_offset = ++ aw_reg_add_dword - CONFIG_REG_BASE; ++ packets_vec[0].reg_data[0] = cntl.u32All; + ++ aw_reg_add_dword = ++ dbgdev->dev->kfd2kgd ++ ->address_watch_get_offset( ++ dbgdev->dev->kgd, ++ i, ++ ADDRESS_WATCH_REG_ADDR_HI); + +- packets_vec[1].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; +- packets_vec[1].reg_data[0] = addrHi.u32All; + +- aw_reg_add_dword = +- dbgdev->dev->kfd2kgd +- ->address_watch_get_offset( +- dbgdev->dev->kgd, +- i, +- ADDRESS_WATCH_REG_ADDR_LO); ++ packets_vec[1].bitfields2.reg_offset = ++ aw_reg_add_dword - CONFIG_REG_BASE; ++ packets_vec[1].reg_data[0] = addrHi.u32All; + ++ aw_reg_add_dword = ++ dbgdev->dev->kfd2kgd ++ ->address_watch_get_offset( ++ dbgdev->dev->kgd, ++ i, ++ ADDRESS_WATCH_REG_ADDR_LO); + +- packets_vec[2].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; +- packets_vec[2].reg_data[0] = addrLo.u32All; + +- /* enable watch flag if address is not zero*/ +- if (adw_info->watch_address[i] > 0) +- cntl.bitfields.valid = 1; +- else +- cntl.bitfields.valid = 0; ++ packets_vec[2].bitfields2.reg_offset = ++ aw_reg_add_dword - CONFIG_REG_BASE; ++ packets_vec[2].reg_data[0] = addrLo.u32All; + +- aw_reg_add_dword = +- dbgdev->dev->kfd2kgd +- ->address_watch_get_offset( +- dbgdev->dev->kgd, +- i, +- ADDRESS_WATCH_REG_CNTL); ++ /* enable watch flag if address is not zero*/ ++ if (adw_info->watch_address[i] > 0) ++ cntl.bitfields.valid = 1; ++ else ++ cntl.bitfields.valid = 0; + ++ aw_reg_add_dword = ++ dbgdev->dev->kfd2kgd ++ ->address_watch_get_offset( ++ dbgdev->dev->kgd, ++ i, ++ ADDRESS_WATCH_REG_CNTL); + +- packets_vec[3].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; +- packets_vec[3].reg_data[0] = cntl.u32All; + +- status = dbgdev_diq_submit_ib( +- dbgdev, +- adw_info->process->pasid, +- packet_buff_gpu_addr, +- packet_buff_uint, +- ib_size, true); ++ packets_vec[3].bitfields2.reg_offset = ++ aw_reg_add_dword - CONFIG_REG_BASE; ++ packets_vec[3].reg_data[0] = cntl.u32All; + +- if (status != 0) { +- pr_debug("Error! kfd: In func %s >> failed to submit DIQ packet\n", __func__); +- break; +- } ++ status = dbgdev_diq_submit_ib( ++ dbgdev, ++ adw_info->process->pasid, ++ packet_buff_gpu_addr, ++ packet_buff_uint, ++ ib_size, true); + ++ if (status != 0) { ++ pr_debug("Error! kfd: In func %s >> failed to submit DIQ packet\n", ++ __func__); ++ return status; + } + +- } while (false); ++ } + + return status; + +@@ -525,26 +539,30 @@ static int dbgdev_wave_control_set_registers( + int status = 0; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; ++ struct HsaDbgWaveMsgAMDGen2 *pMsg; + + reg_sq_cmd.u32All = 0; +- + reg_gfx_index.u32All = 0; ++ pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; + + switch (wac_info->mode) { +- case HSA_DBG_WAVEMODE_SINGLE: /* Send command to single wave */ +- /*limit access to the process waves only,by setting vmid check */ ++ /* Send command to single wave */ ++ case HSA_DBG_WAVEMODE_SINGLE: ++ /* limit access to the process waves only,by setting vmid check ++ */ + reg_sq_cmd.bits.check_vmid = 1; +- reg_sq_cmd.bits.simd_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.SIMD; +- reg_sq_cmd.bits.wave_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.WaveId; ++ reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; ++ reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; + +- reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray; +- reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine; +- reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU; ++ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; ++ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; ++ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + +- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: /* Send command to all waves with matching VMID */ ++ /* Send command to all waves with matching VMID */ ++ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: + + + reg_gfx_index.bits.sh_broadcast_writes = 1; +@@ -554,14 +572,15 @@ static int dbgdev_wave_control_set_registers( + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + break; + +- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: /* Send command to all CU waves with matching VMID */ ++ /* Send command to all CU waves with matching VMID */ ++ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: + + reg_sq_cmd.bits.check_vmid = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + +- reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray; +- reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine; +- reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU; ++ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; ++ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; ++ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + +@@ -636,91 +655,98 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; + + reg_sq_cmd.u32All = 0; +- do { + +- status = dbgdev_wave_control_set_registers(wac_info, +- ®_sq_cmd, +- ®_gfx_index, +- dbgdev->dev->device_info->asic_family); ++ status = dbgdev_wave_control_set_registers(wac_info, ++ ®_sq_cmd, ++ ®_gfx_index, ++ dbgdev->dev->device_info->asic_family); + +- /* we do not control the VMID in DIQ,so reset it to a known value */ +- reg_sq_cmd.bits.vm_id = 0; +- if (status != 0) +- break; +- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); +- +- pr_debug("\t\t mode is: %u\n", wac_info->mode); +- pr_debug("\t\t operand is: %u\n", wac_info->operand); +- pr_debug("\t\t trap id is: %u\n", wac_info->trapId); +- pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); +- pr_debug("\t\t vmid is: N/A\n"); +- +- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); +- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); +- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); +- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); +- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); +- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); +- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); +- +- pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes); +- pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index); +- pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes); +- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); +- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); +- pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes); +- +- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); +- +- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, +- ib_size / sizeof(uint32_t), +- &packet_buff_uint, &packet_buff_gpu_addr); +- +- if (status != 0) +- break; +- +- memset(packet_buff_uint, 0, ib_size); +- +- packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; +- packets_vec[0].header.count = 1; +- packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; +- packets_vec[0].header.type = PM4_TYPE_3; +- packets_vec[0].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; +- packets_vec[0].bitfields2.insert_vmid = 0; +- packets_vec[0].reg_data[0] = reg_gfx_index.u32All; +- +- packets_vec[1].header.count = 1; +- packets_vec[1].header.opcode = IT_SET_CONFIG_REG; +- packets_vec[1].header.type = PM4_TYPE_3; +- packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE; +- packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; +- packets_vec[1].bitfields2.insert_vmid = 1; +- packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; +- +- /* Restore the GRBM_GFX_INDEX register */ +- +- reg_gfx_index.u32All = 0; +- reg_gfx_index.bits.sh_broadcast_writes = 1; +- reg_gfx_index.bits.instance_broadcast_writes = 1; +- reg_gfx_index.bits.se_broadcast_writes = 1; ++ /* we do not control the VMID in DIQ, so reset it to a ++ * known value ++ */ ++ reg_sq_cmd.bits.vm_id = 0; ++ if (status != 0) ++ return status; ++ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); ++ ++ pr_debug("\t\t mode is: %u\n", wac_info->mode); ++ pr_debug("\t\t operand is: %u\n", wac_info->operand); ++ pr_debug("\t\t trap id is: %u\n", wac_info->trapId); ++ pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); ++ pr_debug("\t\t vmid is: N/A\n"); ++ ++ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); ++ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); ++ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); ++ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); ++ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); ++ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); ++ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); ++ ++ pr_debug("\t\t ibw is : %u\n", ++ reg_gfx_index.bitfields.instance_broadcast_writes); ++ pr_debug("\t\t ii is : %u\n", ++ reg_gfx_index.bitfields.instance_index); ++ pr_debug("\t\t sebw is : %u\n", ++ reg_gfx_index.bitfields.se_broadcast_writes); ++ pr_debug("\t\t se_ind is : %u\n", ++ reg_gfx_index.bitfields.se_index); ++ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); ++ pr_debug("\t\t sbw is : %u\n", ++ reg_gfx_index.bitfields.sh_broadcast_writes); ++ ++ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); ++ ++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ++ ib_size / sizeof(uint32_t), ++ &packet_buff_uint, &packet_buff_gpu_addr); ++ ++ if (status != 0) ++ return status; ++ ++ memset(packet_buff_uint, 0, ib_size); ++ ++ packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; ++ packets_vec[0].header.count = 1; ++ packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; ++ packets_vec[0].header.type = PM4_TYPE_3; ++ packets_vec[0].bitfields2.reg_offset = ++ GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; ++ packets_vec[0].bitfields2.insert_vmid = 0; ++ packets_vec[0].reg_data[0] = reg_gfx_index.u32All; ++ ++ packets_vec[1].header.count = 1; ++ packets_vec[1].header.opcode = IT_SET_CONFIG_REG; ++ packets_vec[1].header.type = PM4_TYPE_3; ++ packets_vec[1].bitfields2.reg_offset = ++ SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE; ++ packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; ++ packets_vec[1].bitfields2.insert_vmid = 1; ++ packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; ++ ++ /* Restore the GRBM_GFX_INDEX register */ + ++ reg_gfx_index.u32All = 0; ++ reg_gfx_index.bits.sh_broadcast_writes = 1; ++ reg_gfx_index.bits.instance_broadcast_writes = 1; ++ reg_gfx_index.bits.se_broadcast_writes = 1; + +- packets_vec[2].ordinal1 = packets_vec[0].ordinal1; +- packets_vec[2].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; +- packets_vec[2].bitfields2.insert_vmid = 0; +- packets_vec[2].reg_data[0] = reg_gfx_index.u32All; + +- status = dbgdev_diq_submit_ib( +- dbgdev, +- wac_info->process->pasid, +- packet_buff_gpu_addr, +- packet_buff_uint, +- ib_size, false); ++ packets_vec[2].ordinal1 = packets_vec[0].ordinal1; ++ packets_vec[2].bitfields2.reg_offset = ++ GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; ++ packets_vec[2].bitfields2.insert_vmid = 0; ++ packets_vec[2].reg_data[0] = reg_gfx_index.u32All; + +- if (status != 0) +- pr_debug("%s\n", " Critical Error ! Submit diq packet failed "); ++ status = dbgdev_diq_submit_ib( ++ dbgdev, ++ wac_info->process->pasid, ++ packet_buff_gpu_addr, ++ packet_buff_uint, ++ ib_size, false); + +- } while (false); ++ if (status != 0) ++ pr_debug("%s\n", " Critical Error ! Submit diq packet failed "); + + return status; + } +@@ -758,23 +784,37 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); +- pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); ++ pr_debug("\t\t msg value is: %u\n", ++ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: %u\n", vmid); + +- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); +- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); +- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); +- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); +- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); +- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); +- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); +- +- pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes); +- pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index); +- pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes); +- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); +- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); +- pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes); ++ pr_debug("\t\t chk_vmid is : %u\n", ++ reg_sq_cmd.bitfields.check_vmid); ++ pr_debug("\t\t command is : %u\n", ++ reg_sq_cmd.bitfields.cmd); ++ pr_debug("\t\t queue id is : %u\n", ++ reg_sq_cmd.bitfields.queue_id); ++ pr_debug("\t\t simd id is : %u\n", ++ reg_sq_cmd.bitfields.simd_id); ++ pr_debug("\t\t mode is : %u\n", ++ reg_sq_cmd.bitfields.mode); ++ pr_debug("\t\t vm_id is : %u\n", ++ reg_sq_cmd.bitfields.vm_id); ++ pr_debug("\t\t wave_id is : %u\n", ++ reg_sq_cmd.bitfields.wave_id); ++ ++ pr_debug("\t\t ibw is : %u\n", ++ reg_gfx_index.bitfields.instance_broadcast_writes); ++ pr_debug("\t\t ii is : %u\n", ++ reg_gfx_index.bitfields.instance_index); ++ pr_debug("\t\t sebw is : %u\n", ++ reg_gfx_index.bitfields.se_broadcast_writes); ++ pr_debug("\t\t se_ind is : %u\n", ++ reg_gfx_index.bitfields.se_index); ++ pr_debug("\t\t sh_ind is : %u\n", ++ reg_gfx_index.bitfields.sh_index); ++ pr_debug("\t\t sbw is : %u\n", ++ reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + +@@ -814,7 +854,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING +- * to check which VMID the current process is mapped to. */ ++ * to check which VMID the current process is mapped to. ++ */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid +@@ -854,7 +895,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) + } + + void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, +- DBGDEV_TYPE type) ++ enum DBGDEV_TYPE type) + { + pdbgdev->dev = pdev; + pdbgdev->kq = NULL; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +index 82f48ff..75883e0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +@@ -90,17 +90,14 @@ enum SQ_IND_CMD_CMD { + SQ_IND_CMD_CMD_DEBUG = 0x00000004, + SQ_IND_CMD_CMD_TRAP = 0x00000005 + }; +-/* +- * SQ_IND_CMD_MODE enum +- */ + +-typedef enum SQ_IND_CMD_MODE { ++enum SQ_IND_CMD_MODE { + SQ_IND_CMD_MODE_SINGLE = 0x00000000, + SQ_IND_CMD_MODE_BROADCAST = 0x00000001, + SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, + SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, + SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, +-} SQ_IND_CMD_MODE; ++}; + + union SQ_IND_INDEX_BITS { + struct { +@@ -208,7 +205,7 @@ union TCP_WATCH_ADDR_L_BITS { + }; + + enum { +- QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ ++ QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ + QUEUESTATE__ACTIVE_COMPLETION_PENDING, + QUEUESTATE__ACTIVE + }; +@@ -226,6 +223,7 @@ union ULARGE_INTEGER { + #define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) + + +-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, DBGDEV_TYPE type); ++void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, ++ enum DBGDEV_TYPE type); + + #endif /* KFD_DBGDEV_H_ */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +index 426f776..603cdc3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +@@ -37,16 +37,12 @@ + + static DEFINE_MUTEX(kfd_dbgmgr_mutex); + +-struct mutex * +-get_dbgmgr_mutex(void) ++struct mutex *get_dbgmgr_mutex(void) + { + return &kfd_dbgmgr_mutex; + } + +-/*===========================================================================*/ +- +-static void +-kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) ++static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) + { + kfree(pmgr->dbgdev); + pmgr->dbgdev = NULL; +@@ -54,10 +50,7 @@ kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) + pmgr->dev = NULL; + } + +-/*===========================================================================*/ +- +-void +-kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) ++void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) + { + if (pmgr != NULL) { + kfd_dbgmgr_uninitialize(pmgr); +@@ -66,21 +59,18 @@ kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) + } + } + +-/*===========================================================================*/ +- +-bool +-kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) ++bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + { +- DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; ++ enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; + struct kfd_dbgmgr *new_buff; + + BUG_ON(pdev == NULL); + BUG_ON(!pdev->init_complete); + + new_buff = kfd_alloc_struct(new_buff); +- if (!new_buff) +- { +- dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgmgr instance\n", __func__); ++ if (!new_buff) { ++ dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgmgr instance\n", ++ __func__); + return false; + } + +@@ -88,7 +78,8 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + new_buff->dev = pdev; + new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); + if (!new_buff->dbgdev) { +- dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n", __func__); ++ dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n", ++ __func__); + kfree(new_buff); + return false; + } +@@ -108,195 +99,176 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + long + kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + { +- long status = 0; +- +- do { +- +- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) { +- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); +- /* Invalid Pointer. */ +- status = -EINVAL; +- break; +- } +- if (pmgr->pasid != 0) { +- /* HW debugger is already active. */ +- status = -EBUSY; +- break; +- } +- +- /* remember pasid */ ++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) { ++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", ++ __func__); ++ /* Invalid Pointer. */ ++ return -EINVAL; ++ } ++ if (pmgr->pasid != 0) { ++ /* HW debugger is already active. */ ++ return -EBUSY; ++ } + +- pmgr->pasid = p->pasid; ++ /* remember pasid */ + +- /* provide the pqm for diq generation */ ++ pmgr->pasid = p->pasid; + +- pmgr->dbgdev->pqm = &p->pqm; ++ /* provide the pqm for diq generation */ + +- /* activate the actual registering */ +- /* todo: you should lock with the process mutex here */ +- pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); +- /* todo: you should unlock with the process mutex here */ ++ pmgr->dbgdev->pqm = &p->pqm; + +- } while (false); ++ /* activate the actual registering */ ++ /* todo: you should lock with the process mutex here */ ++ pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); ++ /* todo: you should unlock with the process mutex here */ + +- return status; ++ return 0; + } + +-/* ========================================================================== */ ++/* ========================================================================= */ + + long + kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + { + +- long status = 0; +- +- do { +- +- if ((pmgr == NULL) || (pmgr->dev == NULL) +- || (pmgr->dbgdev == NULL) || (p == NULL)) { +- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); +- /* Invalid Pointer */ +- status = -EINVAL; +- break; +- } +- if (pmgr->pasid != p->pasid) { +- /* Is the requests coming from the already registered process? */ +- status = -EINVAL; +- break; +- } +- +- /* todo: you should lock with the process mutex here */ ++ if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || ++ (p == NULL)) { ++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", ++ __func__); ++ /* Invalid Pointer */ ++ return -EINVAL; ++ } ++ if (pmgr->pasid != p->pasid) { ++ /* Is the requests coming from the already registered ++ * process? ++ */ ++ return -EINVAL; ++ } + +- pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); ++ /* todo: you should lock with the process mutex here */ + +- /* todo: you should unlock with the process mutex here */ ++ pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); + +- pmgr->pasid = 0; ++ /* todo: you should unlock with the process mutex here */ + +- } while (false); ++ pmgr->pasid = 0; + +- return status; ++ return 0; + } + +-/* =========================================================================== */ ++/* ========================================================================= */ + + long +-kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info) ++kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, ++ struct dbg_wave_control_info *wac_info) + { +- long status = 0; +- + dev_info(NULL, "kfd: In func %s\n", __func__); + +- do { +- +- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (wac_info == NULL) +- || (wac_info->process == NULL)) { +- /* Invalid Pointer */ +- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); +- status = -EINVAL; +- break; +- } +- /* Is the requests coming from the already registered process? */ +- if (pmgr->pasid != wac_info->process->pasid) { +- /* HW debugger support was not registered for requester process */ +- status = -EINVAL; +- break; +- } +- +- status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); +- +- } while (false); +- +- return status; ++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!wac_info) ++ || (wac_info->process == NULL)) { ++ /* Invalid Pointer */ ++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", ++ __func__); ++ return -EINVAL; ++ } ++ /* Is the requests coming from the already registered ++ * process? ++ */ ++ if (pmgr->pasid != wac_info->process->pasid) { ++ /* HW debugger support was not registered for ++ * requester process ++ */ ++ return -EINVAL; ++ } + ++ return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, ++ wac_info); + } + +-/* =========================================================================== */ ++/* ========================================================================= */ + + long +-kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info) ++kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, ++ struct dbg_address_watch_info *adw_info) + { +- long status = 0; +- + dev_info(NULL, "kfd: In func %s\n", __func__); + +- do { +- +- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (adw_info == NULL) +- || (adw_info->process == NULL)) { +- /* Invalid Pointer */ +- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); +- status = -EINVAL; +- break; +- } +- /* Is the requests coming from the already registered process? */ +- if (pmgr->pasid != adw_info->process->pasid) { +- /* HW debugger support was not registered for requester process */ +- status = -EINVAL; +- break; +- } +- +- status = (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, adw_info); +- +- } while (false); +- +- return status; ++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!adw_info) ++ || (adw_info->process == NULL)) { ++ /* Invalid Pointer */ ++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", ++ __func__); ++ return -EINVAL; ++ } ++ /* Is the requests coming from the already registered ++ * process? ++ */ ++ if (pmgr->pasid != adw_info->process->pasid) { ++ /* HW debugger support was not registered for ++ * requester process ++ */ ++ return -EINVAL; ++ } + ++ return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, ++ adw_info); + } + + +-/* =========================================================================== */ ++/* ========================================================================= */ + /* + * Handle abnormal process termination + * if we are in the midst of a debug session, we should kill all pending waves + * of the debugged process and unregister the process from the Debugger. + */ + long +-kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process) ++kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, ++ struct kfd_process *process) + { + long status = 0; + struct dbg_wave_control_info wac_info; + + dev_info(NULL, "kfd: In func %s\n", __func__); + +- do { +- +- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) { +- /* Invalid Pointer */ +- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); +- status = -EINVAL; +- break; +- } +- /* first, we kill all the wavefronts of this process */ +- +- wac_info.process = process; +- wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; +- wac_info.operand = HSA_DBG_WAVEOP_KILL; +- wac_info.trapId = 0x0; /* not used for the KILL */ +- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; /* not used for kill */ +- wac_info.dbgWave_msg.MemoryVA = NULL; /* not used for kill */ +- +- status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, &wac_info); +- +- if (status != 0) { +- dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n", __func__, status); +- break; +- } +- if (pmgr->pasid == wac_info.process->pasid) { +- /* if terminated process was registered for debug, then unregister it */ +- status = kfd_dbgmgr_unregister(pmgr, process); +- pmgr->pasid = 0; +- } +- if (status != 0) +- dev_info(NULL, +- "Error! kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n", +- __func__, status); +- +- } while (false); ++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) { ++ /* Invalid Pointer */ ++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", ++ __func__); ++ return -EINVAL; ++ } ++ /* first, we kill all the wavefronts of this process */ ++ ++ wac_info.process = process; ++ wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; ++ wac_info.operand = HSA_DBG_WAVEOP_KILL; + +- return status; ++ /* not used for KILL */ ++ wac_info.trapId = 0x0; ++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; ++ wac_info.dbgWave_msg.MemoryVA = NULL; + +-} ++ status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, ++ &wac_info); + ++ if (status != 0) { ++ dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n", ++ __func__, status); ++ return status; ++ } ++ if (pmgr->pasid == wac_info.process->pasid) { ++ /* if terminated process was registered for debug, ++ * then unregister it ++ */ ++ status = kfd_dbgmgr_unregister(pmgr, process); ++ pmgr->pasid = 0; ++ } ++ if (status != 0) ++ dev_info(NULL, ++ "Error! kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n", ++ __func__, status); ++ ++ return status; ++} + +-/*///////////////////////////////////////////////////////////////////////////////////////// */ ++/* ///////////////////////////////////////////////////////////////////////// */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +index 2b6484e..b9a769a 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +@@ -26,242 +26,251 @@ + + #include "kfd_priv.h" + +-/* +- * SQ_IND_CMD_CMD enum +- */ +- +- + /* must align with hsakmttypes definition. */ + #pragma pack(push, 4) + +-typedef enum _HSA_DBG_WAVEOP { +- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ +- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ +- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ +- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter debug mode */ +- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ ++enum HSA_DBG_WAVEOP { ++ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ ++ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ ++ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ ++ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ ++ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ + HSA_DBG_NUM_WAVEOP = 5, + HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF +-} HSA_DBG_WAVEOP; ++}; + +-typedef enum _HSA_DBG_WAVEMODE { +- HSA_DBG_WAVEMODE_SINGLE = 0, /* send command to a single wave */ +- /* Broadcast to all wavefronts of all processes is not supported for HSA user mode */ +- HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, /* send to waves within current process */ +- HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, /* send to waves within current process on CU */ ++enum HSA_DBG_WAVEMODE { ++ /* send command to a single wave */ ++ HSA_DBG_WAVEMODE_SINGLE = 0, ++ /* Broadcast to all wavefronts of all processes is not supported for ++ * HSA user mode ++ */ ++ ++ /* send to waves within current process */ ++ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, ++ /* send to waves within current process on CU */ ++ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, + HSA_DBG_NUM_WAVEMODE = 3, + HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF +-} HSA_DBG_WAVEMODE; ++}; + +-typedef enum _HSA_DBG_WAVEMSG_TYPE { ++enum HSA_DBG_WAVEMSG_TYPE { + HSA_DBG_WAVEMSG_AUTO = 0, + HSA_DBG_WAVEMSG_USER = 1, + HSA_DBG_WAVEMSG_ERROR = 2, + HSA_DBG_NUM_WAVEMSG, + HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF +-} HSA_DBG_WAVEMSG_TYPE; ++}; + +-typedef enum _HSA_DBG_WATCH_MODE { +- HSA_DBG_WATCH_READ = 0, /* Read operations only */ +- HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ +- HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ +- HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ ++enum HSA_DBG_WATCH_MODE { ++ HSA_DBG_WATCH_READ = 0, /* Read operations only */ ++ HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ ++ HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ ++ HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ + HSA_DBG_WATCH_NUM, + HSA_DBG_WATCH_SIZE = 0xFFFFFFFF +-} HSA_DBG_WATCH_MODE; ++}; + + /* This structure is hardware specific and may change in the future */ +-typedef struct _HsaDbgWaveMsgAMDGen2 { ++struct HsaDbgWaveMsgAMDGen2 { + union { + struct { +- uint32_t UserData:8; /* user data */ +- uint32_t ShaderArray:1; /* Shader array */ +- uint32_t Priv:1; /* Privileged */ +- uint32_t Reserved0:4; /* This field is reserved, should be 0 */ +- uint32_t WaveId:4; /* wave id */ +- uint32_t SIMD:2; /* SIMD id */ +- uint32_t HSACU:4; /* Compute unit */ +- uint32_t ShaderEngine:2; /* Shader engine */ +- uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ +- uint32_t Reserved1:4; /* This field is reserved, should be 0 */ ++ uint32_t UserData:8; /* user data */ ++ uint32_t ShaderArray:1; /* Shader array */ ++ uint32_t Priv:1; /* Privileged */ ++ uint32_t Reserved0:4; /* Reserved, should be 0 */ ++ uint32_t WaveId:4; /* wave id */ ++ uint32_t SIMD:2; /* SIMD id */ ++ uint32_t HSACU:4; /* Compute unit */ ++ uint32_t ShaderEngine:2;/* Shader engine */ ++ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ ++ uint32_t Reserved1:4; /* Reserved, should be 0 */ + } ui32; + uint32_t Value; + }; + + uint32_t Reserved2; + +-} HsaDbgWaveMsgAMDGen2; ++}; + +-typedef union _HsaDbgWaveMessageAMD { +- HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; +- /* for future HsaDbgWaveMsgAMDGen3; */ +-} HsaDbgWaveMessageAMD; ++union HsaDbgWaveMessageAMD { ++ struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; ++ /* for future HsaDbgWaveMsgAMDGen3; */ ++}; + +-typedef struct _HsaDbgWaveMessage { +- void *MemoryVA; /* ptr to associated host-accessible data */ +- HsaDbgWaveMessageAMD DbgWaveMsg; +-} HsaDbgWaveMessage; ++struct HsaDbgWaveMessage { ++ void *MemoryVA; /* ptr to associated host-accessible data */ ++ union HsaDbgWaveMessageAMD DbgWaveMsg; ++}; + + /* TODO: This definitions to be MOVED to kfd_event, once it is implemented. ++ * ++ * HSA sync primitive, Event and HW Exception notification API definitions. ++ * The API functions allow the runtime to define a so-called sync-primitive, ++ * a SW object combining a user-mode provided "syncvar" and a scheduler event ++ * that can be signaled through a defined GPU interrupt. A syncvar is ++ * a process virtual memory location of a certain size that can be accessed ++ * by CPU and GPU shader code within the process to set and query the content ++ * within that memory. The definition of the content is determined by the HSA ++ * runtime and potentially GPU shader code interfacing with the HSA runtime. ++ * The syncvar values may be commonly written through an PM4 WRITE_DATA packet ++ * in the user mode instruction stream. The OS scheduler event is typically ++ * associated and signaled by an interrupt issued by the GPU, but other HSA ++ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced ++ * by the KFD by this mechanism, too. ++ */ + +- HSA sync primitive, Event and HW Exception notification API definitions +- The API functions allow the runtime to define a so-called sync-primitive, a SW object +- combining a user-mode provided "syncvar" and a scheduler event that can be signaled +- through a defined GPU interrupt. A syncvar is a process virtual memory location of +- a certain size that can be accessed by CPU and GPU shader code within the process to set +- and query the content within that memory. The definition of the content is determined by +- the HSA runtime and potentially GPU shader code interfacing with the HSA runtime. +- The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the +- user mode instruction stream. The OS scheduler event is typically associated and +- signaled by an interrupt issued by the GPU, but other HSA system interrupt conditions +- from other HW (e.g. IOMMUv2) may besurfaced by the KFD by this mechanism, too. */ +- +-/* these are the new definitions for events */ +- +-typedef enum _HSA_EVENTTYPE { +- HSA_EVENTTYPE_SIGNAL = 0, /* /user-mode generated GPU signal */ +- HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ +- HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change( start/stop ) */ +- HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ +- HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ +- HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ +- HSA_EVENTTYPE_PROFILE_EVENT = 6, /* GPU signal for profiling */ +- HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state (EOP pm4) */ +- /* ... */ ++/* these are the new definitions for events */ ++enum HSA_EVENTTYPE { ++ HSA_EVENTTYPE_SIGNAL = 0, /* User-mode generated GPU signal */ ++ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ ++ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change ++ * ( start/stop ) ++ */ ++ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ ++ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ ++ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ ++ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ ++ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state ++ *(EOP pm4) ++ */ ++ /* ... */ + HSA_EVENTTYPE_MAXID, + HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF +-} HSA_EVENTTYPE; +- +-typedef uint32_t HSA_EVENTID; +- +-/* Subdefinitions for various event types: Syncvar */ ++}; + +-typedef struct _HsaSyncVar { ++/* Subdefinitions for various event types: Syncvar */ ++struct HsaSyncVar { + union { +- void *UserData; /* pointer to user mode data */ +- uint64_t UserDataPtrValue; /* 64bit compatibility of value */ ++ void *UserData; /* pointer to user mode data */ ++ uint64_t UserDataPtrValue; /* 64bit compatibility of value */ + } SyncVar; + uint64_t SyncVarSize; +-} HsaSyncVar; +- +-/* +- Subdefinitions for various event types: NodeChange +-*/ ++}; + +-typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS { ++/* Subdefinitions for various event types: NodeChange */ ++enum HSA_EVENTTYPE_NODECHANGE_FLAGS { + HSA_EVENTTYPE_NODECHANGE_ADD = 0, + HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, + HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF +-} HSA_EVENTTYPE_NODECHANGE_FLAGS; ++}; + +-typedef struct _HsaNodeChange { +- HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; /* HSA node added/removed on the platform */ +-} HsaNodeChange; ++struct HsaNodeChange { ++ /* HSA node added/removed on the platform */ ++ enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; ++}; + +-/* +- Sub-definitions for various event types: DeviceStateChange +-*/ ++/* Sub-definitions for various event types: DeviceStateChange */ + +-typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { +- HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, /* device started (and available) */ +- HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, /* device stopped (i.e. unavailable) */ ++enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { ++ /* device started (and available) */ ++ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, ++ /* device stopped (i.e. unavailable) */ ++ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, + HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF +-} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS; ++}; + +-typedef enum _HSA_DEVICE { ++enum HSA_DEVICE { + HSA_DEVICE_CPU = 0, + HSA_DEVICE_GPU = 1, + MAX_HSA_DEVICE = 2 +-} HSA_DEVICE; ++}; + +-typedef struct _HsaDeviceStateChange { ++struct HsaDeviceStateChange { + uint32_t NodeId; /* F-NUMA node that contains the device */ +- HSA_DEVICE Device; /* device type: GPU or CPU */ +- HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ +-} HsaDeviceStateChange; ++ enum HSA_DEVICE Device; /* device type: GPU or CPU */ ++ enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ ++}; + +-typedef struct _HsaEventData { +- HSA_EVENTTYPE EventType; /* event type */ ++struct HsaEventData { ++ enum HSA_EVENTTYPE EventType; /* event type */ + union { +- /* return data associated with HSA_EVENTTYPE_SIGNAL and other events */ +- HsaSyncVar SyncVar; ++ /* return data associated with HSA_EVENTTYPE_SIGNAL and other ++ * events ++ */ ++ struct HsaSyncVar SyncVar; + + /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ +- HsaNodeChange NodeChangeState; ++ struct HsaNodeChange NodeChangeState; + + /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ +- HsaDeviceStateChange DeviceState; ++ struct HsaDeviceStateChange DeviceState; + } EventData; + +- /* the following data entries are internal to the KFD & thunk itself. */ ++ /* the following data entries are internal to the KFD & thunk itself */ + +- uint64_t HWData1; /* internal thunk store for Event data (OsEventHandle) */ +- uint64_t HWData2; /* internal thunk store for Event data (HWAddress) */ +- uint32_t HWData3; /* internal thunk store for Event data (HWData) */ +-} HsaEventData; ++ /* internal thunk store for Event data (OsEventHandle) */ ++ uint64_t HWData1; ++ /* internal thunk store for Event data (HWAddress) */ ++ uint64_t HWData2; ++ /* internal thunk store for Event data (HWData) */ ++ uint32_t HWData3; ++}; + +-typedef struct _HsaEventDescriptor { +- HSA_EVENTTYPE EventType; /* event type to allocate */ +- uint32_t NodeId; /* H-NUMA node containing GPU device that is event source */ +- HsaSyncVar SyncVar; /* pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL */ +-} HsaEventDescriptor; ++struct HsaEventDescriptor { ++ /* event type to allocate */ ++ enum HSA_EVENTTYPE EventType; ++ /* H-NUMA node containing GPU device that is event source */ ++ uint32_t NodeId; ++ /* pointer to user mode syncvar data, syncvar->UserDataPtrValue ++ * may be NULL ++ */ ++ struct HsaSyncVar SyncVar; ++}; + +-typedef struct _HsaEvent { +- HSA_EVENTID EventId; +- HsaEventData EventData; +-} HsaEvent; ++struct HsaEvent { ++ uint32_t EventId; ++ struct HsaEventData EventData; ++}; + + + #pragma pack(pop) + +-typedef enum _DBGDEV_TYPE { ++enum DBGDEV_TYPE { + DBGDEV_TYPE_ILLEGAL = 0, + DBGDEV_TYPE_NODIQ = 1, + DBGDEV_TYPE_DIQ = 2, + DBGDEV_TYPE_TEST = 3 +-} DBGDEV_TYPE; ++}; + + struct dbg_address_watch_info { + struct kfd_process *process; +- HSA_DBG_WATCH_MODE *watch_mode; ++ enum HSA_DBG_WATCH_MODE *watch_mode; + uint64_t *watch_address; + uint64_t *watch_mask; +- HsaEvent *watch_event; ++ struct HsaEvent *watch_event; + uint32_t num_watch_points; + }; + + struct dbg_wave_control_info { + struct kfd_process *process; + uint32_t trapId; +- HSA_DBG_WAVEOP operand; +- HSA_DBG_WAVEMODE mode; +- HsaDbgWaveMessage dbgWave_msg; ++ enum HSA_DBG_WAVEOP operand; ++ enum HSA_DBG_WAVEMODE mode; ++ struct HsaDbgWaveMessage dbgWave_msg; + }; + + struct kfd_dbgdev { + + /* The device that owns this data. */ +- + struct kfd_dev *dev; + + /* kernel queue for DIQ */ +- + struct kernel_queue *kq; + + /* a pointer to the pqm of the calling process */ +- + struct process_queue_manager *pqm; + + /* type of debug device ( DIQ, non DIQ, etc. ) */ +- +- DBGDEV_TYPE type; ++ enum DBGDEV_TYPE type; + + /* virtualized function pointers to device dbg */ +- + int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); + int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); +- int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info); +- int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, struct dbg_wave_control_info *wac_info); ++ int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, ++ struct dbg_address_watch_info *adw_info); ++ int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, ++ struct dbg_wave_control_info *wac_info); + + }; + +@@ -277,7 +286,10 @@ void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); + bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); + long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); + long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); +-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info); +-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info); +-long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process); +-#endif /* KFD_DBGMGR_H_ */ ++long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, ++ struct dbg_wave_control_info *wac_info); ++long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, ++ struct dbg_address_watch_info *adw_info); ++long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, ++ struct kfd_process *process); ++#endif /* KFD_DBGMGR_H_ */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +index ccf982d..24952c2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +@@ -51,19 +51,19 @@ void kfd_debugfs_init(void) + return; + } + +- ent = debugfs_create_file("mqds", S_IFREG | S_IRUGO, debugfs_root, ++ ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_mqds_by_process, + &kfd_debugfs_fops); + if (ent == NULL) + pr_warn("Failed to create mqds in kfd debugfs\n"); + +- ent = debugfs_create_file("hqds", S_IFREG | S_IRUGO, debugfs_root, ++ ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_hqds_by_device, + &kfd_debugfs_fops); + if (ent == NULL) + pr_warn("Failed to create hqds in kfd debugfs\n"); + +- ent = debugfs_create_file("rls", S_IFREG | S_IRUGO, debugfs_root, ++ ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, + kfd_debugfs_rls_by_device, + &kfd_debugfs_fops); + if (ent == NULL) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 5d657a9..0abccc4 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -343,12 +343,13 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) + dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, +- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); ++ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) ++ != 0); + return false; + } + + pasid_limit = min_t(unsigned int, +- (unsigned int)1 << kfd->device_info->max_pasid_bits, ++ (unsigned int)(1 << kfd->device_info->max_pasid_bits), + iommu_info.max_pasids); + /* + * last pasid is used for kernel queues doorbells +@@ -718,9 +719,10 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) + + spin_lock(&kfd->interrupt_lock); + +- if (kfd->interrupts_active +- && interrupt_is_wanted(kfd, ih_ring_entry, patched_ihre, &is_patched) +- && enqueue_ih_ring_entry(kfd, is_patched ? patched_ihre : ih_ring_entry)) ++ if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, ++ patched_ihre, &is_patched) ++ && enqueue_ih_ring_entry(kfd, ++ is_patched ? patched_ihre : ih_ring_entry)) + queue_work(kfd->ih_wq, &kfd->interrupt_work); + + spin_unlock(&kfd->interrupt_lock); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 2a4a556..99844c5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -1182,7 +1182,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + } + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) +- dqm->sdma_queue_count++; ++ dqm->sdma_queue_count++; + /* + * Unconditionally increment this counter, regardless of the queue's + * type or whether the queue is active. +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +index 341adfa..bf24368 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +@@ -171,7 +171,8 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit +- * aperture addresses. */ ++ * aperture addresses. ++ */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + +@@ -202,7 +203,8 @@ static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit +- * aperture addresses. */ ++ * aperture addresses. ++ */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +index 2629143..2f37b04 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +@@ -225,7 +225,8 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit +- * aperture addresses. */ ++ * aperture addresses. ++ */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + +@@ -256,7 +257,8 @@ static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit +- * aperture addresses. */ ++ * aperture addresses. ++ */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index be5abd5..18198d8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -34,8 +34,8 @@ + + /* A task can only be on a single wait_queue at a time, but we need to support + * waiting on multiple events (any/all). +- * Instead of each event simply having a wait_queue with sleeping tasks, it has a +- * singly-linked list of tasks. ++ * Instead of each event simply having a wait_queue with sleeping tasks, it has ++ * a singly-linked list of tasks. + * A thread that wants to sleep creates an array of these, one for each event + * and adds one to each event's waiter chain. + */ +@@ -56,9 +56,9 @@ struct kfd_event_waiter { + + /* Over-complicated pooled allocator for event notification slots. + * +- * Each signal event needs a 64-bit signal slot where the signaler will write a 1 +- * before sending an interrupt.l (This is needed because some interrupts do not +- * contain enough spare data bits to identify an event.) ++ * Each signal event needs a 64-bit signal slot where the signaler will write a ++ * 1 before sending an interrupt.l (This is needed because some interrupts do ++ * not contain enough spare data bits to identify an event.) + * We get whole pages from vmalloc and map them to the process VA. + * Individual signal events are then allocated a slot in a page. + */ +@@ -101,7 +101,10 @@ allocate_free_slot(struct kfd_process *process, + + list_for_each_entry(page, &process->signal_event_pages, event_pages) { + if (page->free_slots > 0) { +- unsigned int slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); ++ unsigned int slot = ++ find_first_zero_bit(page->used_slot_bitmap, ++ SLOTS_PER_PAGE); ++ + __set_bit(slot, page->used_slot_bitmap); + page->free_slots--; + +@@ -139,13 +142,14 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) + + page->free_slots = SLOTS_PER_PAGE; + +- backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \ ++ backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + if (!backing_store) + goto fail_alloc_signal_store; + + /* prevent user-mode info leaks */ +- memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, KFD_SIGNAL_EVENT_LIMIT * 8); ++ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, ++ KFD_SIGNAL_EVENT_LIMIT * 8); + page->kernel_address = backing_store; + + /* Set bits of debug events to prevent allocation */ +@@ -213,7 +217,7 @@ allocate_signal_page_dgpu(struct kfd_process *p, + my_page->user_address = NULL; + my_page->free_slots = SLOTS_PER_PAGE; + if (list_empty(&p->signal_event_pages)) +- my_page->page_index = 0; ++ my_page->page_index = 0; + else + my_page->page_index = list_tail_entry(&p->signal_event_pages, + struct signal_page, +@@ -284,7 +288,8 @@ static void release_event_notification_slot(struct signal_page *page, + page->free_slots++; + + /* We don't free signal pages, they are retained by the process +- * and reused until it exits. */ ++ * and reused until it exits. ++ */ + } + + static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, +@@ -292,7 +297,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, + { + struct signal_page *page; + +- /* This is safe because we don't delete signal pages until the process exits. */ ++ /* This is safe because we don't delete signal pages until the process ++ * exits. ++ */ + list_for_each_entry(page, &p->signal_event_pages, event_pages) + if (page->page_index == page_index) + return page; +@@ -300,7 +307,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, + return NULL; + } + +-/* Assumes that p->event_mutex is held and of course that p is not going away (current or locked). */ ++/* Assumes that p->event_mutex is held and of course that p is not going away ++ * (current or locked). ++ */ + static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) + { + struct kfd_event *ev; +@@ -321,27 +330,30 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) + static u32 make_signal_event_id(struct signal_page *page, + unsigned int signal_slot_index) + { +- return page->page_index | (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); ++ return page->page_index | ++ (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); + } + + /* Produce a kfd event id for a nonsignal event. +- * These are arbitrary numbers, so we do a sequential search through the hash table +- * for an unused number. ++ * These are arbitrary numbers, so we do a sequential search through the hash ++ * table for an unused number. + */ + static u32 make_nonsignal_event_id(struct kfd_process *p) + { + u32 id; + + for (id = p->next_nonsignal_event_id; +- id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL; ++ id < KFD_LAST_NONSIGNAL_EVENT_ID && ++ lookup_event_by_id(p, id) != NULL; + id++) + ; + + if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + + /* What if id == LAST_NONSIGNAL_EVENT_ID - 1? +- * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so the first loop +- * fails immediately and we proceed with the wraparound loop below. ++ * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so ++ * the first loop fails immediately and we proceed with the ++ * wraparound loop below. + */ + p->next_nonsignal_event_id = id + 1; + +@@ -349,7 +361,8 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) + } + + for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; +- id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL; ++ id < KFD_LAST_NONSIGNAL_EVENT_ID && ++ lookup_event_by_id(p, id) != NULL; + id++) + ; + +@@ -357,10 +370,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) + if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + p->next_nonsignal_event_id = id + 1; + return id; +- } else { +- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; +- return 0; + } ++ p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; ++ return 0; + } + + static struct kfd_event * +@@ -371,7 +383,8 @@ lookup_event_by_page_slot(struct kfd_process *p, + } + + static int +-create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) ++create_signal_event(struct file *devkfd, struct kfd_process *p, ++ struct kfd_event *ev) + { + if ((ev->type == KFD_EVENT_TYPE_SIGNAL) && + (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT)) { +@@ -408,9 +421,11 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event + p->debug_event_count++; + } + +- ev->user_signal_address = &ev->signal_page->user_address[ev->signal_slot_index]; ++ ev->user_signal_address = ++ &ev->signal_page->user_address[ev->signal_slot_index]; + +- ev->event_id = make_signal_event_id(ev->signal_page, ev->signal_slot_index); ++ ev->event_id = ++ make_signal_event_id(ev->signal_page, ev->signal_slot_index); + + pr_debug("signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, +@@ -420,7 +435,9 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event + } + + /* No non-signal events are supported yet. +- * We create them as events that never signal. Set event calls from user-mode are failed. */ ++ * We create them as events that never signal. Set event calls from user-mode ++ * are failed. ++ */ + static int + create_other_event(struct kfd_process *p, struct kfd_event *ev) + { +@@ -456,7 +473,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) + } + } + +- /* Abandon the list of waiters. Individual waiting threads will clean up their own data.*/ ++ /* Abandon the list of waiters. Individual waiting threads will clean ++ * up their own data. ++ */ + list_del(&ev->waiters); + + hash_del(&ev->events); +@@ -479,13 +498,15 @@ static void destroy_events(struct kfd_process *p) + destroy_event(p, ev); + } + +-/* We assume that the process is being destroyed and there is no need to unmap the pages +- * or keep bookkeeping data in order. */ ++/* We assume that the process is being destroyed and there is no need to unmap ++ * the pages or keep bookkeeping data in order. ++ */ + static void shutdown_signal_pages(struct kfd_process *p) + { + struct signal_page *page, *tmp; + +- list_for_each_entry_safe(page, tmp, &p->signal_event_pages, event_pages) { ++ list_for_each_entry_safe(page, tmp, &p->signal_event_pages, ++ event_pages) { + if (page->user_address) { + free_pages((unsigned long)page->kernel_address, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); +@@ -502,7 +523,8 @@ void kfd_event_free_process(struct kfd_process *p) + + static bool event_can_be_gpu_signaled(const struct kfd_event *ev) + { +- return ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG; ++ return ev->type == KFD_EVENT_TYPE_SIGNAL || ++ ev->type == KFD_EVENT_TYPE_DEBUG; + } + + static bool event_can_be_cpu_signaled(const struct kfd_event *ev) +@@ -519,6 +541,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, + int ret = 0; + + struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL); ++ + if (!ev) + return -ENOMEM; + +@@ -648,7 +671,8 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) + + static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) + { +- page_slots(ev->signal_page)[ev->signal_slot_index] = UNSIGNALED_EVENT_SLOT; ++ page_slots(ev->signal_page)[ev->signal_slot_index] = ++ UNSIGNALED_EVENT_SLOT; + } + + static bool is_slot_signaled(struct signal_page *page, unsigned int index) +@@ -656,7 +680,8 @@ static bool is_slot_signaled(struct signal_page *page, unsigned int index) + return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; + } + +-static void set_event_from_interrupt(struct kfd_process *p, struct kfd_event *ev) ++static void set_event_from_interrupt(struct kfd_process *p, ++ struct kfd_event *ev) + { + if (ev && event_can_be_gpu_signaled(ev)) { + acknowledge_signal(p, ev); +@@ -674,6 +699,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + * running so the lookup function increments the process ref count. + */ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); ++ + if (!p) + return; /* Presumably process exited. */ + +@@ -686,19 +712,20 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + ev = lookup_event_by_id(p, partial_id); + set_event_from_interrupt(p, ev); + } else { +- /* Partial ID is in fact partial. For now we completely ignore it, +- * but we could use any bits we did receive to search faster. */ ++ /* Partial ID is in fact partial. For now we completely ignore ++ * it, but we could use any bits we did receive to search ++ * faster. ++ */ + struct signal_page *page; +- unsigned i; ++ unsigned int i; + +- list_for_each_entry(page, &p->signal_event_pages, event_pages) { +- for (i = 0; i < SLOTS_PER_PAGE; i++) { ++ list_for_each_entry(page, &p->signal_event_pages, event_pages) ++ for (i = 0; i < SLOTS_PER_PAGE; i++) + if (is_slot_signaled(page, i)) { +- ev = lookup_event_by_page_slot(p, page, i); ++ ev = lookup_event_by_page_slot(p, ++ page, i); + set_event_from_interrupt(p, ev); + } +- } +- } + } + + mutex_unlock(&p->event_mutex); +@@ -710,7 +737,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) + struct kfd_event_waiter *event_waiters; + uint32_t i; + +- event_waiters = kmalloc(num_events * sizeof(struct kfd_event_waiter), GFP_KERNEL); ++ event_waiters = kmalloc_array(num_events, ++ sizeof(struct kfd_event_waiter), GFP_KERNEL); + + if (event_waiters) { + for (i = 0; i < num_events; i++) { +@@ -746,7 +774,8 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) + struct kfd_event *ev = waiter->event; + + /* Only add to the wait list if we actually need to +- * wait on this event. */ ++ * wait on this event. ++ */ + if (!waiter->activated) + list_add(&waiter->waiters, &ev->waiters); + } +@@ -783,8 +812,8 @@ static bool copy_signaled_event_data(uint32_t num_events, + if (event_waiters[i].activated && + event_waiters[i].event->type == KFD_EVENT_TYPE_MEMORY) + if (copy_to_user(&data[event_waiters[i].input_index].memory_exception_data, +- &event_waiters[i].event->memory_exception_data, +- sizeof(struct kfd_hsa_memory_exception_data))) ++ &event_waiters[i].event->memory_exception_data, ++ sizeof(struct kfd_hsa_memory_exception_data))) + return false; + + return true; +@@ -803,7 +832,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms) + + /* msecs_to_jiffies interprets all values above 2^31-1 as infinite, + * but we consider them finite. +- * This hack is wrong, but nobody is likely to notice. */ ++ * This hack is wrong, but nobody is likely to notice. ++ */ + user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF); + + return msecs_to_jiffies(user_timeout_ms) + 1; +@@ -835,7 +865,8 @@ int kfd_wait_on_events(struct kfd_process *p, + mutex_lock(&p->event_mutex); + + /* Set to something unreasonable - this is really +- * just a bool for now. */ ++ * just a bool for now. ++ */ + *wait_result = KFD_WAIT_TIMEOUT; + + event_waiters = alloc_event_waiters(num_events); +@@ -889,10 +920,11 @@ int kfd_wait_on_events(struct kfd_process *p, + + if (signal_pending(current)) { + /* +- * This is wrong when a nonzero, non-infinite timeout is specified. +- * We need to use ERESTARTSYS_RESTARTBLOCK, but struct restart_block +- * contains a union with data for each user and it's in generic +- * kernel code that I don't want to touch yet. ++ * This is wrong when a nonzero, non-infinite timeout ++ * is specified. We need to use ++ * ERESTARTSYS_RESTARTBLOCK, but struct restart_block ++ * contains a union with data for each user and it's in ++ * generic kernel code that I don't want to touch yet. + */ + ret = -ERESTARTSYS; + break; +@@ -954,7 +986,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + page = lookup_signal_page_by_index(p, page_index); + if (!page) { + /* Probably KFD bug, but mmap is user-accessible. */ +- pr_debug("signal page could not be found for page_index %u\n", page_index); ++ pr_debug("signal page could not be found for page_index %u\n", ++ page_index); + return -EINVAL; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index 2fa5d32..a164fd5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -426,5 +426,6 @@ int kfd_init_apertures(struct kfd_process *process) + void kfd_flush_tlb(struct kfd_dev *dev, uint32_t pasid) + { + const struct kfd2kgd_calls *f2g = dev->kfd2kgd; ++ + f2g->invalidate_tlbs(dev->kgd, pasid); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +index c48fab5..258fdda 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +@@ -58,7 +58,8 @@ int kfd_interrupt_init(struct kfd_dev *kfd) + int r; + + r = kfifo_alloc(&kfd->ih_fifo, +- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, ++ KFD_IH_NUM_ENTRIES * ++ kfd->device_info->ih_ring_entry_size, + GFP_KERNEL); + if (r) { + dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); +@@ -159,7 +160,8 @@ static void interrupt_wq(struct work_struct *work) + sizeof(uint32_t))]; + + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) +- dev->device_info->event_interrupt_class->interrupt_wq(dev, ih_ring_entry); ++ dev->device_info->event_interrupt_class->interrupt_wq(dev, ++ ih_ring_entry); + } + + bool interrupt_is_wanted(struct kfd_dev *dev, +@@ -167,7 +169,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev, + uint32_t *patched_ihre, bool *flag) + { + /* integer and bitwise OR so there is no boolean short-circuiting */ +- unsigned wanted = 0; ++ unsigned int wanted = 0; + + wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, + ih_ring_entry, patched_ihre, flag); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index b826689..4a67e76 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -244,7 +244,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, + + if (wptr + packet_size_in_dwords >= queue_size_dwords) { + /* make sure after rolling back to position 0, there is +- * still enough space. */ ++ * still enough space. ++ */ + if (packet_size_in_dwords >= rptr) { + *buffer_ptr = NULL; + return -ENOMEM; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +index 6f12fe0..007a3ea 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +@@ -165,7 +165,7 @@ int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, +- sizeof(struct pm4_mes_map_queues)); ++ sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index e6876f6..2126ec5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -75,7 +75,7 @@ MODULE_PARM_DESC(send_sigterm, + + static int amdkfd_init_completed; + +-int debug_largebar = 0; ++int debug_largebar; + module_param(debug_largebar, int, 0444); + MODULE_PARM_DESC(debug_largebar, + "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); +@@ -90,7 +90,8 @@ module_param_named(noretry, vega10_noretry, int, 0644); + MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + +-int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) ++int kgd2kfd_init(unsigned int interface_version, ++ const struct kgd2kfd_calls **g2f) + { + if (!amdkfd_init_completed) + return -EPROBE_DEFER; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 41d28b3..4dff1ec 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -249,7 +249,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE; + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; +- if (atc_bit) { ++ if (atc_bit) { + m->cp_hqd_pq_control |= PQ_ATC_EN; + m->cp_hqd_ib_control |= IB_ATC_EN; + } +@@ -268,9 +268,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + + m->cp_hqd_vmid = q->vmid; + +- if (q->format == KFD_QUEUE_FORMAT_AQL) { ++ if (q->format == KFD_QUEUE_FORMAT_AQL) + m->cp_hqd_pq_control |= NO_UPDATE_RPTR; +- } + + update_cu_mask(mm, mqd, q); + set_priority(m, q); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index 0aeebc1..ddca15f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -396,6 +396,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) + { + struct vi_sdma_mqd *m; ++ + BUG_ON(!mm || !mqd || !q); + + m = get_sdma_mqd(mqd); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +index 6cfe7f1..b3f7d43 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +@@ -32,7 +32,8 @@ int kfd_pasid_init(void) + { + pasid_limit = KFD_MAX_NUM_OF_PROCESSES; + +- pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); ++ pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), ++ GFP_KERNEL); + if (!pasid_bitmap) + return -ENOMEM; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +index 3fb8896..937c0ac 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +@@ -485,7 +485,6 @@ void kfd_init_peer_direct(void) + } + + pr_info("amdkfd: PeerDirect support was initialized successfully\n"); +- return; + } + + /** +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +index 05e692b..31cef21 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +@@ -28,14 +28,14 @@ + #define PM4_MES_HEADER_DEFINED + union PM4_MES_TYPE_3_HEADER { + struct { +- uint32_t reserved1:8; /* < reserved */ +- uint32_t opcode:8; /* < IT opcode */ +- uint32_t count:14; /* < number of DWORDs - 1 +- * in the information body. +- */ +- uint32_t type:2; /* < packet identifier. +- * It should be 3 for type 3 packets +- */ ++ /* reserved */ ++ uint32_t reserved1:8; ++ /* IT opcode */ ++ uint32_t opcode:8; ++ /* number of DWORDs - 1 in the information body */ ++ uint32_t count:14; ++ /* packet identifier. It should be 3 for type 3 packets */ ++ uint32_t type:2; + }; + uint32_t u32all; + }; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +index 8cb3094..7c8d9b3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +@@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ +- uint32_t count : 14;/* < number of DWORDs - 1 in the +- information body. */ +- uint32_t type : 2; /* < packet identifier. +- It should be 3 for type 3 packets */ ++ uint32_t count : 14;/* < Number of DWORDS - 1 in the ++ * information body ++ */ ++ uint32_t type : 2; /* < packet identifier ++ * It should be 3 for type 3 packets ++ */ + }; + uint32_t u32All; + }; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 501eea4..a774152 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -188,9 +188,11 @@ enum asic_family_type { + #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) + + struct kfd_event_interrupt_class { +- bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry, +- uint32_t *patched_ihre, bool *patched_flag); +- void (*interrupt_wq)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); ++ bool (*interrupt_isr)(struct kfd_dev *dev, ++ const uint32_t *ih_ring_entry, uint32_t *patched_ihre, ++ bool *patched_flag); ++ void (*interrupt_wq)(struct kfd_dev *dev, ++ const uint32_t *ih_ring_entry); + }; + + struct kfd_device_info { +@@ -401,13 +403,13 @@ enum KFD_QUEUE_PRIORITY { + * @write_ptr: Defines the number of dwords written to the ring buffer. + * + * @doorbell_ptr: This field aim is to notify the H/W of new packet written to +- * the queue ring buffer. This field should be similar to write_ptr and the user +- * should update this field after he updated the write_ptr. ++ * the queue ring buffer. This field should be similar to write_ptr and the ++ * user should update this field after he updated the write_ptr. + * + * @doorbell_off: The doorbell offset in the doorbell pci-bar. + * +- * @is_interop: Defines if this is a interop queue. Interop queue means that the +- * queue can access both graphics and compute resources. ++ * @is_interop: Defines if this is a interop queue. Interop queue means that ++ * the queue can access both graphics and compute resources. + * + * @is_active: Defines if the queue is active or not. + * +@@ -466,9 +468,10 @@ struct queue_properties { + * @properties: The queue properties. + * + * @mec: Used only in no cp scheduling mode and identifies to micro engine id +- * that the queue should be execute on. ++ * that the queue should be execute on. + * +- * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. ++ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe ++ * id. + * + * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. + * +@@ -552,7 +555,7 @@ struct qcm_process_device { + unsigned int queue_count; + unsigned int vmid; + bool is_debug; +- unsigned evicted; /* eviction counter, 0=active */ ++ unsigned int evicted; /* eviction counter, 0=active */ + /* + * All the memory management data should be here too + */ +@@ -601,9 +604,11 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct fence *fence); + + +-/*8 byte handle containing GPU ID in the most significant 4 bytes and +- * idr_handle in the least significant 4 bytes*/ +-#define MAKE_HANDLE(gpu_id, idr_handle) (((uint64_t)(gpu_id) << 32) + idr_handle) ++/* 8 byte handle containing GPU ID in the most significant 4 bytes and ++ * idr_handle in the least significant 4 bytes ++ */ ++#define MAKE_HANDLE(gpu_id, idr_handle) \ ++ (((uint64_t)(gpu_id) << 32) + idr_handle) + #define GET_GPU_ID(handle) (handle >> 32) + #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) + +@@ -642,7 +647,8 @@ struct kfd_process_device { + + uint64_t sh_hidden_private_base_vmid; + +- /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ ++ /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) ++ */ + enum kfd_pdd_bound bound; + + /* VM context for GPUVM allocations */ +@@ -711,7 +717,8 @@ struct kfd_process { + + struct process_queue_manager pqm; + +- unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; ++ unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, ++ BITS_PER_LONG)]; + + /*Is the user space process 32 bit?*/ + bool is_32bit_user_mode; +@@ -720,7 +727,8 @@ struct kfd_process { + struct mutex event_mutex; + /* All events in process hashed by ID, linked on kfd_event.events. */ + DECLARE_HASHTABLE(events, 4); +- struct list_head signal_event_pages; /* struct slot_page_header.event_pages */ ++ /* struct slot_page_header.event_pages */ ++ struct list_head signal_event_pages; + u32 next_nonsignal_event_id; + size_t signal_event_count; + size_t debug_event_count; +@@ -760,7 +768,7 @@ struct amdkfd_ioctl_desc { + void kfd_process_create_wq(void); + void kfd_process_destroy_wq(void); + struct kfd_process *kfd_create_process(struct file *filep); +-struct kfd_process *kfd_get_process(const struct task_struct *); ++struct kfd_process *kfd_get_process(const struct task_struct *task); + struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); + struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); + void kfd_unref_process(struct kfd_process *p); +@@ -777,7 +785,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, + struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); + +-int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma); ++int kfd_reserved_mem_mmap(struct kfd_process *process, ++ struct vm_area_struct *vma); + + /* KFD process API for creating and translating handles */ + int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, +@@ -802,9 +811,11 @@ int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd); + int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd); + + /* Process device data iterator */ +-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); +-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, +- struct kfd_process_device *pdd); ++struct kfd_process_device *kfd_get_first_process_device_data( ++ struct kfd_process *p); ++struct kfd_process_device *kfd_get_next_process_device_data( ++ struct kfd_process *p, ++ struct kfd_process_device *pdd); + bool kfd_has_process_device_data(struct kfd_process *p); + + /* PASIDs */ +@@ -938,7 +949,7 @@ struct packet_manager { + struct mutex lock; + bool allocated; + struct kfd_mem_obj *ib_buffer_obj; +- unsigned ib_size_bytes; ++ unsigned int ib_size_bytes; + + struct packet_manager_funcs *pmf; + }; +@@ -1046,7 +1057,8 @@ int kfd_wait_on_events(struct kfd_process *p, + uint32_t num_events, void __user *data, + bool all, uint32_t user_timeout_ms, + enum kfd_event_wait_result *wait_result); +-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); ++void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, ++ uint32_t valid_id_bits); + #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + void kfd_signal_iommu_event(struct kfd_dev *dev, + unsigned int pasid, unsigned long address, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 601e551..025ee5d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -366,7 +366,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) + /* No process locking is needed in this function, because the process + * is not findable any more. We must assume that no other thread is + * using it any more, otherwise we couldn't safely free the process +- * stucture in the end. */ ++ * structure in the end. ++ */ + static void kfd_process_wq_release(struct work_struct *work) + { + struct kfd_process *p = container_of(work, struct kfd_process, +@@ -448,7 +449,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, + + /* Iterate over all process device data structures and if the pdd is in + * debug mode,we should first force unregistration, then we will be +- * able to destroy the queues */ ++ * able to destroy the queues ++ */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + dev = pdd->dev; + mutex_lock(get_dbgmgr_mutex()); +@@ -611,8 +613,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, + process->last_restore_timestamp = get_jiffies_64(); + + /* If PeerDirect interface was not detected try to detect it again +- * in case if network driver was loaded later. +- */ ++ * in case if network driver was loaded later. ++ */ + kfd_init_peer_direct(); + + return process; +@@ -859,14 +861,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) + } + #endif /* CONFIG_AMD_IOMMU_V2 */ + +-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) ++struct kfd_process_device *kfd_get_first_process_device_data( ++ struct kfd_process *p) + { + return list_first_entry(&p->per_device_data, + struct kfd_process_device, + per_device_list); + } + +-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, ++struct kfd_process_device *kfd_get_next_process_device_data( ++ struct kfd_process *p, + struct kfd_process_device *pdd) + { + if (list_is_last(&pdd->per_device_list, &p->per_device_data)) +@@ -880,7 +884,8 @@ bool kfd_has_process_device_data(struct kfd_process *p) + } + + /* Create specific handle mapped to mem from process local memory idr +- * Assumes that the process lock is held. */ ++ * Assumes that the process lock is held. ++ */ + int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, + void *mem, uint64_t start, + uint64_t length, +@@ -935,7 +940,8 @@ struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd, + } + + /* Translate specific handle from process local memory idr +- * Assumes that the process lock is held. */ ++ * Assumes that the process lock is held. ++ */ + void *kfd_process_device_translate_handle(struct kfd_process_device *pdd, + int handle) + { +@@ -973,7 +979,8 @@ void *kfd_process_find_bo_from_interval(struct kfd_process *p, + } + + /* Remove specific handle from process local memory idr +- * Assumes that the process lock is held. */ ++ * Assumes that the process lock is held. ++ */ + void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, + int handle) + { +@@ -1042,7 +1049,8 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) + return p; + } + +-int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma) ++int kfd_reserved_mem_mmap(struct kfd_process *process, ++ struct vm_area_struct *vma) + { + unsigned long pfn, i; + int ret = 0; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index 94e07ee..e4384ce 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -240,7 +240,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, + + case KFD_QUEUE_TYPE_COMPUTE: + /* check if there is over subscription */ +- if ((dev->dqm->sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ++ if ((dev->dqm->sched_policy == ++ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || + (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { + pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c +index 56bf9a2..2b3c300 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c +@@ -179,7 +179,7 @@ void run_rdma_free_callback(struct kfd_bo *buf_obj) + * This function release resources previously allocated by get_pages() call. + * + * \param p_p2p_data - A pointer to pointer to amd_p2p_info entries +- * allocated by get_pages() call. ++ * allocated by get_pages() call. + * + * \return 0 if operation was successful + */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +index 1d1992f..3e21aef 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +@@ -517,10 +517,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, + sysfs_show_64bit_prop(buffer, "local_mem_size", + local_mem_info.local_mem_size_private + + local_mem_info.local_mem_size_public); +- } +- else +- sysfs_show_64bit_prop(buffer, "local_mem_size", +- (unsigned long long int) 0); ++ } else ++ sysfs_show_64bit_prop(buffer, "local_mem_size", 0ULL); + + sysfs_show_32bit_prop(buffer, "fw_version", + dev->gpu->mec_fw_version); +@@ -881,17 +879,20 @@ static void kfd_debug_print_topology(void) + + down_read(&topology_lock); + +- dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); ++ dev = list_last_entry(&topology_device_list, ++ struct kfd_topology_device, list); + if (dev) { +- if (dev->node_props.cpu_cores_count && dev->node_props.simd_count) { ++ if (dev->node_props.cpu_cores_count && ++ dev->node_props.simd_count) { + pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", +- dev->node_props.device_id, dev->node_props.vendor_id); +- } +- else if (dev->node_props.cpu_cores_count) ++ dev->node_props.device_id, ++ dev->node_props.vendor_id); ++ } else if (dev->node_props.cpu_cores_count) + pr_info("Topology: Add CPU node\n"); + else if (dev->node_props.simd_count) + pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", +- dev->node_props.device_id, dev->node_props.vendor_id); ++ dev->node_props.device_id, ++ dev->node_props.vendor_id); + } + up_read(&topology_lock); + } +@@ -903,7 +904,8 @@ static void kfd_update_system_properties(void) + struct kfd_topology_device *dev; + + down_read(&topology_lock); +- dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); ++ dev = list_last_entry(&topology_device_list, ++ struct kfd_topology_device, list); + if (dev) { + sys_props.platform_id = + (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; +@@ -1020,8 +1022,7 @@ int kfd_topology_init(void) + * topology_device_list + */ + +- /* Initialize the head for the both the lists +- */ ++ /* Initialize the head for the both the lists */ + INIT_LIST_HEAD(&topology_device_list); + INIT_LIST_HEAD(&temp_topology_device_list); + init_rwsem(&topology_lock); +@@ -1031,7 +1032,8 @@ int kfd_topology_init(void) + /* Proximity domains in ACPI CRAT tables start counting at + * 0. The same should be true for virtual CRAT tables created + * at this stage. GPUs added later in kfd_topology_add_device +- * use a counter. */ ++ * use a counter. ++ */ + proximity_domain = 0; + + /* +@@ -1091,12 +1093,12 @@ int kfd_topology_init(void) + kfd_update_system_properties(); + kfd_debug_print_topology(); + pr_info("Finished initializing topology\n"); +- } +- else ++ } else + pr_err("Failed to update topology in sysfs ret=%d\n", ret); + + /* For nodes with GPU, this information gets added +- * when GPU is detected (kfd_topology_add_device). */ ++ * when GPU is detected (kfd_topology_add_device). ++ */ + if (cpu_only_node) { + /* Add additional information to CPU only node created above */ + down_write(&topology_lock); +@@ -1149,9 +1151,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) + return hashout; + } + /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If +- * the GPU device is not already present in the topology device list +- * then return NULL. This means a new topology device has to be +- * created for this GPU. ++ * the GPU device is not already present in the topology device ++ * list then return NULL. This means a new topology device has to ++ * be created for this GPU. + * TODO: Rather than assiging @gpu to first topology device withtout + * gpu attached, it will better to have more stringent check. + */ +@@ -1244,9 +1246,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + + /* Check to see if this gpu device exists in the topology_device_list. + * If so, assign the gpu to that device, +- * else create a Virtual CRAT for this gpu device and then parse that CRAT +- * to create a new topology device. Once created assign the gpu to that +- * topology device ++ * else create a Virtual CRAT for this gpu device and then parse that ++ * CRAT to create a new topology device. Once created assign the gpu to ++ * that topology device + */ + dev = kfd_assign_gpu(gpu); + if (!dev) { +@@ -1265,8 +1267,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + +- /* +- * Update the SYSFS tree, since we added another topology device ++ /* Update the SYSFS tree, since we added another topology ++ * device + */ + res = kfd_topology_update_sysfs(); + up_write(&topology_lock); +@@ -1284,13 +1286,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + gpu->id = gpu_id; + + /* TODO: Move the following lines to function +- * kfd_add_non_crat_information */ ++ * kfd_add_non_crat_information ++ */ + + /* Fill-in additional information that is not available in CRAT but +- * needed for the topology */ ++ * needed for the topology ++ */ + + dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); +- dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; ++ dev->node_props.simd_arrays_per_engine = ++ cu_info.num_shader_arrays_per_engine; + + dev->node_props.vendor_id = gpu->pdev->vendor; + dev->node_props.device_id = gpu->pdev->device; +@@ -1329,8 +1334,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + } + + /* Fix errors in CZ CRAT. +- * simd_count: Carrizo CRAT reports wrong simd_count, probably because it +- * doesn't consider masked out CUs ++ * simd_count: Carrizo CRAT reports wrong simd_count, probably because ++ * it doesn't consider masked out CUs + * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd. + * capability flag: Carrizo CRAT doesn't report IOMMU flags. + */ +@@ -1339,7 +1344,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + cu_info.simd_per_cu * cu_info.cu_active_number; + dev->node_props.max_waves_per_simd = 10; + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; +- } ++ } + + kfd_debug_print_topology(); + err: +@@ -1444,7 +1449,7 @@ int kfd_numa_node_to_apic_id(int numa_node_id) + int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) + { + struct kfd_topology_device *dev; +- unsigned i = 0; ++ unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); +@@ -1469,7 +1474,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) + int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) + { + struct kfd_topology_device *dev; +- unsigned i = 0; ++ unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); +-- +2.7.4 + |