From fc0872ed7666e177af85412cd69d22774fc8340f Mon Sep 17 00:00:00 2001
From: Kent Russell
Date: Tue, 6 Dec 2016 13:10:34 -0500
Subject: [PATCH 1692/4131] drm/amdkfd: Clean up KFD style errors and warnings

Using checkpatch.pl -f showed a number of style issues. This patch
addresses as many of them as possible. Some long lines have been left
for readability, but attempts to minimize them have been made. Also
clean up the usage of do..while(0) loops, which are mostly for
debugging anyway.

Change-Id: Ie8511447981a051f01b16a06833a70d9df0a85df
Signed-off-by: Kent Russell

Conflicts:
	drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 81 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 47 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 84 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +-
 drivers/gpu/drm/amd/amdkfd/cik_int.h | 3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 280 +++----
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 187 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 803 +++++++++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 12 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 288 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 292 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 +-
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 6 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 137 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 5 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 58 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 28 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_rdma.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 71 +-
 36 files changed, 1312 insertions(+), 1182 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 30e5893..d41cebf 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -34,7 +34,7 @@
 const struct kfd2kgd_calls *kfd2kgd;
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 unsigned int global_compute_vmid_bitmap = 0xFF00;
@@ -43,7 +43,7 @@ int amdgpu_amdkfd_init(void)
 	int ret;
 #if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
@@ -404,7 +404,8 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
 	cu_info->cu_active_number = acu_info.number;
 	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
-
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], sizeof(acu_info.bitmap)); + memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], + sizeof(acu_info.bitmap)); cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index ac167c8..0c797bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -153,7 +153,7 @@ static int amd_kfd_fence_signal(struct fence *f) * * This function is called when the reference count becomes zero. * It just RCU schedules freeing up the fence. -*/ + */ static void amd_kfd_fence_release(struct fence *f) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); @@ -173,7 +173,7 @@ static void amd_kfd_fence_release(struct fence *f) * * @f: [IN] fence * @mm: [IN] mm that needs to be verified -*/ + */ bool amd_kfd_fence_check_mm(struct fence *f, void *mm) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 5387fca..4adbf0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -68,7 +68,7 @@ enum { ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENTION = 0x03000000, - /* extend the mask to 26 bits in order to match the low address field. */ + /* extend the mask to 26 bits in order to match the low address field */ ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF }; @@ -93,7 +93,8 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); @@ -101,10 +102,13 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); * Register access functions */ -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -312,11 +316,12 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, /* * We have to assume that there is no outstanding mapping. 
- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a mapping - * is in progress or because a mapping finished and the SW cleared it. - * So the protocol is to always wait & clear. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a + * mapping is in progress or because a mapping finished and the SW + * cleared it. So the protocol is to always wait & clear. */ - uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | ATC_VMID0_PASID_MAPPING__VALID_MASK; + uint32_t pasid_mapping = (pasid == 0) ? 0 : + (uint32_t)pasid | ATC_VMID0_PASID_MAPPING__VALID_MASK; WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); @@ -502,12 +507,15 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdma_rlc_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); @@ -638,7 +646,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, pr_debug("IQ timer is active\n"); } else break; - loop: +loop: if (!retry) { pr_err("CP HQD IQ timer status time out\n"); break; @@ -728,8 +736,9 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) /* Turning off this address until we set all the registers */ for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], - cntl.u32All); + WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); return 0; } @@ -747,19 +756,23 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, /* Turning off this watch point until we set all the registers */ cntl.bitfields.valid = 0; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], addr_hi); - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); /* Enable the watch point */ cntl.bitfields.valid = 1; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); return 0; @@ -883,43 +896,35 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: - hdr = (const 
union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 833eba2..9f80b1e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -517,11 +517,15 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdmax_rlcx_virtual_addr); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); @@ -660,7 +664,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, pr_debug("IQ timer is active\n"); } else break; - loop: +loop: if (!retry) { pr_err("CP HQD IQ timer status time out\n"); break; @@ -840,8 +844,9 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) /* Turning off this address until we set all the registers */ for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], - cntl.u32All); + WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); return 0; } @@ -859,19 +864,23 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, /* Turning off this watch point until we set all the registers */ cntl.bitfields.valid = 0; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], 
addr_hi); - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); /* Enable the watch point */ cntl.bitfields.valid = 1; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); return 0; @@ -945,42 +954,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e315dc7..53a0e52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -644,7 +644,8 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, ENABLE, 1); WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { @@ -661,9 +662,12 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); @@ -718,7 +722,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high = upper_32_bits(queue_address >> 8); if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && - high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) retval = true; } release_queue(kgd); @@ -927,22 +931,30 @@ static void 
write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) * TODO 2: support range-based invalidation, requires kfg2kgd * interface change */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), req); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) + (1 << vmid))) cpu_relax(); - while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) cpu_relax(); } @@ -1034,19 +1046,13 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, /* Turning off this watch point until we set all the registers */ cntl.bitfields.valid = 0; - WREG32(watch_base_addr + - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], + WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL], cntl.u32All); - WREG32(watch_base_addr + - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], + WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI], addr_hi); - WREG32(watch_base_addr + - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], + WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); /* Enable the watch point */ @@ -1121,43 +1127,35 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - 
adev->sdma.instance[0].fw->data; + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; break; default: @@ -1199,9 +1197,9 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); + lower_32_bits(adev->vm_manager.max_pfn - 1)); WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); + upper_32_bits(adev->vm_manager.max_pfn - 1)); WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); @@ -1210,9 +1208,9 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); + lower_32_bits(adev->vm_manager.max_pfn - 1)); WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); + upper_32_bits(adev->vm_manager.max_pfn - 1)); WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3129383..1b84fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -42,7 +42,8 @@ #include "gmc/gmc_8_1_d.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to - * a HW bug. */ + * a HW bug. + */ #define VI_BO_SIZE_ALIGN (0x8000) /* BO flag to indicate a KFD userptr BO */ @@ -663,7 +664,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, va, size, domain_string(alloc_domain)); /* Allocate buffer object. Userptr objects need to start out - * in the CPU domain, get moved to GTT when pinned. */ + * in the CPU domain, get moved to GTT when pinned. + */ ret = amdgpu_bo_create(adev, size, byte_align, false, alloc_domain, flags, sg, NULL, &bo); @@ -719,7 +721,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, * back-off the reservation and then reacquire it. Track all the * reservation info in a context structure. Buffers can be mapped to * multiple VMs simultaneously (buffers being restored on multiple - * GPUs). */ + * GPUs). 
+ */ struct bo_vm_reservation_context { struct amdgpu_bo_list_entry kfd_bo; unsigned int n_vms; @@ -802,7 +805,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, { struct amdgpu_bo *bo = mem->bo; struct kfd_bo_va_list *entry; - unsigned i; + unsigned int i; int ret; ctx->reserved = false; @@ -1158,7 +1161,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_unlock(&mem->lock); /* lock is not needed after this, since mem is unused and will - * be freed anyway */ + * be freed anyway + */ /* No more MMU notifiers */ amdgpu_mn_unregister(mem->bo); @@ -1527,7 +1531,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( { struct kfd_bo_va_list *entry; struct amdgpu_device *adev; - unsigned mapped_before; + unsigned int mapped_before; int ret = 0; struct bo_vm_reservation_context ctx; struct amdkfd_process_info *process_info; diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 9054068..ff8255d 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -34,7 +34,8 @@ struct cik_ih_ring_entry { uint32_t reserved3:4; /* pipeid, meid and unused3 are officially called RINGID, - * but for our purposes, they always decode into pipe and ME. */ + * but for our purposes, they always decode into pipe and ME. + */ uint32_t pipeid:2; uint32_t meid:2; uint32_t reserved4:4; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0c4ea11..8b35b70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -557,14 +557,17 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) dev = kfd_device_by_id(args->gpu_id); if (!dev) { - dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__); + dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", + __func__); return status; } down_write(&p->lock); mutex_lock(get_dbgmgr_mutex()); - /* make sure that we have pdd, if this the first queue created for this process */ + /* make sure that we have pdd, if this the first queue created for + * this process + */ pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { mutex_unlock(get_dbgmgr_mutex()); @@ -599,12 +602,10 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, struct kfd_dev *dev; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) - return -EINVAL; - - if (dev->device_info->asic_family == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); - return -EINVAL; + if (!dev) { + dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", + __func__); + return status; } mutex_lock(get_dbgmgr_mutex()); @@ -646,92 +647,90 @@ kfd_ioctl_dbg_address_watch(struct file *filep, memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); - do { - dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - dev_info(NULL, - "Error! kfd: In func %s >> get device by id failed\n", - __func__); - break; - } - - cmd_from_user = (void __user *) args->content_ptr; - - if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) { - status = -EINVAL; - break; - } - - if (args->buf_size_in_bytes <= sizeof(*args)) { - status = -EINVAL; - break; - } + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + dev_info(NULL, + "Error! 
kfd: In func %s >> get device by id failed\n", + __func__); + return -EFAULT; + } - /* this is the actual buffer to work with */ + cmd_from_user = (void __user *) args->content_ptr; - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); + if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE || + (args->buf_size_in_bytes <= sizeof(*args))) + return -EINVAL; - aw_info.process = p; + /* this is the actual buffer to work with */ + args_buff = memdup_user(cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); - aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(aw_info.num_watch_points); + if (IS_ERR(args_buff)) + return PTR_ERR(args_buff); - aw_info.watch_mode = (HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; - args_idx += sizeof(HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; + aw_info.process = p; - /* set watch address base pointer to point on the array base within args_buff */ + aw_info.num_watch_points = + *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(aw_info.num_watch_points); - aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; + aw_info.watch_mode = + (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; + args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * + aw_info.num_watch_points; - /*skip over the addresses buffer */ - args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; + /* set watch address base pointer to point on the array base + * within args_buff + */ - if (args_idx >= args->buf_size_in_bytes) { - status = -EINVAL; - break; - } + aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; - watch_mask_value = (uint64_t) args_buff[args_idx]; + /* skip over the addresses buffer */ + args_idx += sizeof(aw_info.watch_address) * + aw_info.num_watch_points; - if (watch_mask_value > 0) { - /* there is an array of masks */ + if (args_idx >= args->buf_size_in_bytes) { + status = -EINVAL; + goto out; + } - /* set watch mask base pointer to point on the array base within args_buff */ - aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; + watch_mask_value = (uint64_t) args_buff[args_idx]; - /*skip over the masks buffer */ - args_idx += sizeof(aw_info.watch_mask) * aw_info.num_watch_points; - } + if (watch_mask_value > 0) { + /* there is an array of masks */ - else - /* just the NULL mask, set to NULL and skip over it */ - { - aw_info.watch_mask = NULL; - args_idx += sizeof(aw_info.watch_mask); - } + /* set watch mask base pointer to point on the array + * base within args_buff + */ + aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; - if (args_idx > args->buf_size_in_bytes) { - status = -EINVAL; - break; - } + /* skip over the masks buffer */ + args_idx += sizeof(aw_info.watch_mask) * + aw_info.num_watch_points; + } - aw_info.watch_event = NULL; /* Currently HSA Event is not supported for DBG */ - status = 0; + else + /* just the NULL mask, set to NULL and skip over it */ + { + aw_info.watch_mask = NULL; + args_idx += sizeof(aw_info.watch_mask); + } - } while (0); + if (args_idx > args->buf_size_in_bytes) { + status = -EINVAL; + goto out; + } - if (status == 0) { - mutex_lock(get_dbgmgr_mutex()); + /* Currently HSA Event is not supported for DBG */ + aw_info.watch_event = NULL; - status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); + mutex_lock(get_dbgmgr_mutex()); - mutex_unlock(get_dbgmgr_mutex()); + status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); - } + mutex_unlock(get_dbgmgr_mutex()); +out: kfree(args_buff); 
return status; @@ -739,11 +738,13 @@ kfd_ioctl_dbg_address_watch(struct file *filep, /* * Parse and generate fixed size data structure for wave control. - * Buffer is generated in a "packed" form, for avoiding structure packing/pending dependencies. + * Buffer is generated in a "packed" form, for avoiding structure + * packing/pending dependencies. */ static int -kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data) +kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, + void *data) { long status = -EFAULT; struct kfd_ioctl_dbg_wave_control_args *args = data; @@ -768,78 +769,76 @@ kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data dev_info(NULL, "kfd: In func %s - start\n", __func__); - do { - dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__); - break; - } - - /* input size must match the computed "compact" size */ - - if (args->buf_size_in_bytes != computed_buff_size) { - dev_info(NULL, - "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n", - __func__, args->buf_size_in_bytes, computed_buff_size); - status = -EINVAL; - break; - } - - cmd_from_user = (void __user *) args->content_ptr; - - /* copy the entire buffer from user */ + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", + __func__); + return -EFAULT; + } - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); + /* input size must match the computed "compact" size */ - if (copy_from_user(args_buff, - (void __user *) args->content_ptr, - args->buf_size_in_bytes - sizeof(*args))) { - dev_info(NULL, - "Error! kfd: In func %s >> copy_from_user failed\n", - __func__); - break; - } + if (args->buf_size_in_bytes != computed_buff_size) { + dev_info(NULL, + "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n", + __func__, args->buf_size_in_bytes, + computed_buff_size); + return -EINVAL; + } - /* move ptr to the start of the "pay-load" area */ + cmd_from_user = (void __user *) args->content_ptr; + /* copy the entire buffer from user */ - wac_info.process = p; + args_buff = memdup_user(cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); + if (IS_ERR(args_buff)) + return PTR_ERR(args_buff); - wac_info.operand = (HSA_DBG_WAVEOP) *((HSA_DBG_WAVEOP *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.operand); + if (copy_from_user(args_buff, + (void __user *) args->content_ptr, + args->buf_size_in_bytes - sizeof(*args))) { + dev_info(NULL, + "Error! 
kfd: In func %s >> copy_from_user failed\n", + __func__); + goto out; + } - wac_info.mode = (HSA_DBG_WAVEMODE) *((HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.mode); + /* move ptr to the start of the "pay-load" area */ - wac_info.trapId = (uint32_t) *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.trapId); + wac_info.process = p; - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = *((uint32_t *)(&args_buff[args_idx])); - wac_info.dbgWave_msg.MemoryVA = NULL; + wac_info.operand = + *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.operand); + wac_info.mode = + *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.mode); - status = 0; + wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.trapId); - } while (0); - if (status == 0) { - mutex_lock(get_dbgmgr_mutex()); + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = + *((uint32_t *)(&args_buff[args_idx])); + wac_info.dbgWave_msg.MemoryVA = NULL; - dev_info(NULL, - "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", - __func__, wac_info.process, wac_info.operand, wac_info.mode, wac_info.trapId, - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + mutex_lock(get_dbgmgr_mutex()); - status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); + dev_info(NULL, + "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", + __func__, wac_info.process, wac_info.operand, + wac_info.mode, wac_info.trapId, + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", __func__, status); + status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); - mutex_unlock(get_dbgmgr_mutex()); + dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", + __func__, status); - } + mutex_unlock(get_dbgmgr_mutex()); +out: kfree(args_buff); return status; @@ -935,7 +934,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, "scratch_limit %llX\n", pdd->scratch_limit); args->num_of_nodes++; - } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && + } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != + NULL && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } @@ -957,7 +957,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, if (args->num_of_nodes == 0) { /* Return number of nodes, so that user space can alloacate - * sufficient memory */ + * sufficient memory + */ down_write(&p->lock); if (!kfd_has_process_device_data(p)) { @@ -978,7 +979,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, /* Fill in process-aperture information for all available * nodes, but not more than args->num_of_nodes as that is - * the amount of memory allocated by user */ + * the amount of memory allocated by user + */ pa = kzalloc((sizeof(struct kfd_process_device_apertures) * args->num_of_nodes), GFP_KERNEL); if (!pa) @@ -1290,7 +1292,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, pdd->vm); /* If freeing the buffer failed, leave the handle in place for - * clean-up during process tear-down. */ + * clean-up during process tear-down. 
+ */ if (ret == 0) { down_write(&p->lock); kfd_process_device_remove_obj_handle( @@ -1553,14 +1556,16 @@ static int kfd_ioctl_open_graphic_handle(struct file *filep, down_write(&p->lock); /*TODO: When open_graphic_handle is implemented, we need to create - * the corresponding interval tree. We need to know the size of - * the buffer through open_graphic_handle(). We use 1 for now.*/ + * the corresponding interval tree. We need to know the size of + * the buffer through open_graphic_handle(). We use 1 for now. + */ idr_handle = kfd_process_device_create_obj_handle(pdd, mem, args->va_addr, 1, NULL); up_write(&p->lock); if (idr_handle < 0) { /* FIXME: destroy_process_gpumem doesn't seem to be - * implemented anywhere */ + * implemented anywhere + */ dev->kfd2kgd->destroy_process_gpumem(dev->kgd, mem); return -EFAULT; } @@ -1606,7 +1611,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kgd_dev *dma_buf_kgd; void *metadata_buffer = NULL; uint32_t flags; - unsigned i; + unsigned int i; int r; /* Find a KFD GPU device that supports the get_dmabuf_info query */ @@ -2003,7 +2008,8 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep, #endif #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ + .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index ab35190..55c5e4e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -12,11 +12,13 @@ /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs - * used in the CRAT. */ + * used in the CRAT. + */ static uint32_t gpu_processor_id_low = 0x80001000; /* Return the next available gpu_processor_id and increment it for next GPU - * @total_cu_count - Total CUs present in the GPU including ones masked off + * @total_cu_count - Total CUs present in the GPU including ones + * masked off */ static inline unsigned int get_and_inc_gpu_processor_id( unsigned int total_cu_count) @@ -33,7 +35,8 @@ struct kfd_gpu_cache_info { uint32_t cache_level; uint32_t flags; /* Indicates how many Compute Units share this cache - * Value = 1 indicates the cache is not shared */ + * Value = 1 indicates the cache is not shared + */ uint32_t num_cu_shared; }; @@ -104,7 +107,8 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { }; /* NOTE: In future if more information is added to struct kfd_gpu_cache_info - * the following ASICs may need a separate table. */ + * the following ASICs may need a separate table. 
+ */ #define hawaii_cache_info kaveri_cache_info #define tonga_cache_info carrizo_cache_info #define fiji_cache_info carrizo_cache_info @@ -151,7 +155,7 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, } /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct - * topology device present in the device_list + * topology device present in the device_list */ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, struct list_head *device_list) @@ -177,7 +181,7 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, } /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct - * topology device present in the device_list + * topology device present in the device_list */ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, struct list_head *device_list) @@ -195,9 +199,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, if (props == NULL) return -ENOMEM; - /* - * We're on GPU node - */ + /* We're on GPU node */ if (dev->node_props.cpu_cores_count == 0) { /* APU */ if (mem->visibility_type == 0) @@ -206,8 +208,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, /* dGPU */ else props->heap_type = mem->visibility_type; - } - else + } else props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) @@ -231,7 +232,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, } /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct - * topology device present in the device_list + * topology device present in the device_list */ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, struct list_head *device_list) @@ -254,8 +255,8 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, * Compute Unit. So map the cache using CPU core Id or SIMD * (GPU) ID. * TODO: This works because currently we can safely assume that - * Compute Units are parsed before caches are parsed. In future - * remove this dependency + * Compute Units are parsed before caches are parsed. 
In + * future, remove this dependency */ if ((id >= dev->node_props.cpu_core_id_base && id <= dev->node_props.cpu_core_id_base + @@ -298,7 +299,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, } /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct - * topology device present in the device_list + * topology device present in the device_list */ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, struct list_head *device_list) @@ -313,7 +314,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, id_from = iolink->proximity_domain_from; id_to = iolink->proximity_domain_to; - pr_debug("Found IO link entry in CRAT table with id_from=%d\n", id_from); + pr_debug("Found IO link entry in CRAT table with id_from=%d\n", + id_from); list_for_each_entry(dev, device_list, list) { if (id_from == dev->proximity_domain) { props = kfd_alloc_struct(props); @@ -368,7 +370,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, } /* kfd_parse_subtype - parse subtypes and attach it to correct topology device - * present in the device_list + * present in the device_list * @sub_type_hdr - subtype section of crat_image * @device_list - list of topology devices present in this crat_image */ @@ -397,15 +399,11 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, ret = kfd_parse_subtype_cache(cache, device_list); break; case CRAT_SUBTYPE_TLB_AFFINITY: - /* - * For now, nothing to do here - */ + /* For now, nothing to do here */ pr_debug("Found TLB entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: - /* - * For now, nothing to do here - */ + /* For now, nothing to do here */ pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_IOLINK_AFFINITY: @@ -421,12 +419,13 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, } /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT - * create a kfd_topology_device and add in to device_list. Also parse - * CRAT subtypes and attach it to appropriate kfd_topology_device + * create a kfd_topology_device and add in to device_list. Also parse + * CRAT subtypes and attach it to appropriate kfd_topology_device * @crat_image - input image containing CRAT - * @device_list - [OUT] list of kfd_topology_device generated after parsing - * crat_image + * @device_list - [OUT] list of kfd_topology_device generated after + * parsing crat_image * @proximity_domain - Proximity domain of the first device in the table + * * Return - 0 if successful else -ve value */ int kfd_parse_crat_table(void *crat_image, @@ -445,9 +444,8 @@ int kfd_parse_crat_table(void *crat_image, if (!crat_image) return -EINVAL; - if (!list_empty(device_list)) { + if (!list_empty(device_list)) pr_warn("Error device list should be empty\n"); - } num_nodes = crat_table->num_domains; image_len = crat_table->length; @@ -465,7 +463,8 @@ int kfd_parse_crat_table(void *crat_image, return -ENOMEM; memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); - memcpy(top_dev->oem_table_id, crat_table->oem_table_id, CRAT_OEMTABLEID_LENGTH); + memcpy(top_dev->oem_table_id, crat_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); top_dev->oem_revision = crat_table->oem_revision; last_header_type = last_header_length = 0; @@ -527,7 +526,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, /* CU could be inactive. In case of shared cache find the first active * CU. 
and incase of non-shared cache check if the CU is inactive. If - * inactive active skip it*/ + * inactive active skip it + */ if (first_active_cu) { memset(pcache, 0, sizeof(struct crat_subtype_cache)); pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; @@ -539,7 +539,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, pcache->cache_size = pcache_info[cache_type].cache_size; /* Sibling map is w.r.t processor_id_low, so shift out - * inactive CU */ + * inactive CU + */ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); @@ -555,9 +556,12 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache, return 1; } -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info tables +/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info + * tables + * * @kdev - [IN] GPU device - * @gpu_processor_id - [IN] GPU processor ID to which these caches associate + * @gpu_processor_id - [IN] GPU processor ID to which these caches + * associate * @available_size - [IN] Amount of memory available in pcache * @cu_info - [IN] Compute Unit info obtained from KGD * @pcache - [OUT] memory into which cache data is to be filled in. @@ -674,15 +678,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, /* * kfd_create_crat_image_acpi - Allocates memory for CRAT image and - * copies CRAT from ACPI (if available). - * + * copies CRAT from ACPI (if available). * NOTE: Call kfd_destroy_crat_image to free CRAT image memory * - * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then - * *crat_image will be NULL - * @size: [OUT] size of crat_image + * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then + * crat_image will be NULL + * @size: [OUT] size of crat_image * - * Return 0 if successful else return -ve value + * Return 0 if successful else return -ve value */ #ifdef CONFIG_ACPI int kfd_create_crat_image_acpi(void **crat_image, size_t *size) @@ -696,15 +699,14 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) *crat_image = NULL; - /* - * Fetch the CRAT table from ACPI - */ + /* Fetch the CRAT table from ACPI */ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); if (status == AE_NOT_FOUND) { pr_warn("CRAT table not found\n"); return -ENODATA; } else if (ACPI_FAILURE(status)) { const char *err = acpi_format_exception(status); + pr_err("CRAT table error: %s\n", err); return -EINVAL; } @@ -740,11 +742,11 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node * - * @numa_node_id: CPU NUMA node id - * @avail_size: Available size in the memory - * @sub_type_hdr: Memory into which compute info will be filled in + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in * - * Return 0 if successful else return -ve value + * Return 0 if successful else return -ve value */ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, int proximity_domain, @@ -779,11 +781,11 @@ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node * - * @numa_node_id: CPU NUMA node id - * @avail_size: Available size in the memory - * @sub_type_hdr: Memory into which compute info will be filled in + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in * - * Return 0 if successful else 
return -ve value + * Return 0 if successful else return -ve value */ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, int proximity_domain, @@ -808,7 +810,8 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, /* Unlike si_meminfo, si_meminfo_node is not exported. So * the following lines are duplicated from si_meminfo_node - * function */ + * function + */ pgdat = NODE_DATA(numa_node_id); for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) @@ -874,7 +877,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, * * @pcrat_image: Fill in VCRAT for CPU * @size: [IN] allocated size of crat_image. - * [OUT] actual size of data filled in crat_image + * [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) { @@ -902,7 +905,8 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) return -ENOMEM; memset(crat_table, 0, sizeof(struct crat_header)); - memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature)); + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); crat_table->length = sizeof(struct crat_header); #ifdef CONFIG_ACPI @@ -911,8 +915,10 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) pr_warn("DSDT table not found for OEM information\n"); else { crat_table->oem_revision = acpi_table->revision; - memcpy(crat_table->oem_id, acpi_table->oem_id, CRAT_OEMID_LENGTH); - memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH); + memcpy(crat_table->oem_id, acpi_table->oem_id, + CRAT_OEMID_LENGTH); + memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); } #else crat_table->oem_revision = 0; @@ -974,8 +980,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) /* TODO: Add cache Subtype for CPU. * Currently, CPU cache information is available in function * detect_cache_attributes(cpu) defined in the file - * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not exported - * and to get the same information the code needs to be duplicated. + * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not + * exported and to get the same information the code needs to be + * duplicated. */ *size = crat_table->length; @@ -1014,14 +1021,13 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, } /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU - * to its NUMA node - * - * @avail_size: Available size in the memory - * @kdev - [IN] GPU device - * @sub_type_hdr: Memory into which io link info will be filled in - * @proximity_domain - proximity domain of the GPU node + * to its NUMA node + * @avail_size: Available size in the memory + * @kdev - [IN] GPU device + * @sub_type_hdr: Memory into which io link info will be filled in + * @proximity_domain - proximity domain of the GPU node * - * Return 0 if successful else return -ve value + * Return 0 if successful else return -ve value */ static int kfd_fill_gpu_direct_io_link(int *avail_size, struct kfd_dev *kdev, @@ -1040,7 +1046,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; /* Fill in IOLINK subtype. 
- * TODO: Fill-in other fields of iolink subtype */ + * TODO: Fill-in other fields of iolink subtype + */ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->proximity_domain_from = proximity_domain; #ifdef CONFIG_NUMA @@ -1076,8 +1083,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) struct amd_iommu_device_info iommu_info; const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; #endif struct kfd_local_mem_info local_mem_info; @@ -1093,8 +1100,10 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, memset(crat_table, 0, sizeof(struct crat_header)); - memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature)); - crat_table->length = sizeof(struct crat_header); /* Change length as we add more subtypes*/ + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); + /* Change length as we add more subtypes*/ + crat_table->length = sizeof(struct crat_header); crat_table->num_domains = 1; crat_table->total_entries = 0; @@ -1135,11 +1144,13 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, cu->hsa_capability = 0; /* Check if this node supports IOMMU. During parsing this flag will - * translate to HSA_CAP_ATS_PRESENT */ + * translate to HSA_CAP_ATS_PRESENT + */ #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) iommu_info.flags = 0; - if (0 == amd_iommu_device_info(kdev->pdev, &iommu_info)) { - if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags) + if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { + if ((iommu_info.flags & required_iommu_flags) == + required_iommu_flags) cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; } #endif @@ -1150,7 +1161,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Fill in Subtype: Memory. Only on systems with large BAR (no * private FB), report memory as public. On other systems * report the total FB size (public+private) as a single - * private heap. */ + * private heap. + */ kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); @@ -1180,7 +1192,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, crat_table->total_entries++; /* TODO: Fill in cache information. This information is NOT readily - * available in KGD */ + * available in KGD + */ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, @@ -1224,17 +1237,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * NOTE: Call kfd_destroy_crat_image to free CRAT image memory * * @crat_image: VCRAT image created because ACPI does not have a - * CRAT for this device + * CRAT for this device * @size: [OUT] size of virtual crat_image * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device - * COMPUTE_UNIT_GPU - Create VCRAT for GPU - * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU - * -- this option is not currently implemented. The assumption - * is that all AMD APUs will have CRAT + * COMPUTE_UNIT_GPU - Create VCRAT for GPU + * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU + * -- this option is not currently implemented. 
+ * The assumption is that all AMD APUs will have CRAT * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU * - * Return 0 if successful else return -ve value -*/ + * Return 0 if successful else return -ve value + */ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, int flags, struct kfd_dev *kdev, uint32_t proximity_domain) { @@ -1269,8 +1282,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, proximity_domain); break; - case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) : - /*TODO:*/ + case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): + /* TODO: */ ret = -EINVAL; pr_err("VCRAT not implemented for APU\n"); break; @@ -1287,12 +1300,10 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, /* kfd_destroy_crat_image * - * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) + * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) * */ void kfd_destroy_crat_image(void *crat_image) { - if (crat_image) - kfree(crat_image); - return; + kfree(crat_image); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index f01aea2..00de41f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -46,8 +46,8 @@ #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) /* Compute Unit flags */ -#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ -#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ +#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ +#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ struct crat_header { uint32_t signature; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 5fea0d3..0fdc147 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -64,104 +64,112 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, union ULARGE_INTEGER *largep; union ULARGE_INTEGER addr; - do { - if ((kq == NULL) || (packet_buff == NULL) || (size_in_bytes == 0)) { - pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n", __func__); - status = -EINVAL; - break; - } - /* todo - enter proper locking to be multithreaded safe */ - - /* We acquire a buffer from DIQ - * The receive packet buff will be sitting on the Indirect Buffer - * and in the PQ we put the IB packet + sync packet(s). - */ - if (sync) - pq_packets_size_in_bytes += - sizeof(struct pm4_mec_release_mem); - status = kq->ops.acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff); - if (status != 0) { - pr_debug("Error! kfd: In func %s >> acquire_packet_buffer failed\n", __func__); - break; - } - - memset(ib_packet_buff, 0, pq_packets_size_in_bytes); + if ((kq == NULL) || (packet_buff == NULL) || + (size_in_bytes == 0)) { + pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n", + __func__); + return -EINVAL; + } + /* todo - enter proper locking to be multithreaded safe */ + + /* We acquire a buffer from DIQ + * The receive packet buff will be sitting on the Indirect + * Buffer and in the PQ we put the IB packet + sync packet(s). + */ + if (sync) + pq_packets_size_in_bytes += + sizeof(struct pm4_mec_release_mem); + status = kq->ops.acquire_packet_buffer(kq, + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); + if (status != 0) { + pr_debug("Error! 
kfd: In func %s >> acquire_packet_buffer failed\n", + __func__); + return status; + } - ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); + memset(ib_packet_buff, 0, pq_packets_size_in_bytes); - ib_packet->header.count = 3; - ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; - ib_packet->header.type = PM4_TYPE_3; + ib_packet = + (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); - largep = (union ULARGE_INTEGER *) &vmid0_address; + ib_packet->header.count = 3; + ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; + ib_packet->header.type = PM4_TYPE_3; - ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; - ib_packet->bitfields3.ib_base_hi = largep->u.high_part; + largep = (union ULARGE_INTEGER *) &vmid0_address; - ib_packet->control = (1 << 23) | (1 << 31) | - ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; + ib_packet->bitfields3.ib_base_hi = largep->u.high_part; - ib_packet->bitfields5.pasid = pasid; + ib_packet->control = (1 << 23) | (1 << 31) | + ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); - if (!sync) { - kq->ops.submit_packet(kq); - break; - } + ib_packet->bitfields5.pasid = pasid; - /* - * for now we use release mem for GPU-CPU synchronization - * Consider WaitRegMem + WriteData as a better alternative - * we get a GART allocations ( gpu/cpu mapping), - * for the sync variable, and wait until: - * (a) Sync with HW - * (b) Sync var is written by CP to mem. - */ - rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff + - (sizeof(struct pm4__indirect_buffer_pasid) / sizeof(unsigned int))); - - status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), - &mem_obj); + if (!sync) { + kq->ops.submit_packet(kq); + return status; + } - if (status == 0) { + /* + * for now we use release mem for GPU-CPU synchronization + * Consider WaitRegMem + WriteData as a better alternative + * we get a GART allocations ( gpu/cpu mapping), + * for the sync variable, and wait until: + * (a) Sync with HW + * (b) Sync var is written by CP to mem. + */ + rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff + + (sizeof(struct pm4__indirect_buffer_pasid) / + sizeof(unsigned int))); + + status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), + &mem_obj); + + if (status != 0) { + pr_debug("Error! 
kfd: In func %s >> failed to allocate GART memory\n", + __func__); + return status; + } - rm_state = (uint64_t *) mem_obj->cpu_ptr; + rm_state = (uint64_t *) mem_obj->cpu_ptr; - *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; + *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; - rm_packet->header.opcode = IT_RELEASE_MEM; - rm_packet->header.type = PM4_TYPE_3; - rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int) - 2; + rm_packet->header.opcode = IT_RELEASE_MEM; + rm_packet->header.type = PM4_TYPE_3; + rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / + sizeof(unsigned int) - 2; - rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - rm_packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; - rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - rm_packet->bitfields2.atc = 0; - rm_packet->bitfields2.tc_wb_action_ena = 1; + rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + rm_packet->bitfields2.event_index = + event_index___release_mem__end_of_pipe; + rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + rm_packet->bitfields2.atc = 0; + rm_packet->bitfields2.tc_wb_action_ena = 1; - addr.quad_part = mem_obj->gpu_addr; + addr.quad_part = mem_obj->gpu_addr; - rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; - rm_packet->address_hi = addr.u.high_part; + rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; + rm_packet->address_hi = addr.u.high_part; - rm_packet->bitfields3.data_sel = data_sel___release_mem__send_64_bit_data; - rm_packet->bitfields3.int_sel = int_sel___release_mem__send_data_after_write_confirm; - rm_packet->bitfields3.dst_sel = dst_sel___release_mem__memory_controller; + rm_packet->bitfields3.data_sel = + data_sel___release_mem__send_64_bit_data; + rm_packet->bitfields3.int_sel = + int_sel___release_mem__send_data_after_write_confirm; + rm_packet->bitfields3.dst_sel = + dst_sel___release_mem__memory_controller; - rm_packet->data_lo = QUEUESTATE__ACTIVE; + rm_packet->data_lo = QUEUESTATE__ACTIVE; - kq->ops.submit_packet(kq); + kq->ops.submit_packet(kq); - /* Wait till CP writes sync code: */ + /* Wait till CP writes sync code: */ - status = amdkfd_fence_wait_timeout( - (unsigned int *) rm_state, + status = amdkfd_fence_wait_timeout((unsigned int *) rm_state, QUEUESTATE__ACTIVE, 1500); - } else { - pr_debug("Error! kfd: In func %s >> failed to allocate GART memory\n", __func__); - } - } while (false); - if (rm_state != NULL) kfd_gtt_sa_free(dbgdev->dev, mem_obj); @@ -170,7 +178,9 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) { - /* no action is needed in this case, just make sure diq will not be used */ + /* no action is needed in this case, just make sure diq will not + * be used + */ dbgdev->kq = NULL; @@ -186,37 +196,33 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) unsigned int qid; struct process_queue_manager *pqm = dbgdev->pqm; - do { - - if (!pqm) { - pr_debug("Error! kfd: In func %s >> No PQM\n", __func__); - status = -EFAULT; - break; - } - - properties.type = KFD_QUEUE_TYPE_DIQ; + if (!pqm) { + pr_debug("Error! kfd: In func %s >> No PQM\n", + __func__); + return -EFAULT; + } - status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid); + properties.type = KFD_QUEUE_TYPE_DIQ; - if (status != 0) { - pr_debug("Error! 
kfd: In func %s >> Create Queue failed\n", __func__); - break; - } + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, + &properties, &qid); - pr_debug("kfd: DIQ Created with queue id: %d\n", qid); + if (status != 0) { + pr_debug("Error! kfd: In func %s >> Create Queue failed\n", + __func__); + return status; + } - kq = pqm_get_kernel_queue(dbgdev->pqm, qid); + pr_debug("kfd: DIQ Created with queue id: %d\n", qid); - if (kq == NULL) { - pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n", __func__); - status = -ENOMEM; - break; - } + kq = pqm_get_kernel_queue(dbgdev->pqm, qid); - dbgdev->kq = kq; - - } while (false); + if (kq == NULL) { + pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n", + __func__); + return -ENOMEM; + } + dbgdev->kq = kq; return status; } @@ -233,7 +239,9 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) { /* todo - if needed, kill wavefronts and disable watch */ int status = 0; - if ((dbgdev == NULL) || (dbgdev->pqm == NULL) || (dbgdev->kq == NULL)) { + + if ((dbgdev == NULL) || (dbgdev->pqm == NULL) || + (dbgdev->kq == NULL)) { pr_debug("kfd Err:In func %s >> can't destroy diq\n", __func__); status = -EFAULT; } else { @@ -260,13 +268,16 @@ static void dbgdev_address_watch_set_registers( cntl->u32All = 0; if (adw_info->watch_mask != NULL) - cntl->bitfields.mask = (uint32_t) (adw_info->watch_mask[index] & ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); + cntl->bitfields.mask = + (uint32_t) (adw_info->watch_mask[index] & + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); else cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; addr.quad_part = (unsigned long long) adw_info->watch_address[index]; - addrHi->bitfields.addr = addr.u.high_part & ADDRESS_WATCH_REG_ADDHIGH_MASK; + addrHi->bitfields.addr = addr.u.high_part & + ADDRESS_WATCH_REG_ADDHIGH_MASK; addrLo->bitfields.addr = (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); @@ -276,8 +287,10 @@ static void dbgdev_address_watch_set_registers( if (KFD_IS_DGPU(asic_family) == false) cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); - pr_debug("\t\t%20s %08x\n", "set reg add high :", addrHi->bitfields.addr); - pr_debug("\t\t%20s %08x\n", "set reg add low :", addrLo->bitfields.addr); + pr_debug("\t\t%20s %08x\n", "set reg add high :", + addrHi->bitfields.addr); + pr_debug("\t\t%20s %08x\n", "set reg add low :", + addrLo->bitfields.addr); } @@ -285,8 +298,6 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info) { - int status = 0; - union TCP_WATCH_ADDR_H_BITS addrHi; union TCP_WATCH_ADDR_L_BITS addrLo; union TCP_WATCH_CNTL_BITS cntl; @@ -296,68 +307,67 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, struct kfd_process_device *pdd; - do { - /* taking the vmid for that process on the safe way using pdd */ - pdd = kfd_get_process_device_data(dbgdev->dev, - adw_info->process); - if (!pdd) { - pr_debug("Error! 
kfd: In func %s >> no PDD available\n", __func__); - status = -EFAULT; - break; - } - - addrHi.u32All = 0; - addrLo.u32All = 0; - cntl.u32All = 0; - - vmid = pdd->qpd.vmid; - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) - || (adw_info->num_watch_points == 0)) { - status = -EINVAL; - break; - } - - if ((adw_info->watch_mode == NULL) || (adw_info->watch_address == NULL)) { - status = -EINVAL; - break; - } + /* taking the vmid for that process on the safe way + * using pdd + */ + pdd = kfd_get_process_device_data(dbgdev->dev, + adw_info->process); + if (!pdd) { + pr_debug("Error! kfd: In func %s >> no PDD available\n", + __func__); + return -EFAULT; + } - for (i = 0; i < adw_info->num_watch_points; i++) { + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; - dbgdev_address_watch_set_registers( - adw_info, - &addrHi, - &addrLo, - &cntl, - i, - vmid, - dbgdev->dev->device_info->asic_family - ); + vmid = pdd->qpd.vmid; - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); - pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - pdd->dev->kfd2kgd->address_watch_execute( - dbgdev->dev->kgd, - i, - cntl.u32All, - addrHi.u32All, - addrLo.u32All); - } + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0) || (adw_info->watch_mode == NULL)) + return -EINVAL; - } while (false); + for (i = 0; i < adw_info->num_watch_points; i++) { + + dbgdev_address_watch_set_registers( + adw_info, + &addrHi, + &addrLo, + &cntl, + i, + vmid, + dbgdev->dev->device_info->asic_family + ); + + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + pdd->dev->kfd2kgd->address_watch_execute( + dbgdev->dev->kgd, + i, + cntl.u32All, + addrHi.u32All, + addrLo.u32All); + } - return status; + return 0; } static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, @@ -386,131 +396,135 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, addrLo.u32All = 0; cntl.u32All = 0; - do { - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { - status = -EINVAL; - break; - } + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0) || + !adw_info->watch_mode || 
!adw_info->watch_address) + return -EINVAL; - if ((NULL == adw_info->watch_mode) || (NULL == adw_info->watch_address)) { - status = -EINVAL; - break; - } + status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, + ib_size/sizeof(uint32_t), + &packet_buff_uint, &packet_buff_gpu_addr); + if (status != 0) + return status; + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); + + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_CONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.vmid_shift = + ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[0].bitfields2.insert_vmid = 1; + packets_vec[1].ordinal1 = packets_vec[0].ordinal1; + packets_vec[1].bitfields2.insert_vmid = 0; + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[3].ordinal1 = packets_vec[0].ordinal1; + packets_vec[3].bitfields2.vmid_shift = + ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[3].bitfields2.insert_vmid = 1; + + for (i = 0; i < adw_info->num_watch_points; i++) { + + dbgdev_address_watch_set_registers( + adw_info, + &addrHi, + &addrLo, + &cntl, + i, + vmid, + dbgdev->dev->device_info->asic_family + ); - status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, - ib_size/sizeof(uint32_t), - &packet_buff_uint, &packet_buff_gpu_addr); - - if (status != 0) - break; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); - - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_CONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[0].bitfields2.insert_vmid = 1; - packets_vec[1].ordinal1 = packets_vec[0].ordinal1; - packets_vec[1].bitfields2.insert_vmid = 0; - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[3].ordinal1 = packets_vec[0].ordinal1; - packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[3].bitfields2.insert_vmid = 1; - - for (i = 0; i < adw_info->num_watch_points; i++) { - - dbgdev_address_watch_set_registers( - adw_info, - &addrHi, - &addrLo, - &cntl, + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); + pr_debug("\t\t%20s %p\n", "Add ptr is :", + adw_info->watch_address); + pr_debug("\t\t%20s %08llx\n", "Add is :", + adw_info->watch_address[i]); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd + ->address_watch_get_offset( + dbgdev->dev->kgd, i, - vmid, - dbgdev->dev->device_info->asic_family - ); - - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); - pr_debug("\t\t%20s %p\n", "Add ptr is :", adw_info->watch_address); - pr_debug("\t\t%20s %08llx\n", "Add is :", adw_info->watch_address[i]); - 
pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( - dbgdev->dev->kgd, - i, - ADDRESS_WATCH_REG_CNTL); + ADDRESS_WATCH_REG_CNTL); - packets_vec[0].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; - packets_vec[0].reg_data[0] = cntl.u32All; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( - dbgdev->dev->kgd, - i, - ADDRESS_WATCH_REG_ADDR_HI); + packets_vec[0].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[0].reg_data[0] = cntl.u32All; + aw_reg_add_dword = + dbgdev->dev->kfd2kgd + ->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_HI); - packets_vec[1].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; - packets_vec[1].reg_data[0] = addrHi.u32All; - aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( - dbgdev->dev->kgd, - i, - ADDRESS_WATCH_REG_ADDR_LO); + packets_vec[1].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[1].reg_data[0] = addrHi.u32All; + aw_reg_add_dword = + dbgdev->dev->kfd2kgd + ->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_LO); - packets_vec[2].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; - packets_vec[2].reg_data[0] = addrLo.u32All; - /* enable watch flag if address is not zero*/ - if (adw_info->watch_address[i] > 0) - cntl.bitfields.valid = 1; - else - cntl.bitfields.valid = 0; + packets_vec[2].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[2].reg_data[0] = addrLo.u32All; - aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( - dbgdev->dev->kgd, - i, - ADDRESS_WATCH_REG_CNTL); + /* enable watch flag if address is not zero*/ + if (adw_info->watch_address[i] > 0) + cntl.bitfields.valid = 1; + else + cntl.bitfields.valid = 0; + aw_reg_add_dword = + dbgdev->dev->kfd2kgd + ->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_CNTL); - packets_vec[3].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE; - packets_vec[3].reg_data[0] = cntl.u32All; - status = dbgdev_diq_submit_ib( - dbgdev, - adw_info->process->pasid, - packet_buff_gpu_addr, - packet_buff_uint, - ib_size, true); + packets_vec[3].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[3].reg_data[0] = cntl.u32All; - if (status != 0) { - pr_debug("Error! kfd: In func %s >> failed to submit DIQ packet\n", __func__); - break; - } + status = dbgdev_diq_submit_ib( + dbgdev, + adw_info->process->pasid, + packet_buff_gpu_addr, + packet_buff_uint, + ib_size, true); + if (status != 0) { + pr_debug("Error! 
kfd: In func %s >> failed to submit DIQ packet\n", + __func__); + return status; } - } while (false); + } return status; @@ -525,26 +539,30 @@ static int dbgdev_wave_control_set_registers( int status = 0; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct HsaDbgWaveMsgAMDGen2 *pMsg; reg_sq_cmd.u32All = 0; - reg_gfx_index.u32All = 0; + pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; switch (wac_info->mode) { - case HSA_DBG_WAVEMODE_SINGLE: /* Send command to single wave */ - /*limit access to the process waves only,by setting vmid check */ + /* Send command to single wave */ + case HSA_DBG_WAVEMODE_SINGLE: + /* limit access to the process waves only,by setting vmid check + */ reg_sq_cmd.bits.check_vmid = 1; - reg_sq_cmd.bits.simd_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.SIMD; - reg_sq_cmd.bits.wave_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.WaveId; + reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; + reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; - reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray; - reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU; + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; break; - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: /* Send command to all waves with matching VMID */ + /* Send command to all waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: reg_gfx_index.bits.sh_broadcast_writes = 1; @@ -554,14 +572,15 @@ static int dbgdev_wave_control_set_registers( reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; break; - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: /* Send command to all CU waves with matching VMID */ + /* Send command to all CU waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: reg_sq_cmd.bits.check_vmid = 1; reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; - reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray; - reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU; + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; break; @@ -636,91 +655,98 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; reg_sq_cmd.u32All = 0; - do { - status = dbgdev_wave_control_set_registers(wac_info, - ®_sq_cmd, - ®_gfx_index, - dbgdev->dev->device_info->asic_family); + status = dbgdev_wave_control_set_registers(wac_info, + ®_sq_cmd, + ®_gfx_index, + dbgdev->dev->device_info->asic_family); - /* we do not control the VMID in DIQ,so reset it to a known value */ - reg_sq_cmd.bits.vm_id = 0; - if (status != 0) - break; - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t 
vmid is: N/A\n"); - - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, - ib_size / sizeof(uint32_t), - &packet_buff_uint, &packet_buff_gpu_addr); - - if (status != 0) - break; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; - packets_vec[0].bitfields2.insert_vmid = 0; - packets_vec[0].reg_data[0] = reg_gfx_index.u32All; - - packets_vec[1].header.count = 1; - packets_vec[1].header.opcode = IT_SET_CONFIG_REG; - packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE; - packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; - packets_vec[1].bitfields2.insert_vmid = 1; - packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; - - /* Restore the GRBM_GFX_INDEX register */ - - reg_gfx_index.u32All = 0; - reg_gfx_index.bits.sh_broadcast_writes = 1; - reg_gfx_index.bits.instance_broadcast_writes = 1; - reg_gfx_index.bits.se_broadcast_writes = 1; + /* we do not control the VMID in DIQ, so reset it to a + * known value + */ + reg_sq_cmd.bits.vm_id = 0; + if (status != 0) + return status; + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: N/A\n"); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", + reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", 
reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, + ib_size / sizeof(uint32_t), + &packet_buff_uint, &packet_buff_gpu_addr); + + if (status != 0) + return status; + + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; + packets_vec[0].bitfields2.insert_vmid = 0; + packets_vec[0].reg_data[0] = reg_gfx_index.u32All; + + packets_vec[1].header.count = 1; + packets_vec[1].header.opcode = IT_SET_CONFIG_REG; + packets_vec[1].header.type = PM4_TYPE_3; + packets_vec[1].bitfields2.reg_offset = + SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE; + packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; + packets_vec[1].bitfields2.insert_vmid = 1; + packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; + + /* Restore the GRBM_GFX_INDEX register */ + reg_gfx_index.u32All = 0; + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[2].reg_data[0] = reg_gfx_index.u32All; - status = dbgdev_diq_submit_ib( - dbgdev, - wac_info->process->pasid, - packet_buff_gpu_addr, - packet_buff_uint, - ib_size, false); + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[2].reg_data[0] = reg_gfx_index.u32All; - if (status != 0) - pr_debug("%s\n", " Critical Error ! Submit diq packet failed "); + status = dbgdev_diq_submit_ib( + dbgdev, + wac_info->process->pasid, + packet_buff_gpu_addr, + packet_buff_uint, + ib_size, false); - } while (false); + if (status != 0) + pr_debug("%s\n", " Critical Error ! 
Submit diq packet failed "); return status; } @@ -758,23 +784,37 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t mode is: %u\n", wac_info->mode); pr_debug("\t\t operand is: %u\n", wac_info->operand); pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); pr_debug("\t\t vmid is: %u\n", vmid); - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes); + pr_debug("\t\t chk_vmid is : %u\n", + reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", + reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", + reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", + reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", + reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", + reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", + reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", + reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", + reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); @@ -814,7 +854,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. * ATC_VMID15_PASID_MAPPING - * to check which VMID the current process is mapped to. */ + * to check which VMID the current process is mapped to. 
+ */ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid @@ -854,7 +895,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) } void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, - DBGDEV_TYPE type) + enum DBGDEV_TYPE type) { pdbgdev->dev = pdev; pdbgdev->kq = NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h index 82f48ff..75883e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -90,17 +90,14 @@ enum SQ_IND_CMD_CMD { SQ_IND_CMD_CMD_DEBUG = 0x00000004, SQ_IND_CMD_CMD_TRAP = 0x00000005 }; -/* - * SQ_IND_CMD_MODE enum - */ -typedef enum SQ_IND_CMD_MODE { +enum SQ_IND_CMD_MODE { SQ_IND_CMD_MODE_SINGLE = 0x00000000, SQ_IND_CMD_MODE_BROADCAST = 0x00000001, SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, -} SQ_IND_CMD_MODE; +}; union SQ_IND_INDEX_BITS { struct { @@ -208,7 +205,7 @@ union TCP_WATCH_ADDR_L_BITS { }; enum { - QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ + QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ QUEUESTATE__ACTIVE_COMPLETION_PENDING, QUEUESTATE__ACTIVE }; @@ -226,6 +223,7 @@ union ULARGE_INTEGER { #define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) -void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, DBGDEV_TYPE type); +void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type); #endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 426f776..603cdc3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -37,16 +37,12 @@ static DEFINE_MUTEX(kfd_dbgmgr_mutex); -struct mutex * -get_dbgmgr_mutex(void) +struct mutex *get_dbgmgr_mutex(void) { return &kfd_dbgmgr_mutex; } -/*===========================================================================*/ - -static void -kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) +static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) { kfree(pmgr->dbgdev); pmgr->dbgdev = NULL; @@ -54,10 +50,7 @@ kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) pmgr->dev = NULL; } -/*===========================================================================*/ - -void -kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) +void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) { if (pmgr != NULL) { kfd_dbgmgr_uninitialize(pmgr); @@ -66,21 +59,18 @@ kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) } } -/*===========================================================================*/ - -bool -kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) +bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) { - DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; + enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; struct kfd_dbgmgr *new_buff; BUG_ON(pdev == NULL); BUG_ON(!pdev->init_complete); new_buff = kfd_alloc_struct(new_buff); - if (!new_buff) - { - dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgmgr instance\n", __func__); + if (!new_buff) { + dev_err(NULL, "Error! 
kfd: In func %s >> failed to allocate dbgmgr instance\n", + __func__); return false; } @@ -88,7 +78,8 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) new_buff->dev = pdev; new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); if (!new_buff->dbgdev) { - dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n", __func__); + dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n", + __func__); kfree(new_buff); return false; } @@ -108,195 +99,176 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - long status = 0; - - do { - - if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) { - dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); - /* Invalid Pointer. */ - status = -EINVAL; - break; - } - if (pmgr->pasid != 0) { - /* HW debugger is already active. */ - status = -EBUSY; - break; - } - - /* remember pasid */ + if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) { + dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", + __func__); + /* Invalid Pointer. */ + return -EINVAL; + } + if (pmgr->pasid != 0) { + /* HW debugger is already active. */ + return -EBUSY; + } - pmgr->pasid = p->pasid; + /* remember pasid */ - /* provide the pqm for diq generation */ + pmgr->pasid = p->pasid; - pmgr->dbgdev->pqm = &p->pqm; + /* provide the pqm for diq generation */ - /* activate the actual registering */ - /* todo: you should lock with the process mutex here */ - pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); - /* todo: you should unlock with the process mutex here */ + pmgr->dbgdev->pqm = &p->pqm; - } while (false); + /* activate the actual registering */ + /* todo: you should lock with the process mutex here */ + pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); + /* todo: you should unlock with the process mutex here */ - return status; + return 0; } -/* ========================================================================== */ +/* ========================================================================= */ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - long status = 0; - - do { - - if ((pmgr == NULL) || (pmgr->dev == NULL) - || (pmgr->dbgdev == NULL) || (p == NULL)) { - dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); - /* Invalid Pointer */ - status = -EINVAL; - break; - } - if (pmgr->pasid != p->pasid) { - /* Is the requests coming from the already registered process? */ - status = -EINVAL; - break; - } - - /* todo: you should lock with the process mutex here */ + if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || + (p == NULL)) { + dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", + __func__); + /* Invalid Pointer */ + return -EINVAL; + } + if (pmgr->pasid != p->pasid) { + /* Is the requests coming from the already registered + * process? 
+ */ + return -EINVAL; + } - pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); + /* todo: you should lock with the process mutex here */ - /* todo: you should unlock with the process mutex here */ + pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); - pmgr->pasid = 0; + /* todo: you should unlock with the process mutex here */ - } while (false); + pmgr->pasid = 0; - return status; + return 0; } -/* =========================================================================== */ +/* ========================================================================= */ long -kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info) +kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info) { - long status = 0; - dev_info(NULL, "kfd: In func %s\n", __func__); - do { - - if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (wac_info == NULL) - || (wac_info->process == NULL)) { - /* Invalid Pointer */ - dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); - status = -EINVAL; - break; - } - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != wac_info->process->pasid) { - /* HW debugger support was not registered for requester process */ - status = -EINVAL; - break; - } - - status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); - - } while (false); - - return status; + if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!wac_info) + || (wac_info->process == NULL)) { + /* Invalid Pointer */ + dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", + __func__); + return -EINVAL; + } + /* Is the requests coming from the already registered + * process? + */ + if (pmgr->pasid != wac_info->process->pasid) { + /* HW debugger support was not registered for + * requester process + */ + return -EINVAL; + } + return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, + wac_info); } -/* =========================================================================== */ +/* ========================================================================= */ long -kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info) +kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info) { - long status = 0; - dev_info(NULL, "kfd: In func %s\n", __func__); - do { - - if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (adw_info == NULL) - || (adw_info->process == NULL)) { - /* Invalid Pointer */ - dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); - status = -EINVAL; - break; - } - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != adw_info->process->pasid) { - /* HW debugger support was not registered for requester process */ - status = -EINVAL; - break; - } - - status = (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, adw_info); - - } while (false); - - return status; + if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!adw_info) + || (adw_info->process == NULL)) { + /* Invalid Pointer */ + dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", + __func__); + return -EINVAL; + } + /* Is the requests coming from the already registered + * process? 
+ */ + if (pmgr->pasid != adw_info->process->pasid) { + /* HW debugger support was not registered for + * requester process + */ + return -EINVAL; + } + return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, + adw_info); } -/* =========================================================================== */ +/* ========================================================================= */ /* * Handle abnormal process termination * if we are in the midst of a debug session, we should kill all pending waves * of the debugged process and unregister the process from the Debugger. */ long -kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process) +kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, + struct kfd_process *process) { long status = 0; struct dbg_wave_control_info wac_info; dev_info(NULL, "kfd: In func %s\n", __func__); - do { - - if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) { - /* Invalid Pointer */ - dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__); - status = -EINVAL; - break; - } - /* first, we kill all the wavefronts of this process */ - - wac_info.process = process; - wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; - wac_info.operand = HSA_DBG_WAVEOP_KILL; - wac_info.trapId = 0x0; /* not used for the KILL */ - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; /* not used for kill */ - wac_info.dbgWave_msg.MemoryVA = NULL; /* not used for kill */ - - status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, &wac_info); - - if (status != 0) { - dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n", __func__, status); - break; - } - if (pmgr->pasid == wac_info.process->pasid) { - /* if terminated process was registered for debug, then unregister it */ - status = kfd_dbgmgr_unregister(pmgr, process); - pmgr->pasid = 0; - } - if (status != 0) - dev_info(NULL, - "Error! kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n", - __func__, status); - - } while (false); + if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) { + /* Invalid Pointer */ + dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", + __func__); + return -EINVAL; + } + /* first, we kill all the wavefronts of this process */ + + wac_info.process = process; + wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; + wac_info.operand = HSA_DBG_WAVEOP_KILL; - return status; + /* not used for KILL */ + wac_info.trapId = 0x0; + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; + wac_info.dbgWave_msg.MemoryVA = NULL; -} + status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, + &wac_info); + if (status != 0) { + dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n", + __func__, status); + return status; + } + if (pmgr->pasid == wac_info.process->pasid) { + /* if terminated process was registered for debug, + * then unregister it + */ + status = kfd_dbgmgr_unregister(pmgr, process); + pmgr->pasid = 0; + } + if (status != 0) + dev_info(NULL, + "Error! 
kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n", + __func__, status); + + return status; +} -/*///////////////////////////////////////////////////////////////////////////////////////// */ +/* ///////////////////////////////////////////////////////////////////////// */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h index 2b6484e..b9a769a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -26,242 +26,251 @@ #include "kfd_priv.h" -/* - * SQ_IND_CMD_CMD enum - */ - - /* must align with hsakmttypes definition. */ #pragma pack(push, 4) -typedef enum _HSA_DBG_WAVEOP { - HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ - HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ - HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ - HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter debug mode */ - HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ +enum HSA_DBG_WAVEOP { + HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ + HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ + HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ + HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ + HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ HSA_DBG_NUM_WAVEOP = 5, HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF -} HSA_DBG_WAVEOP; +}; -typedef enum _HSA_DBG_WAVEMODE { - HSA_DBG_WAVEMODE_SINGLE = 0, /* send command to a single wave */ - /* Broadcast to all wavefronts of all processes is not supported for HSA user mode */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, /* send to waves within current process */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, /* send to waves within current process on CU */ +enum HSA_DBG_WAVEMODE { + /* send command to a single wave */ + HSA_DBG_WAVEMODE_SINGLE = 0, + /* Broadcast to all wavefronts of all processes is not supported for + * HSA user mode + */ + + /* send to waves within current process */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, + /* send to waves within current process on CU */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, HSA_DBG_NUM_WAVEMODE = 3, HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF -} HSA_DBG_WAVEMODE; +}; -typedef enum _HSA_DBG_WAVEMSG_TYPE { +enum HSA_DBG_WAVEMSG_TYPE { HSA_DBG_WAVEMSG_AUTO = 0, HSA_DBG_WAVEMSG_USER = 1, HSA_DBG_WAVEMSG_ERROR = 2, HSA_DBG_NUM_WAVEMSG, HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF -} HSA_DBG_WAVEMSG_TYPE; +}; -typedef enum _HSA_DBG_WATCH_MODE { - HSA_DBG_WATCH_READ = 0, /* Read operations only */ - HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ - HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ - HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ +enum HSA_DBG_WATCH_MODE { + HSA_DBG_WATCH_READ = 0, /* Read operations only */ + HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ + HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ + HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ HSA_DBG_WATCH_NUM, HSA_DBG_WATCH_SIZE = 0xFFFFFFFF -} HSA_DBG_WATCH_MODE; +}; /* This structure is hardware specific and may change in the future */ -typedef struct _HsaDbgWaveMsgAMDGen2 { +struct HsaDbgWaveMsgAMDGen2 { union { struct { - uint32_t UserData:8; /* user data */ - uint32_t ShaderArray:1; /* Shader array */ - uint32_t Priv:1; /* Privileged */ - uint32_t Reserved0:4; /* This field is reserved, should be 0 */ - uint32_t WaveId:4; /* wave id */ - uint32_t SIMD:2; /* SIMD id */ - uint32_t HSACU:4; /* Compute unit */ - uint32_t ShaderEngine:2; /* Shader 
engine */ - uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ - uint32_t Reserved1:4; /* This field is reserved, should be 0 */ + uint32_t UserData:8; /* user data */ + uint32_t ShaderArray:1; /* Shader array */ + uint32_t Priv:1; /* Privileged */ + uint32_t Reserved0:4; /* Reserved, should be 0 */ + uint32_t WaveId:4; /* wave id */ + uint32_t SIMD:2; /* SIMD id */ + uint32_t HSACU:4; /* Compute unit */ + uint32_t ShaderEngine:2;/* Shader engine */ + uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ + uint32_t Reserved1:4; /* Reserved, should be 0 */ } ui32; uint32_t Value; }; uint32_t Reserved2; -} HsaDbgWaveMsgAMDGen2; +}; -typedef union _HsaDbgWaveMessageAMD { - HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; - /* for future HsaDbgWaveMsgAMDGen3; */ -} HsaDbgWaveMessageAMD; +union HsaDbgWaveMessageAMD { + struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; + /* for future HsaDbgWaveMsgAMDGen3; */ +}; -typedef struct _HsaDbgWaveMessage { - void *MemoryVA; /* ptr to associated host-accessible data */ - HsaDbgWaveMessageAMD DbgWaveMsg; -} HsaDbgWaveMessage; +struct HsaDbgWaveMessage { + void *MemoryVA; /* ptr to associated host-accessible data */ + union HsaDbgWaveMessageAMD DbgWaveMsg; +}; /* TODO: This definitions to be MOVED to kfd_event, once it is implemented. + * + * HSA sync primitive, Event and HW Exception notification API definitions. + * The API functions allow the runtime to define a so-called sync-primitive, + * a SW object combining a user-mode provided "syncvar" and a scheduler event + * that can be signaled through a defined GPU interrupt. A syncvar is + * a process virtual memory location of a certain size that can be accessed + * by CPU and GPU shader code within the process to set and query the content + * within that memory. The definition of the content is determined by the HSA + * runtime and potentially GPU shader code interfacing with the HSA runtime. + * The syncvar values may be commonly written through an PM4 WRITE_DATA packet + * in the user mode instruction stream. The OS scheduler event is typically + * associated and signaled by an interrupt issued by the GPU, but other HSA + * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced + * by the KFD by this mechanism, too. + */ - HSA sync primitive, Event and HW Exception notification API definitions - The API functions allow the runtime to define a so-called sync-primitive, a SW object - combining a user-mode provided "syncvar" and a scheduler event that can be signaled - through a defined GPU interrupt. A syncvar is a process virtual memory location of - a certain size that can be accessed by CPU and GPU shader code within the process to set - and query the content within that memory. The definition of the content is determined by - the HSA runtime and potentially GPU shader code interfacing with the HSA runtime. - The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the - user mode instruction stream. The OS scheduler event is typically associated and - signaled by an interrupt issued by the GPU, but other HSA system interrupt conditions - from other HW (e.g. IOMMUv2) may besurfaced by the KFD by this mechanism, too. 
*/ - -/* these are the new definitions for events */ - -typedef enum _HSA_EVENTTYPE { - HSA_EVENTTYPE_SIGNAL = 0, /* /user-mode generated GPU signal */ - HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ - HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change( start/stop ) */ - HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ - HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ - HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ - HSA_EVENTTYPE_PROFILE_EVENT = 6, /* GPU signal for profiling */ - HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state (EOP pm4) */ - /* ... */ +/* these are the new definitions for events */ +enum HSA_EVENTTYPE { + HSA_EVENTTYPE_SIGNAL = 0, /* User-mode generated GPU signal */ + HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ + HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change + * ( start/stop ) + */ + HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ + HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ + HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ + HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ + HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state + *(EOP pm4) + */ + /* ... */ HSA_EVENTTYPE_MAXID, HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF -} HSA_EVENTTYPE; - -typedef uint32_t HSA_EVENTID; - -/* Subdefinitions for various event types: Syncvar */ +}; -typedef struct _HsaSyncVar { +/* Subdefinitions for various event types: Syncvar */ +struct HsaSyncVar { union { - void *UserData; /* pointer to user mode data */ - uint64_t UserDataPtrValue; /* 64bit compatibility of value */ + void *UserData; /* pointer to user mode data */ + uint64_t UserDataPtrValue; /* 64bit compatibility of value */ } SyncVar; uint64_t SyncVarSize; -} HsaSyncVar; - -/* - Subdefinitions for various event types: NodeChange -*/ +}; -typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS { +/* Subdefinitions for various event types: NodeChange */ +enum HSA_EVENTTYPE_NODECHANGE_FLAGS { HSA_EVENTTYPE_NODECHANGE_ADD = 0, HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF -} HSA_EVENTTYPE_NODECHANGE_FLAGS; +}; -typedef struct _HsaNodeChange { - HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; /* HSA node added/removed on the platform */ -} HsaNodeChange; +struct HsaNodeChange { + /* HSA node added/removed on the platform */ + enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; +}; -/* - Sub-definitions for various event types: DeviceStateChange -*/ +/* Sub-definitions for various event types: DeviceStateChange */ -typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { - HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, /* device started (and available) */ - HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, /* device stopped (i.e. unavailable) */ +enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { + /* device started (and available) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, + /* device stopped (i.e. 
unavailable) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF -} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS; +}; -typedef enum _HSA_DEVICE { +enum HSA_DEVICE { HSA_DEVICE_CPU = 0, HSA_DEVICE_GPU = 1, MAX_HSA_DEVICE = 2 -} HSA_DEVICE; +}; -typedef struct _HsaDeviceStateChange { +struct HsaDeviceStateChange { uint32_t NodeId; /* F-NUMA node that contains the device */ - HSA_DEVICE Device; /* device type: GPU or CPU */ - HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ -} HsaDeviceStateChange; + enum HSA_DEVICE Device; /* device type: GPU or CPU */ + enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ +}; -typedef struct _HsaEventData { - HSA_EVENTTYPE EventType; /* event type */ +struct HsaEventData { + enum HSA_EVENTTYPE EventType; /* event type */ union { - /* return data associated with HSA_EVENTTYPE_SIGNAL and other events */ - HsaSyncVar SyncVar; + /* return data associated with HSA_EVENTTYPE_SIGNAL and other + * events + */ + struct HsaSyncVar SyncVar; /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ - HsaNodeChange NodeChangeState; + struct HsaNodeChange NodeChangeState; /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ - HsaDeviceStateChange DeviceState; + struct HsaDeviceStateChange DeviceState; } EventData; - /* the following data entries are internal to the KFD & thunk itself. */ + /* the following data entries are internal to the KFD & thunk itself */ - uint64_t HWData1; /* internal thunk store for Event data (OsEventHandle) */ - uint64_t HWData2; /* internal thunk store for Event data (HWAddress) */ - uint32_t HWData3; /* internal thunk store for Event data (HWData) */ -} HsaEventData; + /* internal thunk store for Event data (OsEventHandle) */ + uint64_t HWData1; + /* internal thunk store for Event data (HWAddress) */ + uint64_t HWData2; + /* internal thunk store for Event data (HWData) */ + uint32_t HWData3; +}; -typedef struct _HsaEventDescriptor { - HSA_EVENTTYPE EventType; /* event type to allocate */ - uint32_t NodeId; /* H-NUMA node containing GPU device that is event source */ - HsaSyncVar SyncVar; /* pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL */ -} HsaEventDescriptor; +struct HsaEventDescriptor { + /* event type to allocate */ + enum HSA_EVENTTYPE EventType; + /* H-NUMA node containing GPU device that is event source */ + uint32_t NodeId; + /* pointer to user mode syncvar data, syncvar->UserDataPtrValue + * may be NULL + */ + struct HsaSyncVar SyncVar; +}; -typedef struct _HsaEvent { - HSA_EVENTID EventId; - HsaEventData EventData; -} HsaEvent; +struct HsaEvent { + uint32_t EventId; + struct HsaEventData EventData; +}; #pragma pack(pop) -typedef enum _DBGDEV_TYPE { +enum DBGDEV_TYPE { DBGDEV_TYPE_ILLEGAL = 0, DBGDEV_TYPE_NODIQ = 1, DBGDEV_TYPE_DIQ = 2, DBGDEV_TYPE_TEST = 3 -} DBGDEV_TYPE; +}; struct dbg_address_watch_info { struct kfd_process *process; - HSA_DBG_WATCH_MODE *watch_mode; + enum HSA_DBG_WATCH_MODE *watch_mode; uint64_t *watch_address; uint64_t *watch_mask; - HsaEvent *watch_event; + struct HsaEvent *watch_event; uint32_t num_watch_points; }; struct dbg_wave_control_info { struct kfd_process *process; uint32_t trapId; - HSA_DBG_WAVEOP operand; - HSA_DBG_WAVEMODE mode; - HsaDbgWaveMessage dbgWave_msg; + enum HSA_DBG_WAVEOP operand; + enum HSA_DBG_WAVEMODE mode; + struct HsaDbgWaveMessage dbgWave_msg; }; struct kfd_dbgdev { /* The device that owns this data. 
*/ - struct kfd_dev *dev; /* kernel queue for DIQ */ - struct kernel_queue *kq; /* a pointer to the pqm of the calling process */ - struct process_queue_manager *pqm; /* type of debug device ( DIQ, non DIQ, etc. ) */ - - DBGDEV_TYPE type; + enum DBGDEV_TYPE type; /* virtualized function pointers to device dbg */ - int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); - int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info); - int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, struct dbg_wave_control_info *wac_info); + int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info); + int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info); }; @@ -277,7 +286,10 @@ void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); -long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info); -long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info); -long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process); -#endif /* KFD_DBGMGR_H_ */ +long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info); +long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info); +long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, + struct kfd_process *process); +#endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index ccf982d..24952c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -51,19 +51,19 @@ void kfd_debugfs_init(void) return; } - ent = debugfs_create_file("mqds", S_IFREG | S_IRUGO, debugfs_root, + ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, kfd_debugfs_mqds_by_process, &kfd_debugfs_fops); if (ent == NULL) pr_warn("Failed to create mqds in kfd debugfs\n"); - ent = debugfs_create_file("hqds", S_IFREG | S_IRUGO, debugfs_root, + ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root, kfd_debugfs_hqds_by_device, &kfd_debugfs_fops); if (ent == NULL) pr_warn("Failed to create hqds in kfd debugfs\n"); - ent = debugfs_create_file("rls", S_IFREG | S_IRUGO, debugfs_root, + ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, &kfd_debugfs_fops); if (ent == NULL) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5d657a9..0abccc4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -343,12 +343,13 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) + != 0); return false; } pasid_limit = min_t(unsigned int, - (unsigned int)1 << kfd->device_info->max_pasid_bits, + (unsigned int)(1 << kfd->device_info->max_pasid_bits), iommu_info.max_pasids); /* * last pasid is used 
for kernel queues doorbells @@ -718,9 +719,10 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_lock(&kfd->interrupt_lock); - if (kfd->interrupts_active - && interrupt_is_wanted(kfd, ih_ring_entry, patched_ihre, &is_patched) - && enqueue_ih_ring_entry(kfd, is_patched ? patched_ihre : ih_ring_entry)) + if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, + patched_ihre, &is_patched) + && enqueue_ih_ring_entry(kfd, + is_patched ? patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2a4a556..99844c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1182,7 +1182,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; + dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 341adfa..bf24368 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -171,7 +171,8 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, } /* On dGPU we're always in GPUVM64 addressing mode with 64-bit - * aperture addresses. */ + * aperture addresses. + */ temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); @@ -202,7 +203,8 @@ static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { /* On dGPU we're always in GPUVM64 addressing mode with 64-bit - * aperture addresses. */ + * aperture addresses. + */ q->properties.sdma_vm_addr = ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 2629143..2f37b04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -225,7 +225,8 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm, } /* On dGPU we're always in GPUVM64 addressing mode with 64-bit - * aperture addresses. */ + * aperture addresses. + */ temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); @@ -256,7 +257,8 @@ static void init_sdma_vm_tonga(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { /* On dGPU we're always in GPUVM64 addressing mode with 64-bit - * aperture addresses. */ + * aperture addresses. + */ q->properties.sdma_vm_addr = ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index be5abd5..18198d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -34,8 +34,8 @@ /* A task can only be on a single wait_queue at a time, but we need to support * waiting on multiple events (any/all). - * Instead of each event simply having a wait_queue with sleeping tasks, it has a - * singly-linked list of tasks. 
+ * Instead of each event simply having a wait_queue with sleeping tasks, it has + * a singly-linked list of tasks. * A thread that wants to sleep creates an array of these, one for each event * and adds one to each event's waiter chain. */ @@ -56,9 +56,9 @@ struct kfd_event_waiter { /* Over-complicated pooled allocator for event notification slots. * - * Each signal event needs a 64-bit signal slot where the signaler will write a 1 - * before sending an interrupt.l (This is needed because some interrupts do not - * contain enough spare data bits to identify an event.) + * Each signal event needs a 64-bit signal slot where the signaler will write a + * 1 before sending an interrupt.l (This is needed because some interrupts do + * not contain enough spare data bits to identify an event.) * We get whole pages from vmalloc and map them to the process VA. * Individual signal events are then allocated a slot in a page. */ @@ -101,7 +101,10 @@ allocate_free_slot(struct kfd_process *process, list_for_each_entry(page, &process->signal_event_pages, event_pages) { if (page->free_slots > 0) { - unsigned int slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); + unsigned int slot = + find_first_zero_bit(page->used_slot_bitmap, + SLOTS_PER_PAGE); + __set_bit(slot, page->used_slot_bitmap); page->free_slots--; @@ -139,13 +142,14 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) page->free_slots = SLOTS_PER_PAGE; - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \ + backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) goto fail_alloc_signal_store; /* prevent user-mode info leaks */ - memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, KFD_SIGNAL_EVENT_LIMIT * 8); + memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, + KFD_SIGNAL_EVENT_LIMIT * 8); page->kernel_address = backing_store; /* Set bits of debug events to prevent allocation */ @@ -213,7 +217,7 @@ allocate_signal_page_dgpu(struct kfd_process *p, my_page->user_address = NULL; my_page->free_slots = SLOTS_PER_PAGE; if (list_empty(&p->signal_event_pages)) - my_page->page_index = 0; + my_page->page_index = 0; else my_page->page_index = list_tail_entry(&p->signal_event_pages, struct signal_page, @@ -284,7 +288,8 @@ static void release_event_notification_slot(struct signal_page *page, page->free_slots++; /* We don't free signal pages, they are retained by the process - * and reused until it exits. */ + * and reused until it exits. + */ } static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, @@ -292,7 +297,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, { struct signal_page *page; - /* This is safe because we don't delete signal pages until the process exits. */ + /* This is safe because we don't delete signal pages until the process + * exits. + */ list_for_each_entry(page, &p->signal_event_pages, event_pages) if (page->page_index == page_index) return page; @@ -300,7 +307,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, return NULL; } -/* Assumes that p->event_mutex is held and of course that p is not going away (current or locked). */ +/* Assumes that p->event_mutex is held and of course that p is not going away + * (current or locked). 
+ */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { struct kfd_event *ev; @@ -321,27 +330,30 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) static u32 make_signal_event_id(struct signal_page *page, unsigned int signal_slot_index) { - return page->page_index | (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); + return page->page_index | + (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); } /* Produce a kfd event id for a nonsignal event. - * These are arbitrary numbers, so we do a sequential search through the hash table - * for an unused number. + * These are arbitrary numbers, so we do a sequential search through the hash + * table for an unused number. */ static u32 make_nonsignal_event_id(struct kfd_process *p) { u32 id; for (id = p->next_nonsignal_event_id; - id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL; + id < KFD_LAST_NONSIGNAL_EVENT_ID && + lookup_event_by_id(p, id) != NULL; id++) ; if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { /* What if id == LAST_NONSIGNAL_EVENT_ID - 1? - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so the first loop - * fails immediately and we proceed with the wraparound loop below. + * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so + * the first loop fails immediately and we proceed with the + * wraparound loop below. */ p->next_nonsignal_event_id = id + 1; @@ -349,7 +361,8 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) } for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; - id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL; + id < KFD_LAST_NONSIGNAL_EVENT_ID && + lookup_event_by_id(p, id) != NULL; id++) ; @@ -357,10 +370,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { p->next_nonsignal_event_id = id + 1; return id; - } else { - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; - return 0; } + p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + return 0; } static struct kfd_event * @@ -371,7 +383,8 @@ lookup_event_by_page_slot(struct kfd_process *p, } static int -create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) +create_signal_event(struct file *devkfd, struct kfd_process *p, + struct kfd_event *ev) { if ((ev->type == KFD_EVENT_TYPE_SIGNAL) && (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT)) { @@ -408,9 +421,11 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event p->debug_event_count++; } - ev->user_signal_address = &ev->signal_page->user_address[ev->signal_slot_index]; + ev->user_signal_address = + &ev->signal_page->user_address[ev->signal_slot_index]; - ev->event_id = make_signal_event_id(ev->signal_page, ev->signal_slot_index); + ev->event_id = + make_signal_event_id(ev->signal_page, ev->signal_slot_index); pr_debug("signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, @@ -420,7 +435,9 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event } /* No non-signal events are supported yet. - * We create them as events that never signal. Set event calls from user-mode are failed. */ + * We create them as events that never signal. Set event calls from user-mode + * are failed. + */ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) { @@ -456,7 +473,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) } } - /* Abandon the list of waiters. 
Individual waiting threads will clean up their own data.*/ + /* Abandon the list of waiters. Individual waiting threads will clean + * up their own data. + */ list_del(&ev->waiters); hash_del(&ev->events); @@ -479,13 +498,15 @@ static void destroy_events(struct kfd_process *p) destroy_event(p, ev); } -/* We assume that the process is being destroyed and there is no need to unmap the pages - * or keep bookkeeping data in order. */ +/* We assume that the process is being destroyed and there is no need to unmap + * the pages or keep bookkeeping data in order. + */ static void shutdown_signal_pages(struct kfd_process *p) { struct signal_page *page, *tmp; - list_for_each_entry_safe(page, tmp, &p->signal_event_pages, event_pages) { + list_for_each_entry_safe(page, tmp, &p->signal_event_pages, + event_pages) { if (page->user_address) { free_pages((unsigned long)page->kernel_address, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); @@ -502,7 +523,8 @@ void kfd_event_free_process(struct kfd_process *p) static bool event_can_be_gpu_signaled(const struct kfd_event *ev) { - return ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG; + return ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG; } static bool event_can_be_cpu_signaled(const struct kfd_event *ev) @@ -519,6 +541,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, int ret = 0; struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL); + if (!ev) return -ENOMEM; @@ -648,7 +671,8 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(ev->signal_page)[ev->signal_slot_index] = UNSIGNALED_EVENT_SLOT; + page_slots(ev->signal_page)[ev->signal_slot_index] = + UNSIGNALED_EVENT_SLOT; } static bool is_slot_signaled(struct signal_page *page, unsigned int index) @@ -656,7 +680,8 @@ static bool is_slot_signaled(struct signal_page *page, unsigned int index) return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; } -static void set_event_from_interrupt(struct kfd_process *p, struct kfd_event *ev) +static void set_event_from_interrupt(struct kfd_process *p, + struct kfd_event *ev) { if (ev && event_can_be_gpu_signaled(ev)) { acknowledge_signal(p, ev); @@ -674,6 +699,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + if (!p) return; /* Presumably process exited. */ @@ -686,19 +712,20 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, ev = lookup_event_by_id(p, partial_id); set_event_from_interrupt(p, ev); } else { - /* Partial ID is in fact partial. For now we completely ignore it, - * but we could use any bits we did receive to search faster. */ + /* Partial ID is in fact partial. For now we completely ignore + * it, but we could use any bits we did receive to search + * faster. 
+ */ struct signal_page *page; - unsigned i; + unsigned int i; - list_for_each_entry(page, &p->signal_event_pages, event_pages) { - for (i = 0; i < SLOTS_PER_PAGE; i++) { + list_for_each_entry(page, &p->signal_event_pages, event_pages) + for (i = 0; i < SLOTS_PER_PAGE; i++) if (is_slot_signaled(page, i)) { - ev = lookup_event_by_page_slot(p, page, i); + ev = lookup_event_by_page_slot(p, + page, i); set_event_from_interrupt(p, ev); } - } - } } mutex_unlock(&p->event_mutex); @@ -710,7 +737,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) struct kfd_event_waiter *event_waiters; uint32_t i; - event_waiters = kmalloc(num_events * sizeof(struct kfd_event_waiter), GFP_KERNEL); + event_waiters = kmalloc_array(num_events, + sizeof(struct kfd_event_waiter), GFP_KERNEL); if (event_waiters) { for (i = 0; i < num_events; i++) { @@ -746,7 +774,8 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) struct kfd_event *ev = waiter->event; /* Only add to the wait list if we actually need to - * wait on this event. */ + * wait on this event. + */ if (!waiter->activated) list_add(&waiter->waiters, &ev->waiters); } @@ -783,8 +812,8 @@ static bool copy_signaled_event_data(uint32_t num_events, if (event_waiters[i].activated && event_waiters[i].event->type == KFD_EVENT_TYPE_MEMORY) if (copy_to_user(&data[event_waiters[i].input_index].memory_exception_data, - &event_waiters[i].event->memory_exception_data, - sizeof(struct kfd_hsa_memory_exception_data))) + &event_waiters[i].event->memory_exception_data, + sizeof(struct kfd_hsa_memory_exception_data))) return false; return true; @@ -803,7 +832,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms) /* msecs_to_jiffies interprets all values above 2^31-1 as infinite, * but we consider them finite. - * This hack is wrong, but nobody is likely to notice. */ + * This hack is wrong, but nobody is likely to notice. + */ user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF); return msecs_to_jiffies(user_timeout_ms) + 1; @@ -835,7 +865,8 @@ int kfd_wait_on_events(struct kfd_process *p, mutex_lock(&p->event_mutex); /* Set to something unreasonable - this is really - * just a bool for now. */ + * just a bool for now. + */ *wait_result = KFD_WAIT_TIMEOUT; event_waiters = alloc_event_waiters(num_events); @@ -889,10 +920,11 @@ int kfd_wait_on_events(struct kfd_process *p, if (signal_pending(current)) { /* - * This is wrong when a nonzero, non-infinite timeout is specified. - * We need to use ERESTARTSYS_RESTARTBLOCK, but struct restart_block - * contains a union with data for each user and it's in generic - * kernel code that I don't want to touch yet. + * This is wrong when a nonzero, non-infinite timeout + * is specified. We need to use + * ERESTARTSYS_RESTARTBLOCK, but struct restart_block + * contains a union with data for each user and it's in + * generic kernel code that I don't want to touch yet. */ ret = -ERESTARTSYS; break; @@ -954,7 +986,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) page = lookup_signal_page_by_index(p, page_index); if (!page) { /* Probably KFD bug, but mmap is user-accessible. 
*/ - pr_debug("signal page could not be found for page_index %u\n", page_index); + pr_debug("signal page could not be found for page_index %u\n", + page_index); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 2fa5d32..a164fd5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -426,5 +426,6 @@ int kfd_init_apertures(struct kfd_process *process) void kfd_flush_tlb(struct kfd_dev *dev, uint32_t pasid) { const struct kfd2kgd_calls *f2g = dev->kfd2kgd; + f2g->invalidate_tlbs(dev->kgd, pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index c48fab5..258fdda 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -58,7 +58,8 @@ int kfd_interrupt_init(struct kfd_dev *kfd) int r; r = kfifo_alloc(&kfd->ih_fifo, - KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, + KFD_IH_NUM_ENTRIES * + kfd->device_info->ih_ring_entry_size, GFP_KERNEL); if (r) { dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); @@ -159,7 +160,8 @@ static void interrupt_wq(struct work_struct *work) sizeof(uint32_t))]; while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - dev->device_info->event_interrupt_class->interrupt_wq(dev, ih_ring_entry); + dev->device_info->event_interrupt_class->interrupt_wq(dev, + ih_ring_entry); } bool interrupt_is_wanted(struct kfd_dev *dev, @@ -167,7 +169,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev, uint32_t *patched_ihre, bool *flag) { /* integer and bitwise OR so there is no boolean short-circuiting */ - unsigned wanted = 0; + unsigned int wanted = 0; wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, ih_ring_entry, patched_ihre, flag); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index b826689..4a67e76 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -244,7 +244,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, if (wptr + packet_size_in_dwords >= queue_size_dwords) { /* make sure after rolling back to position 0, there is - * still enough space. */ + * still enough space. 
+ */ if (packet_size_in_dwords >= rptr) { *buffer_ptr = NULL; return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index 6f12fe0..007a3ea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -165,7 +165,7 @@ int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); + sizeof(struct pm4_mes_map_queues)); packet->bitfields2.alloc_format = alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e6876f6..2126ec5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -75,7 +75,7 @@ MODULE_PARM_DESC(send_sigterm, static int amdkfd_init_completed; -int debug_largebar = 0; +int debug_largebar; module_param(debug_largebar, int, 0444); MODULE_PARM_DESC(debug_largebar, "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); @@ -90,7 +90,8 @@ module_param_named(noretry, vega10_noretry, int, 0644); MODULE_PARM_DESC(noretry, "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); -int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) +int kgd2kfd_init(unsigned int interface_version, + const struct kgd2kfd_calls **g2f) { if (!amdkfd_init_completed) return -EPROBE_DEFER; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 41d28b3..4dff1ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -249,7 +249,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE; m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; - if (atc_bit) { + if (atc_bit) { m->cp_hqd_pq_control |= PQ_ATC_EN; m->cp_hqd_ib_control |= IB_ATC_EN; } @@ -268,9 +268,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_vmid = q->vmid; - if (q->format == KFD_QUEUE_FORMAT_AQL) { + if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - } update_cu_mask(mm, mqd, q); set_priority(m, q); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 0aeebc1..ddca15f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -396,6 +396,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { struct vi_sdma_mqd *m; + BUG_ON(!mm || !mqd || !q); m = get_sdma_mqd(mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 6cfe7f1..b3f7d43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,8 @@ int kfd_pasid_init(void) { pasid_limit = KFD_MAX_NUM_OF_PROCESSES; - pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), + GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c index 
3fb8896..937c0ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c @@ -485,7 +485,6 @@ void kfd_init_peer_direct(void) } pr_info("amdkfd: PeerDirect support was initialized successfully\n"); - return; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h index 05e692b..31cef21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -28,14 +28,14 @@ #define PM4_MES_HEADER_DEFINED union PM4_MES_TYPE_3_HEADER { struct { - uint32_t reserved1:8; /* < reserved */ - uint32_t opcode:8; /* < IT opcode */ - uint32_t count:14; /* < number of DWORDs - 1 - * in the information body. - */ - uint32_t type:2; /* < packet identifier. - * It should be 3 for type 3 packets - */ + /* reserved */ + uint32_t reserved1:8; + /* IT opcode */ + uint32_t opcode:8; + /* number of DWORDs - 1 in the information body */ + uint32_t count:14; + /* packet identifier. It should be 3 for type 3 packets */ + uint32_t type:2; }; uint32_t u32all; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 8cb3094..7c8d9b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER { struct { uint32_t reserved1 : 8; /* < reserved */ uint32_t opcode : 8; /* < IT opcode */ - uint32_t count : 14;/* < number of DWORDs - 1 in the - information body. */ - uint32_t type : 2; /* < packet identifier. - It should be 3 for type 3 packets */ + uint32_t count : 14;/* < Number of DWORDS - 1 in the + * information body + */ + uint32_t type : 2; /* < packet identifier + * It should be 3 for type 3 packets + */ }; uint32_t u32All; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 501eea4..a774152 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -188,9 +188,11 @@ enum asic_family_type { #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) struct kfd_event_interrupt_class { - bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry, - uint32_t *patched_ihre, bool *patched_flag); - void (*interrupt_wq)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); + bool (*interrupt_isr)(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, uint32_t *patched_ihre, + bool *patched_flag); + void (*interrupt_wq)(struct kfd_dev *dev, + const uint32_t *ih_ring_entry); }; struct kfd_device_info { @@ -401,13 +403,13 @@ enum KFD_QUEUE_PRIORITY { * @write_ptr: Defines the number of dwords written to the ring buffer. * * @doorbell_ptr: This field aim is to notify the H/W of new packet written to - * the queue ring buffer. This field should be similar to write_ptr and the user - * should update this field after he updated the write_ptr. + * the queue ring buffer. This field should be similar to write_ptr and the + * user should update this field after he updated the write_ptr. * * @doorbell_off: The doorbell offset in the doorbell pci-bar. * - * @is_interop: Defines if this is a interop queue. Interop queue means that the - * queue can access both graphics and compute resources. + * @is_interop: Defines if this is a interop queue. Interop queue means that + * the queue can access both graphics and compute resources. * * @is_active: Defines if the queue is active or not. 
* @@ -466,9 +468,10 @@ struct queue_properties { * @properties: The queue properties. * * @mec: Used only in no cp scheduling mode and identifies to micro engine id - * that the queue should be execute on. + * that the queue should be execute on. * - * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. + * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe + * id. * * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. * @@ -552,7 +555,7 @@ struct qcm_process_device { unsigned int queue_count; unsigned int vmid; bool is_debug; - unsigned evicted; /* eviction counter, 0=active */ + unsigned int evicted; /* eviction counter, 0=active */ /* * All the memory management data should be here too */ @@ -601,9 +604,11 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct fence *fence); -/*8 byte handle containing GPU ID in the most significant 4 bytes and - * idr_handle in the least significant 4 bytes*/ -#define MAKE_HANDLE(gpu_id, idr_handle) (((uint64_t)(gpu_id) << 32) + idr_handle) +/* 8 byte handle containing GPU ID in the most significant 4 bytes and + * idr_handle in the least significant 4 bytes + */ +#define MAKE_HANDLE(gpu_id, idr_handle) \ + (((uint64_t)(gpu_id) << 32) + idr_handle) #define GET_GPU_ID(handle) (handle >> 32) #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) @@ -642,7 +647,8 @@ struct kfd_process_device { uint64_t sh_hidden_private_base_vmid; - /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) + */ enum kfd_pdd_bound bound; /* VM context for GPUVM allocations */ @@ -711,7 +717,8 @@ struct kfd_process { struct process_queue_manager pqm; - unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; + unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_LONG)]; /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; @@ -720,7 +727,8 @@ struct kfd_process { struct mutex event_mutex; /* All events in process hashed by ID, linked on kfd_event.events. 
*/ DECLARE_HASHTABLE(events, 4); - struct list_head signal_event_pages; /* struct slot_page_header.event_pages */ + /* struct slot_page_header.event_pages */ + struct list_head signal_event_pages; u32 next_nonsignal_event_id; size_t signal_event_count; size_t debug_event_count; @@ -760,7 +768,7 @@ struct amdkfd_ioctl_desc { void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(struct file *filep); -struct kfd_process *kfd_get_process(const struct task_struct *); +struct kfd_process *kfd_get_process(const struct task_struct *task); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); @@ -777,7 +785,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); -int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma); +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma); /* KFD process API for creating and translating handles */ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, @@ -802,9 +811,11 @@ int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd); int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd); /* Process device data iterator */ -struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); -struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, - struct kfd_process_device *pdd); +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p); +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, + struct kfd_process_device *pdd); bool kfd_has_process_device_data(struct kfd_process *p); /* PASIDs */ @@ -938,7 +949,7 @@ struct packet_manager { struct mutex lock; bool allocated; struct kfd_mem_obj *ib_buffer_obj; - unsigned ib_size_bytes; + unsigned int ib_size_bytes; struct packet_manager_funcs *pmf; }; @@ -1046,7 +1057,8 @@ int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, enum kfd_event_wait_result *wait_result); -void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); +void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint32_t valid_id_bits); #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, unsigned long address, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 601e551..025ee5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -366,7 +366,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) /* No process locking is needed in this function, because the process * is not findable any more. We must assume that no other thread is * using it any more, otherwise we couldn't safely free the process - * stucture in the end. */ + * structure in the end. 
+ */ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, @@ -448,7 +449,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, /* Iterate over all process device data structures and if the pdd is in * debug mode,we should first force unregistration, then we will be - * able to destroy the queues */ + * able to destroy the queues + */ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { dev = pdd->dev; mutex_lock(get_dbgmgr_mutex()); @@ -611,8 +613,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, process->last_restore_timestamp = get_jiffies_64(); /* If PeerDirect interface was not detected try to detect it again - * in case if network driver was loaded later. - */ + * in case if network driver was loaded later. + */ kfd_init_peer_direct(); return process; @@ -859,14 +861,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) } #endif /* CONFIG_AMD_IOMMU_V2 */ -struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p) { return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list); } -struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, struct kfd_process_device *pdd) { if (list_is_last(&pdd->per_device_list, &p->per_device_data)) @@ -880,7 +884,8 @@ bool kfd_has_process_device_data(struct kfd_process *p) } /* Create specific handle mapped to mem from process local memory idr - * Assumes that the process lock is held. */ + * Assumes that the process lock is held. + */ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, void *mem, uint64_t start, uint64_t length, @@ -935,7 +940,8 @@ struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd, } /* Translate specific handle from process local memory idr - * Assumes that the process lock is held. */ + * Assumes that the process lock is held. + */ void *kfd_process_device_translate_handle(struct kfd_process_device *pdd, int handle) { @@ -973,7 +979,8 @@ void *kfd_process_find_bo_from_interval(struct kfd_process *p, } /* Remove specific handle from process local memory idr - * Assumes that the process lock is held. */ + * Assumes that the process lock is held. 
+ */ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, int handle) { @@ -1042,7 +1049,8 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) return p; } -int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma) +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma) { unsigned long pfn, i; int ret = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 94e07ee..e4384ce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -240,7 +240,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ - if ((dev->dqm->sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + if ((dev->dqm->sched_policy == + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c index 56bf9a2..2b3c300 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c @@ -179,7 +179,7 @@ void run_rdma_free_callback(struct kfd_bo *buf_obj) * This function release resources previously allocated by get_pages() call. * * \param p_p2p_data - A pointer to pointer to amd_p2p_info entries - * allocated by get_pages() call. + * allocated by get_pages() call. * * \return 0 if operation was successful */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 1d1992f..3e21aef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -517,10 +517,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_64bit_prop(buffer, "local_mem_size", local_mem_info.local_mem_size_private + local_mem_info.local_mem_size_public); - } - else - sysfs_show_64bit_prop(buffer, "local_mem_size", - (unsigned long long int) 0); + } else + sysfs_show_64bit_prop(buffer, "local_mem_size", 0ULL); sysfs_show_32bit_prop(buffer, "fw_version", dev->gpu->mec_fw_version); @@ -881,17 +879,20 @@ static void kfd_debug_print_topology(void) down_read(&topology_lock); - dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); + dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); if (dev) { - if (dev->node_props.cpu_cores_count && dev->node_props.simd_count) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) { pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", - dev->node_props.device_id, dev->node_props.vendor_id); - } - else if (dev->node_props.cpu_cores_count) + dev->node_props.device_id, + dev->node_props.vendor_id); + } else if (dev->node_props.cpu_cores_count) pr_info("Topology: Add CPU node\n"); else if (dev->node_props.simd_count) pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", - dev->node_props.device_id, dev->node_props.vendor_id); + dev->node_props.device_id, + dev->node_props.vendor_id); } up_read(&topology_lock); } @@ -903,7 +904,8 @@ static void kfd_update_system_properties(void) struct kfd_topology_device *dev; down_read(&topology_lock); - dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); 
+ dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); if (dev) { sys_props.platform_id = (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; @@ -1020,8 +1022,7 @@ int kfd_topology_init(void) * topology_device_list */ - /* Initialize the head for the both the lists - */ + /* Initialize the head for the both the lists */ INIT_LIST_HEAD(&topology_device_list); INIT_LIST_HEAD(&temp_topology_device_list); init_rwsem(&topology_lock); @@ -1031,7 +1032,8 @@ int kfd_topology_init(void) /* Proximity domains in ACPI CRAT tables start counting at * 0. The same should be true for virtual CRAT tables created * at this stage. GPUs added later in kfd_topology_add_device - * use a counter. */ + * use a counter. + */ proximity_domain = 0; /* @@ -1091,12 +1093,12 @@ int kfd_topology_init(void) kfd_update_system_properties(); kfd_debug_print_topology(); pr_info("Finished initializing topology\n"); - } - else + } else pr_err("Failed to update topology in sysfs ret=%d\n", ret); /* For nodes with GPU, this information gets added - * when GPU is detected (kfd_topology_add_device). */ + * when GPU is detected (kfd_topology_add_device). + */ if (cpu_only_node) { /* Add additional information to CPU only node created above */ down_write(&topology_lock); @@ -1149,9 +1151,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) return hashout; } /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If - * the GPU device is not already present in the topology device list - * then return NULL. This means a new topology device has to be - * created for this GPU. + * the GPU device is not already present in the topology device + * list then return NULL. This means a new topology device has to + * be created for this GPU. * TODO: Rather than assiging @gpu to first topology device withtout * gpu attached, it will better to have more stringent check. */ @@ -1244,9 +1246,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, - * else create a Virtual CRAT for this gpu device and then parse that CRAT - * to create a new topology device. Once created assign the gpu to that - * topology device + * else create a Virtual CRAT for this gpu device and then parse that + * CRAT to create a new topology device. 
Once created assign the gpu to + * that topology device */ dev = kfd_assign_gpu(gpu); if (!dev) { @@ -1265,8 +1267,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); - /* - * Update the SYSFS tree, since we added another topology device + /* Update the SYSFS tree, since we added another topology + * device */ res = kfd_topology_update_sysfs(); up_write(&topology_lock); @@ -1284,13 +1286,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) gpu->id = gpu_id; /* TODO: Move the following lines to function - * kfd_add_non_crat_information */ + * kfd_add_non_crat_information + */ /* Fill-in additional information that is not available in CRAT but - * needed for the topology */ + * needed for the topology + */ dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); - dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; + dev->node_props.simd_arrays_per_engine = + cu_info.num_shader_arrays_per_engine; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; @@ -1329,8 +1334,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) } /* Fix errors in CZ CRAT. - * simd_count: Carrizo CRAT reports wrong simd_count, probably because it - * doesn't consider masked out CUs + * simd_count: Carrizo CRAT reports wrong simd_count, probably because + * it doesn't consider masked out CUs * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd. * capability flag: Carrizo CRAT doesn't report IOMMU flags. */ @@ -1339,7 +1344,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) cu_info.simd_per_cu * cu_info.cu_active_number; dev->node_props.max_waves_per_simd = 10; dev->node_props.capability |= HSA_CAP_ATS_PRESENT; - } + } kfd_debug_print_topology(); err: @@ -1444,7 +1449,7 @@ int kfd_numa_node_to_apic_id(int numa_node_id) int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) { struct kfd_topology_device *dev; - unsigned i = 0; + unsigned int i = 0; int r = 0; down_read(&topology_lock); @@ -1469,7 +1474,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) { struct kfd_topology_device *dev; - unsigned i = 0; + unsigned int i = 0; int r = 0; down_read(&topology_lock); -- 2.7.4