From e00ab6ceb6cc2d43225b57f76f63994bd4d49054 Mon Sep 17 00:00:00 2001
From: Chaudhary Amit Kumar
Date: Wed, 17 Oct 2018 19:04:35 +0530
Subject: [PATCH 1121/4131] drm/amdkfd: revert kfd part to a previous state

Revert the following files to commit 2ba6b00 ("drm/amd/powerplay: add
profile mode for vega10."):
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*
- drivers/gpu/drm/amd/amdkfd/*
- drivers/gpu/drm/amd/include/kgd_kfd_interface.h
- include/uapi/linux/kfd_ioctl.h
- drivers/gpu/drm/radeon/radeon_kfd*

Porting the kfd patches onto 4.13 all-open produces many conflicts with
upstream, and resolving those conflicts cleanly is hard. So revert the
kfd part to the earlier commit where we first began porting dkms patches
in 4.12 hybrid, then port all the kfd patches sequentially on top.

Signed-off-by: Le.Ma
Acked-by: Junwei Zhang
Signed-off-by: Sanjay R Mehta
Signed-off-by: Chaudhary Amit Kumar
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c         |  44 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h         |  18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 189 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  | 163 ++----
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 183 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c            | 102 ++++---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c            |  21 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h            |  27 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            | 123 ++++---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 316 ++++++++++--------
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   8 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |   8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c          |  40 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c            |  33 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c       |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c         |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c      |  63 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_module.c            |  10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h       |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  62 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    |  46 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 289 ++++++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c             |   7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h       | 330 ++++++++++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h    | 140 +--------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  32 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           |  25 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  71 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c             |  12 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c          |  46 ++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    |  30 +-
 drivers/gpu/drm/radeon/radeon_kfd.c                |  15 +-
 include/uapi/linux/kfd_ioctl.h                     |  36 +--
 33 files changed, 1261 insertions(+), 1235 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f7fa767..7ec1915 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -27,15 +27,16 @@
 #include "amdgpu_gfx.h"
 #include

+const struct kfd2kgd_calls *kfd2kgd;
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);

 int amdgpu_amdkfd_init(void)
 {
	int ret;

 #if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);

	kgd2kfd_init_p = symbol_request(kgd2kfd_init);

@@ -60,21 +61,8 @@ int amdgpu_amdkfd_init(void)
	return ret;
 }
-void amdgpu_amdkfd_fini(void) -{ - if (kgd2kfd) { - kgd2kfd->exit(); - symbol_put(kgd2kfd_init); - } -} - -void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) { - const struct kfd2kgd_calls *kfd2kgd; - - if (!kgd2kfd) - return; - switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: @@ -85,12 +73,25 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: - dev_info(adev->dev, "kfd not supported on this ASIC\n"); - return; + return false; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + return true; +} + +void amdgpu_amdkfd_fini(void) +{ + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) +{ + if (kgd2kfd) + adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) @@ -183,8 +184,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, - &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 1ef486b..6d3a10b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,7 +27,6 @@ #include #include -#include #include struct amdgpu_device; @@ -41,6 +40,8 @@ struct kgd_mem { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev); + void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, @@ -62,19 +63,4 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); -#define read_user_wptr(mmptr, wptr, dst) \ - ({ \ - bool valid = false; \ - if ((mmptr) && (wptr)) { \ - if ((mmptr) == current->mm) { \ - valid = !get_user((dst), (wptr)); \ - } else if (current->mm == NULL) { \ - use_mm(mmptr); \ - valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ - } \ - } \ - valid; \ - }) - #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index bdabaa3..5748504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -39,12 +39,6 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" -enum hqd_dequeue_request_type { - NO_ACTION = 0, - DRAIN_PIPE, - RESET_WAVES -}; - enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. 
*/ MAX_WATCH_ADDRESSES = 4 @@ -102,15 +96,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); + uint32_t queue_id, uint32_t __user *wptr); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -135,33 +126,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); - -/* Because of REG_GET_FIELD() being used, we put this function in the - * asic specific file. - */ -static int get_tile_config(struct kgd_dev *kgd, - struct tile_config *config) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - config->gb_addr_config = adev->gfx.config.gb_addr_config; - config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFBANK); - config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFRANKS); - - config->tile_config_ptr = adev->gfx.config.tile_mode_array; - config->num_tile_configs = - ARRAY_SIZE(adev->gfx.config.tile_mode_array); - config->macro_tile_config_ptr = - adev->gfx.config.macrotile_mode_array; - config->num_macro_tile_configs = - ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); - - return 0; -} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -186,9 +150,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, + .get_fw_version = get_fw_version }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -224,7 +186,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); @@ -328,38 +290,20 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) + uint32_t queue_id, uint32_t __user *wptr) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; - uint32_t *mqd_hqd; - uint32_t reg, wptr_val, data; m = 
get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); - - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ - mqd_hqd = &m->cp_mqd_base_addr_lo; - - for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) - WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); - - /* Copy userspace write pointer value to register. - * Activate doorbell logic to monitor subsequent changes. - */ - data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, - CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); - - if (read_user_wptr(mm, wptr, wptr_val)) - WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); - - data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(mmCP_HQD_ACTIVE, data); + is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); + if (is_wptr_shadow_valid) + m->cp_hqd_pq_wptr = wptr_shadow; + acquire_queue(kgd, pipe_id, queue_id); + gfx_v7_0_mqd_commit(adev, m); release_queue(kgd); return 0; @@ -459,99 +403,30 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - enum hqd_dequeue_request_type type; - unsigned long flags, end_jiffies; - int retry; + int timeout = utimeout; acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - switch (reset_type) { - case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: - type = DRAIN_PIPE; - break; - case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: - type = RESET_WAVES; - break; - default: - type = DRAIN_PIPE; - break; - } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); - /* Workaround: If IQ timer is active and the wait time is close to or - * equal to 0, dequeueing is not safe. Wait until either the wait time - * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is - * cleared before continuing. Also, ensure wait times are set to at - * least 0x3. 
- */ - local_irq_save(flags); - preempt_disable(); - retry = 5000; /* wait for 500 usecs at maximum */ - while (true) { - temp = RREG32(mmCP_HQD_IQ_TIMER); - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { - pr_debug("HW is processing IQ\n"); - goto loop; - } - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) - == 3) /* SEM-rearm is safe */ - break; - /* Wait time 3 is safe for CP, but our MMIO read/write - * time is close to 1 microsecond, so check for 10 to - * leave more buffer room - */ - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) - >= 10) - break; - pr_debug("IQ timer is active\n"); - } else - break; -loop: - if (!retry) { - pr_err("CP HQD IQ timer status time out\n"); - break; - } - ndelay(100); - --retry; - } - retry = 1000; - while (true) { - temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); - if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) - break; - pr_debug("Dequeue request is pending\n"); - - if (!retry) { - pr_err("CP HQD dequeue request time out\n"); - break; - } - ndelay(100); - --retry; - } - local_irq_restore(flags); - preempt_enable(); - - WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); - - end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("cp queue preemption time out\n"); + if (timeout <= 0) { + pr_err("kfd: cp queue preemption time out.\n"); release_queue(kgd); return -ETIME; } - usleep_range(500, 1000); + msleep(20); + timeout -= 20; } release_queue(kgd); @@ -702,16 +577,6 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); -} - static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -722,42 +587,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index d1a32be..c5044d5 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -39,12 +39,6 @@ #include "vi_structs.h" #include "vid.h" -enum hqd_dequeue_request_type { - NO_ACTION = 0, - DRAIN_PIPE, - RESET_WAVES -}; - struct cik_sdma_rlc_registers; /* @@ -61,15 +55,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); + uint32_t queue_id, uint32_t __user *wptr); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, @@ -94,33 +85,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); - -/* Because of REG_GET_FIELD() being used, we put this function in the - * asic specific file. - */ -static int get_tile_config(struct kgd_dev *kgd, - struct tile_config *config) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - config->gb_addr_config = adev->gfx.config.gb_addr_config; - config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFBANK); - config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFRANKS); - - config->tile_config_ptr = adev->gfx.config.tile_mode_array; - config->num_tile_configs = - ARRAY_SIZE(adev->gfx.config.tile_mode_array); - config->macro_tile_config_ptr = - adev->gfx.config.macrotile_mode_array; - config->num_macro_tile_configs = - ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); - - return 0; -} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -147,9 +111,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, + .get_fw_version = get_fw_version }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -185,7 +147,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); @@ -254,7 +216,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = 
(pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -398,102 +360,29 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - enum hqd_dequeue_request_type type; - unsigned long flags, end_jiffies; - int retry; - struct vi_mqd *m = get_mqd(mqd); + int timeout = utimeout; acquire_queue(kgd, pipe_id, queue_id); - if (m->cp_hqd_vmid == 0) - WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); - switch (reset_type) { - case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: - type = DRAIN_PIPE; - break; - case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: - type = RESET_WAVES; - break; - default: - type = DRAIN_PIPE; - break; - } - - /* Workaround: If IQ timer is active and the wait time is close to or - * equal to 0, dequeueing is not safe. Wait until either the wait time - * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is - * cleared before continuing. Also, ensure wait times are set to at - * least 0x3. - */ - local_irq_save(flags); - preempt_disable(); - retry = 5000; /* wait for 500 usecs at maximum */ - while (true) { - temp = RREG32(mmCP_HQD_IQ_TIMER); - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { - pr_debug("HW is processing IQ\n"); - goto loop; - } - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) - == 3) /* SEM-rearm is safe */ - break; - /* Wait time 3 is safe for CP, but our MMIO read/write - * time is close to 1 microsecond, so check for 10 to - * leave more buffer room - */ - if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) - >= 10) - break; - pr_debug("IQ timer is active\n"); - } else - break; -loop: - if (!retry) { - pr_err("CP HQD IQ timer status time out\n"); - break; - } - ndelay(100); - --retry; - } - retry = 1000; - while (true) { - temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); - if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) - break; - pr_debug("Dequeue request is pending\n"); - - if (!retry) { - pr_err("CP HQD dequeue request time out\n"); - break; - } - ndelay(100); - --retry; - } - local_irq_restore(flags); - preempt_enable(); - - WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); - - end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("cp queue preemption time out.\n"); + if (timeout <= 0) { + pr_err("kfd: cp queue preemption time out.\n"); release_queue(kgd); return -ETIME; } - usleep_range(500, 1000); + msleep(20); + timeout -= 20; } release_queue(kgd); @@ -607,16 +496,6 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); -} - static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -627,42 +506,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type 
type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index abdafb9..3016098 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -142,12 +142,12 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, struct kfd_ioctl_create_queue_args *args) { if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { - pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { - pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } @@ -155,26 +155,26 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, (!access_ok(VERIFY_WRITE, (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { - pr_err("Can't access ring base address\n"); + pr_err("kfd: can't access ring base address\n"); return -EFAULT; } if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { - pr_err("Ring size must be a power of 2 or 0\n"); + pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } if (!access_ok(VERIFY_WRITE, (const void __user *) args->read_pointer_address, sizeof(uint32_t))) { - pr_err("Can't access read pointer\n"); + pr_err("kfd: can't access read pointer\n"); return -EFAULT; } if (!access_ok(VERIFY_WRITE, (const void __user *) args->write_pointer_address, sizeof(uint32_t))) { - pr_err("Can't access write pointer\n"); + pr_err("kfd: can't access write pointer\n"); return -EFAULT; } @@ -182,7 +182,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, !access_ok(VERIFY_WRITE, (const void __user *) args->eop_buffer_address, sizeof(uint32_t))) { - pr_debug("Can't access eop buffer"); + pr_debug("kfd: can't access eop buffer"); return -EFAULT; } @@ -190,7 +190,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, !access_ok(VERIFY_WRITE, (const void __user *) args->ctx_save_restore_address, sizeof(uint32_t))) { - pr_debug("Can't access ctx save restore buffer"); + pr_debug("kfd: can't access ctx save restore buffer"); return -EFAULT; } @@ -219,27 +219,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, else 
q_properties->format = KFD_QUEUE_FORMAT_PM4; - pr_debug("Queue Percentage: %d, %d\n", + pr_debug("Queue Percentage (%d, %d)\n", q_properties->queue_percent, args->queue_percentage); - pr_debug("Queue Priority: %d, %d\n", + pr_debug("Queue Priority (%d, %d)\n", q_properties->priority, args->queue_priority); - pr_debug("Queue Address: 0x%llX, 0x%llX\n", + pr_debug("Queue Address (0x%llX, 0x%llX)\n", q_properties->queue_address, args->ring_base_address); - pr_debug("Queue Size: 0x%llX, %u\n", + pr_debug("Queue Size (0x%llX, %u)\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers: %p, %p\n", - q_properties->read_ptr, - q_properties->write_ptr); + pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n", + (uint64_t) q_properties->read_ptr, + (uint64_t) q_properties->write_ptr); - pr_debug("Queue Format: %d\n", q_properties->format); + pr_debug("Queue Format (%d)\n", q_properties->format); - pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address); + pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address); - pr_debug("Queue CTX save area: 0x%llX\n", + pr_debug("Queue CTX save arex (0x%llX)\n", q_properties->ctx_save_restore_area_address); return 0; @@ -257,16 +257,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - pr_debug("Creating queue ioctl\n"); + pr_debug("kfd: creating queue ioctl\n"); err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - pr_debug("Looking for gpu id 0x%x\n", args->gpu_id); + pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id); dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + if (dev == NULL) { + pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id); return -EINVAL; } @@ -278,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } - pr_debug("Creating queue for PASID %d on gpu 0x%x\n", + pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n", p->pasid, dev->id); @@ -296,15 +296,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, mutex_unlock(&p->mutex); - pr_debug("Queue id %d was created successfully\n", args->queue_id); + pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); - pr_debug("Ring buffer address == 0x%016llX\n", + pr_debug("ring buffer address == 0x%016llX\n", args->ring_base_address); - pr_debug("Read ptr address == 0x%016llX\n", + pr_debug("read ptr address == 0x%016llX\n", args->read_pointer_address); - pr_debug("Write ptr address == 0x%016llX\n", + pr_debug("write ptr address == 0x%016llX\n", args->write_pointer_address); return 0; @@ -321,7 +321,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, int retval; struct kfd_ioctl_destroy_queue_args *args = data; - pr_debug("Destroying queue id %d for pasid %d\n", + pr_debug("kfd: destroying queue id %d for PASID %d\n", args->queue_id, p->pasid); @@ -341,12 +341,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, struct queue_properties properties; if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { - pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { - pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 
+ pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } @@ -354,12 +354,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, (!access_ok(VERIFY_WRITE, (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { - pr_err("Can't access ring base address\n"); + pr_err("kfd: can't access ring base address\n"); return -EFAULT; } if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { - pr_err("Ring size must be a power of 2 or 0\n"); + pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } @@ -368,7 +368,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, properties.queue_percent = args->queue_percentage; properties.priority = args->queue_priority; - pr_debug("Updating queue id %d for pasid %d\n", + pr_debug("kfd: updating queue id %d for PASID %d\n", args->queue_id, p->pasid); mutex_lock(&p->mutex); @@ -400,7 +400,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep, } dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (dev == NULL) return -EINVAL; mutex_lock(&p->mutex); @@ -443,7 +443,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, long status = 0; dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (dev == NULL) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -460,11 +460,12 @@ static int kfd_ioctl_dbg_register(struct file *filep, */ pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - status = PTR_ERR(pdd); - goto out; + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + return PTR_ERR(pdd); } - if (!dev->dbgmgr) { + if (dev->dbgmgr == NULL) { /* In case of a legal call, we have no dbgmgr yet */ create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); if (create_ok) { @@ -479,7 +480,6 @@ static int kfd_ioctl_dbg_register(struct file *filep, status = -EINVAL; } -out: mutex_unlock(&p->mutex); mutex_unlock(kfd_get_dbgmgr_mutex()); @@ -494,7 +494,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, long status; dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (dev == NULL) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -505,7 +505,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, mutex_lock(kfd_get_dbgmgr_mutex()); status = kfd_dbgmgr_unregister(dev->dbgmgr, p); - if (!status) { + if (status == 0) { kfd_dbgmgr_destroy(dev->dbgmgr); dev->dbgmgr = NULL; } @@ -539,7 +539,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (dev == NULL) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -580,8 +580,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) { - status = -EINVAL; - goto out; + kfree(args_buff); + return -EINVAL; } watch_mask_value = (uint64_t) args_buff[args_idx]; @@ -604,8 +604,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, } if (args_idx >= args->buf_size_in_bytes - sizeof(args)) { - status = -EINVAL; - goto out; + kfree(args_buff); + return -EINVAL; } /* Currently HSA Event is not supported for DBG */ @@ -617,7 +617,6 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, mutex_unlock(kfd_get_dbgmgr_mutex()); -out: kfree(args_buff); return status; @@ -647,7 +646,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep, 
sizeof(wac_info.trapId); dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (dev == NULL) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -784,9 +783,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, "scratch_limit %llX\n", pdd->scratch_limit); args->num_of_nodes++; - - pdd = kfd_get_next_process_device_data(p, pdd); - } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); @@ -849,84 +847,9 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, return err; } -static int kfd_ioctl_set_scratch_backing_va(struct file *filep, - struct kfd_process *p, void *data) -{ - struct kfd_ioctl_set_scratch_backing_va_args *args = data; - struct kfd_process_device *pdd; - struct kfd_dev *dev; - long err; - - dev = kfd_device_by_id(args->gpu_id); - if (!dev) - return -EINVAL; - - mutex_lock(&p->mutex); - - pdd = kfd_bind_process_to_device(dev, p); - if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); - goto bind_process_to_device_fail; - } - - pdd->qpd.sh_hidden_private_base = args->va_addr; - - mutex_unlock(&p->mutex); - - if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) - dev->kfd2kgd->set_scratch_backing_va( - dev->kgd, args->va_addr, pdd->qpd.vmid); - - return 0; - -bind_process_to_device_fail: - mutex_unlock(&p->mutex); - return err; -} - -static int kfd_ioctl_get_tile_config(struct file *filep, - struct kfd_process *p, void *data) -{ - struct kfd_ioctl_get_tile_config_args *args = data; - struct kfd_dev *dev; - struct tile_config config; - int err = 0; - - dev = kfd_device_by_id(args->gpu_id); - - dev->kfd2kgd->get_tile_config(dev->kgd, &config); - - args->gb_addr_config = config.gb_addr_config; - args->num_banks = config.num_banks; - args->num_ranks = config.num_ranks; - - if (args->num_tile_configs > config.num_tile_configs) - args->num_tile_configs = config.num_tile_configs; - err = copy_to_user((void __user *)args->tile_config_ptr, - config.tile_config_ptr, - args->num_tile_configs * sizeof(uint32_t)); - if (err) { - args->num_tile_configs = 0; - return -EFAULT; - } - - if (args->num_macro_tile_configs > config.num_macro_tile_configs) - args->num_macro_tile_configs = - config.num_macro_tile_configs; - err = copy_to_user((void __user *)args->macro_tile_config_ptr, - config.macro_tile_config_ptr, - args->num_macro_tile_configs * sizeof(uint32_t)); - if (err) { - args->num_macro_tile_configs = 0; - return -EFAULT; - } - - return 0; -} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ - .cmd_drv = 0, .name = #ioctl} + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { @@ -977,12 +900,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, kfd_ioctl_dbg_wave_control, 0), - - AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, - kfd_ioctl_set_scratch_backing_va, 0), - - AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, - kfd_ioctl_get_tile_config, 0) }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 0aa021a..d5e19b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -42,6 +42,8 @@ 
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { + BUG_ON(!dev || !dev->kfd2kgd); + dev->kfd2kgd->address_watch_disable(dev->kgd); } @@ -60,8 +62,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, unsigned int *ib_packet_buff; int status; - if (WARN_ON(!size_in_bytes)) - return -EINVAL; + BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); kq = dbgdev->kq; @@ -76,8 +77,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, status = kq->ops.acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff); - if (status) { - pr_err("acquire_packet_buffer failed\n"); + if (status != 0) { + pr_err("amdkfd: acquire_packet_buffer failed\n"); return status; } @@ -114,8 +115,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), &mem_obj); - if (status) { - pr_err("Failed to allocate GART memory\n"); + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); kq->ops.rollback_packet(kq); return status; } @@ -167,6 +168,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) { + BUG_ON(!dbgdev); + /* * no action is needed in this case, * just make sure diq will not be used @@ -184,12 +187,14 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) struct kernel_queue *kq = NULL; int status; + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, &properties, 0, KFD_QUEUE_TYPE_DIQ, &qid); if (status) { - pr_err("Failed to create DIQ\n"); + pr_err("amdkfd: Failed to create DIQ\n"); return status; } @@ -197,8 +202,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) kq = pqm_get_kernel_queue(dbgdev->pqm, qid); - if (!kq) { - pr_err("Error getting DIQ\n"); + if (kq == NULL) { + pr_err("amdkfd: Error getting DIQ\n"); pqm_destroy_queue(dbgdev->pqm, qid); return -EFAULT; } @@ -210,6 +215,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) { + BUG_ON(!dbgdev || !dbgdev->dev); + /* disable watch address */ dbgdev_address_watch_disable_nodiq(dbgdev->dev); return 0; @@ -220,6 +227,8 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) /* todo - disable address watch */ int status; + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); + status = pqm_destroy_queue(dbgdev->pqm, dbgdev->kq->queue->properties.queue_id); dbgdev->kq = NULL; @@ -236,12 +245,14 @@ static void dbgdev_address_watch_set_registers( { union ULARGE_INTEGER addr; + BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); + addr.quad_part = 0; addrHi->u32All = 0; addrLo->u32All = 0; cntl->u32All = 0; - if (adw_info->watch_mask) + if (adw_info->watch_mask != NULL) cntl->bitfields.mask = (uint32_t) (adw_info->watch_mask[index] & ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); @@ -268,7 +279,7 @@ static void dbgdev_address_watch_set_registers( } static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) + struct dbg_address_watch_info *adw_info) { union TCP_WATCH_ADDR_H_BITS addrHi; union TCP_WATCH_ADDR_L_BITS addrLo; @@ -276,11 +287,13 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, struct kfd_process_device *pdd; unsigned int i; + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + /* taking the vmid for that process on the safe way using pdd */ pdd = kfd_get_process_device_data(dbgdev->dev, adw_info->process); if (!pdd) { - 
pr_err("Failed to get pdd for wave control no DIQ\n"); + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); return -EFAULT; } @@ -290,16 +303,17 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); + pr_err("amdkfd: num_watch_points is invalid\n"); return -EINVAL; } - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); + if ((adw_info->watch_mode == NULL) || + (adw_info->watch_address == NULL)) { + pr_err("amdkfd: adw_info fields are not valid\n"); return -EINVAL; } - for (i = 0; i < adw_info->num_watch_points; i++) { + for (i = 0 ; i < adw_info->num_watch_points ; i++) { dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, &cntl, i, pdd->qpd.vmid); @@ -334,7 +348,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, } static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) + struct dbg_address_watch_info *adw_info) { struct pm4__set_config_reg *packets_vec; union TCP_WATCH_ADDR_H_BITS addrHi; @@ -349,25 +363,28 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, /* we do not control the vmid in DIQ mode, just a place holder */ unsigned int vmid = 0; + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + addrHi.u32All = 0; addrLo.u32All = 0; cntl.u32All = 0; if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); + pr_err("amdkfd: num_watch_points is invalid\n"); return -EINVAL; } - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); + if ((NULL == adw_info->watch_mode) || + (NULL == adw_info->watch_address)) { + pr_err("amdkfd: adw_info fields are not valid\n"); return -EINVAL; } status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - if (status) { - pr_err("Failed to allocate GART memory\n"); + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); return status; } @@ -425,6 +442,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_CNTL); + aw_reg_add_dword /= sizeof(uint32_t); + packets_vec[0].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; @@ -436,6 +455,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_ADDR_HI); + aw_reg_add_dword /= sizeof(uint32_t); + packets_vec[1].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[1].reg_data[0] = addrHi.u32All; @@ -446,6 +467,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_ADDR_LO); + aw_reg_add_dword /= sizeof(uint32_t); + packets_vec[2].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[2].reg_data[0] = addrLo.u32All; @@ -462,6 +485,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_CNTL); + aw_reg_add_dword /= sizeof(uint32_t); + packets_vec[3].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[3].reg_data[0] = cntl.u32All; @@ -473,8 +498,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size); - if (status) { - pr_err("Failed to submit IB to DIQ\n"); + if (status != 0) { + pr_err("amdkfd: Failed to submit IB to DIQ\n"); break; } } @@ -493,6 +518,8 @@ static int dbgdev_wave_control_set_registers( union 
GRBM_GFX_INDEX_BITS reg_gfx_index; struct HsaDbgWaveMsgAMDGen2 *pMsg; + BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); + reg_sq_cmd.u32All = 0; reg_gfx_index.u32All = 0; pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; @@ -593,16 +620,18 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, struct pm4__set_config_reg *packets_vec; size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; + BUG_ON(!dbgdev || !wac_info); + reg_sq_cmd.u32All = 0; status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, ®_gfx_index); if (status) { - pr_err("Failed to set wave control registers\n"); + pr_err("amdkfd: Failed to set wave control registers\n"); return status; } - /* we do not control the VMID in DIQ, so reset it to a known value */ + /* we do not control the VMID in DIQ,so reset it to a known value */ reg_sq_cmd.bits.vm_id = 0; pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); @@ -638,7 +667,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); if (status != 0) { - pr_err("Failed to allocate GART memory\n"); + pr_err("amdkfd: Failed to allocate GART memory\n"); return status; } @@ -690,8 +719,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size); - if (status) - pr_err("Failed to submit IB to DIQ\n"); + if (status != 0) + pr_err("amdkfd: Failed to submit IB to DIQ\n"); kfd_gtt_sa_free(dbgdev->dev, mem_obj); @@ -706,19 +735,21 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; + BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); + reg_sq_cmd.u32All = 0; /* taking the VMID for that process on the safe way using PDD */ pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); if (!pdd) { - pr_err("Failed to get pdd for wave control no DIQ\n"); + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); return -EFAULT; } status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, ®_gfx_index); if (status) { - pr_err("Failed to set wave control registers\n"); + pr_err("amdkfd: Failed to set wave control registers\n"); return status; } @@ -787,13 +818,12 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. * ATC_VMID15_PASID_MAPPING - * to check which VMID the current process is mapped to. - */ + * to check which VMID the current process is mapped to. 
*/ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid (dev->kgd, vmid)) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid (dev->kgd, vmid) == p->pasid) { pr_debug("Killing wave fronts of vmid %d and pasid %d\n", vmid, p->pasid); @@ -803,7 +833,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) } if (vmid > last_vmid_to_scan) { - pr_err("Didn't find vmid for pasid %d\n", p->pasid); + pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); return -EFAULT; } @@ -830,6 +860,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, enum DBGDEV_TYPE type) { + BUG_ON(!pdbgdev || !pdev); + pdbgdev->dev = pdev; pdbgdev->kq = NULL; pdbgdev->type = type; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 3da25f7..56d6763 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -44,6 +44,8 @@ struct mutex *kfd_get_dbgmgr_mutex(void) static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) { + BUG_ON(!pmgr); + kfree(pmgr->dbgdev); pmgr->dbgdev = NULL; @@ -53,7 +55,7 @@ static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) { - if (pmgr) { + if (pmgr != NULL) { kfd_dbgmgr_uninitialize(pmgr); kfree(pmgr); } @@ -64,12 +66,12 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; struct kfd_dbgmgr *new_buff; - if (WARN_ON(!pdev->init_complete)) - return false; + BUG_ON(pdev == NULL); + BUG_ON(!pdev->init_complete); new_buff = kfd_alloc_struct(new_buff); if (!new_buff) { - pr_err("Failed to allocate dbgmgr instance\n"); + pr_err("amdkfd: Failed to allocate dbgmgr instance\n"); return false; } @@ -77,7 +79,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) new_buff->dev = pdev; new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); if (!new_buff->dbgdev) { - pr_err("Failed to allocate dbgdev instance\n"); + pr_err("amdkfd: Failed to allocate dbgdev instance\n"); kfree(new_buff); return false; } @@ -94,6 +96,8 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + if (pmgr->pasid != 0) { pr_debug("H/W debugger is already active using pasid %d\n", pmgr->pasid); @@ -114,6 +118,8 @@ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + /* Is the requests coming from the already registered process? */ if (pmgr->pasid != p->pasid) { pr_debug("H/W debugger is not registered by calling pasid %d\n", @@ -131,6 +137,8 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info) { + BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info); + /* Is the requests coming from the already registered process? 
*/ if (pmgr->pasid != wac_info->process->pasid) { pr_debug("H/W debugger support was not registered for requester pasid %d\n", @@ -144,6 +152,9 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info) { + BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); + + /* Is the requests coming from the already registered process? */ if (pmgr->pasid != adw_info->process->pasid) { pr_debug("H/W debugger support was not registered for requester pasid %d\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h index a04a1fe..257a745 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -30,11 +30,13 @@ #pragma pack(push, 4) enum HSA_DBG_WAVEOP { - HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ - HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ - HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ - HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ - HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ + HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ + HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ + HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ + HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter + debug mode */ + HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take + a trap */ HSA_DBG_NUM_WAVEOP = 5, HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF }; @@ -79,13 +81,15 @@ struct HsaDbgWaveMsgAMDGen2 { uint32_t UserData:8; /* user data */ uint32_t ShaderArray:1; /* Shader array */ uint32_t Priv:1; /* Privileged */ - uint32_t Reserved0:4; /* Reserved, should be 0 */ + uint32_t Reserved0:4; /* This field is reserved, + should be 0 */ uint32_t WaveId:4; /* wave id */ uint32_t SIMD:2; /* SIMD id */ uint32_t HSACU:4; /* Compute unit */ uint32_t ShaderEngine:2;/* Shader engine */ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ - uint32_t Reserved1:4; /* Reserved, should be 0 */ + uint32_t Reserved1:4; /* This field is reserved, + should be 0 */ } ui32; uint32_t Value; }; @@ -117,23 +121,20 @@ struct HsaDbgWaveMessage { * in the user mode instruction stream. The OS scheduler event is typically * associated and signaled by an interrupt issued by the GPU, but other HSA * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced - * by the KFD by this mechanism, too. - */ + * by the KFD by this mechanism, too. */ /* these are the new definitions for events */ enum HSA_EVENTTYPE { HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change - * (start/stop) - */ + (start/stop) */ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state - * (EOP pm4) - */ + (EOP pm4) */ /* ... 
*/ HSA_EVENTTYPE_MAXID, HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 61fff25..3f95f7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,7 +26,7 @@ #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_headers.h" #define MQD_SIZE_ALIGNED 768 @@ -98,14 +98,11 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { if (supported_devices[i].did == did) { - WARN_ON(!supported_devices[i].device_info); + BUG_ON(supported_devices[i].device_info == NULL); return supported_devices[i].device_info; } } - dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", - did); - return NULL; } @@ -117,10 +114,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, const struct kfd_device_info *device_info = lookup_device_info(pdev->device); - if (!device_info) { - dev_err(kfd_device, "kgd2kfd_probe failed\n"); + if (!device_info) return NULL; - } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) @@ -157,16 +152,15 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) } if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", + dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) - != 0); + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); return false; } pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), + (unsigned int)1 << kfd->device_info->max_pasid_bits, iommu_info.max_pasids); /* * last pasid is used for kernel queues doorbells @@ -217,8 +211,9 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, flags); dev = kfd_device_by_pci_dev(pdev); - if (!WARN_ON(!dev)) - kfd_signal_iommu_event(dev, pasid, address, + BUG_ON(dev == NULL); + + kfd_signal_iommu_event(dev, pasid, address, flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); return AMD_IOMMU_INV_PRI_RSP_INVALID; @@ -239,9 +234,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * calculate max size of runlist packet. 
* There can be only 2 packets at once */ - size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) + - max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) - + sizeof(struct pm4_mes_runlist)) * 2; + size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + + max_num_of_queues_per_device * + sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2; /* Add size of HIQ & DIQ */ size += KFD_KERNEL_QUEUE_SIZE * 2; @@ -252,37 +247,42 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd->kfd2kgd->init_gtt_mem_allocation( kfd->kgd, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){ - dev_err(kfd_device, "Could not allocate %d bytes\n", size); + dev_err(kfd_device, + "Could not allocate %d bytes for device (%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); goto out; } - dev_info(kfd_device, "Allocated %d bytes on gart\n", size); + dev_info(kfd_device, + "Allocated %d bytes on gart for device(%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); /* Initialize GTT sa with 512 byte chunk size */ if (kfd_gtt_sa_init(kfd, size, 512) != 0) { - dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); + dev_err(kfd_device, + "Error initializing gtt sub-allocator\n"); goto kfd_gtt_sa_init_error; } - if (kfd_doorbell_init(kfd)) { - dev_err(kfd_device, - "Error initializing doorbell aperture\n"); - goto kfd_doorbell_error; - } + kfd_doorbell_init(kfd); - if (kfd_topology_add_device(kfd)) { - dev_err(kfd_device, "Error adding device to topology\n"); + if (kfd_topology_add_device(kfd) != 0) { + dev_err(kfd_device, + "Error adding device (%x:%x) to topology\n", + kfd->pdev->vendor, kfd->pdev->device); goto kfd_topology_add_device_error; } if (kfd_interrupt_init(kfd)) { - dev_err(kfd_device, "Error initializing interrupts\n"); + dev_err(kfd_device, + "Error initializing interrupts for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); goto kfd_interrupt_error; } if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", + "Error initializing iommuv2 for device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); goto device_iommu_pasid_error; } @@ -292,13 +292,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { - dev_err(kfd_device, "Error initializing queue manager\n"); + dev_err(kfd_device, + "Error initializing queue manager for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); goto device_queue_manager_error; } - if (kfd->dqm->ops.start(kfd->dqm)) { + if (kfd->dqm->ops.start(kfd->dqm) != 0) { dev_err(kfd_device, - "Error starting queue manager for device %x:%x\n", + "Error starting queuen manager for device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); goto dqm_start_error; } @@ -306,10 +308,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->dbgmgr = NULL; kfd->init_complete = true; - dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, + dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); - pr_debug("Starting kfd with the following scheduling policy %d\n", + pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", sched_policy); goto out; @@ -323,13 +325,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: - kfd_doorbell_fini(kfd); -kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: kfd->kfd2kgd->free_gtt_mem(kfd->kgd, 
kfd->gtt_mem); dev_err(kfd_device, - "device %x:%x NOT added due to errors\n", + "device (%x:%x) NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); out: return kfd->init_complete; @@ -342,7 +342,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) amd_iommu_free_device(kfd->pdev); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); - kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); } @@ -352,6 +351,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) void kgd2kfd_suspend(struct kfd_dev *kfd) { + BUG_ON(kfd == NULL); + if (kfd->init_complete) { kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); @@ -365,15 +366,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) unsigned int pasid_limit; int err; + BUG_ON(kfd == NULL); + pasid_limit = kfd_get_pasid_limit(); if (kfd->init_complete) { err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) { - dev_err(kfd_device, "failed to initialize iommu\n"); + if (err < 0) return -ENXIO; - } - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); @@ -402,27 +402,26 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { - unsigned int num_of_longs; + unsigned int num_of_bits; - if (WARN_ON(buf_size < chunk_size)) - return -EINVAL; - if (WARN_ON(buf_size == 0)) - return -EINVAL; - if (WARN_ON(chunk_size == 0)) - return -EINVAL; + BUG_ON(!kfd); + BUG_ON(!kfd->gtt_mem); + BUG_ON(buf_size < chunk_size); + BUG_ON(buf_size == 0); + BUG_ON(chunk_size == 0); kfd->gtt_sa_chunk_size = chunk_size; kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; - num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / - BITS_PER_LONG; + num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; + BUG_ON(num_of_bits == 0); - kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); + kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); if (!kfd->gtt_sa_bitmap) return -ENOMEM; - pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", + pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); mutex_init(&kfd->gtt_sa_lock); @@ -456,6 +455,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, { unsigned int found, start_search, cur_size; + BUG_ON(!kfd); + if (size == 0) return -EINVAL; @@ -466,7 +467,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if ((*mem_obj) == NULL) return -ENOMEM; - pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); + pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); start_search = 0; @@ -478,7 +479,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, kfd->gtt_sa_num_of_chunks, start_search); - pr_debug("Found = %d\n", found); + pr_debug("kfd: found = %d\n", found); /* If there wasn't any free chunk, bail out */ if (found == kfd->gtt_sa_num_of_chunks) @@ -496,12 +497,12 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, found, kfd->gtt_sa_chunk_size); - pr_debug("gpu_addr = %p, cpu_addr = %p\n", + pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); /* If we need only one chunk, mark it as allocated and get out */ if (size <= kfd->gtt_sa_chunk_size) { - pr_debug("Single bit\n"); + pr_debug("kfd: single bit\n"); set_bit(found, 
kfd->gtt_sa_bitmap); goto kfd_gtt_out; } @@ -536,7 +537,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, } while (cur_size > 0); - pr_debug("range_start = %d, range_end = %d\n", + pr_debug("kfd: range_start = %d, range_end = %d\n", (*mem_obj)->range_start, (*mem_obj)->range_end); /* Mark the chunks as allocated */ @@ -550,7 +551,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, return 0; kfd_gtt_no_free_chunk: - pr_debug("Allocation failed with mem_obj = %p\n", mem_obj); + pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj); mutex_unlock(&kfd->gtt_sa_lock); kfree(mem_obj); return -ENOMEM; @@ -560,11 +561,13 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) { unsigned int bit; + BUG_ON(!kfd); + /* Act like kfree when trying to free a NULL object */ if (!mem_obj) return 0; - pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", + pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", mem_obj, mem_obj->range_start, mem_obj->range_end); mutex_lock(&kfd->gtt_sa_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 53a66e8..42de22b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -79,17 +79,20 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) unsigned int get_queues_num(struct device_queue_manager *dqm) { + BUG_ON(!dqm || !dqm->dev); return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, KGD_MAX_QUEUES); } unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) { + BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_queue_per_pipe; } unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) { + BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_pipe_per_mec; } @@ -118,7 +121,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, /* Kaveri kfd vmid's starts from vmid 8 */ allocated_vmid = bit + KFD_VMID_START_OFFSET; - pr_debug("vmid allocation %d\n", allocated_vmid); + pr_debug("kfd: vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; @@ -149,38 +152,42 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, { int retval; + BUG_ON(!dqm || !q || !qpd || !allocated_vmid); + + pr_debug("kfd: In func %s\n", __func__); print_queue(q); mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("Can't create new usermode queue because %d queues were already created\n", + pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); - retval = -EPERM; - goto out_unlock; + mutex_unlock(&dqm->lock); + return -EPERM; } if (list_empty(&qpd->queues_list)) { retval = allocate_vmid(dqm, qpd, q); - if (retval) - goto out_unlock; + if (retval != 0) { + mutex_unlock(&dqm->lock); + return retval; + } } *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) retval = create_sdma_queue_nocpsch(dqm, q, qpd); - else - retval = -EINVAL; - if (retval) { + if (retval != 0) { if (list_empty(&qpd->queues_list)) { deallocate_vmid(dqm, qpd, q); *allocated_vmid = 0; } - goto out_unlock; + 
mutex_unlock(&dqm->lock); + return retval; } list_add(&q->list, &qpd->queues_list); @@ -198,9 +205,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); -out_unlock: mutex_unlock(&dqm->lock); - return retval; + return 0; } static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) @@ -210,8 +216,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) set = false; - for (pipe = dqm->next_pipe_to_allocate, i = 0; - i < get_pipes_per_mec(dqm); + for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm); pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { if (!is_pipe_enabled(dqm, 0, pipe)) @@ -234,7 +239,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) if (!set) return -EBUSY; - pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); + pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", + __func__, q->pipe, q->queue); /* horizontal hqd allocation */ dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); @@ -254,38 +260,36 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd; + BUG_ON(!dqm || !q || !qpd); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd) + if (mqd == NULL) return -ENOMEM; retval = allocate_hqd(dqm, q); - if (retval) + if (retval != 0) return retval; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_hqd; - - pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", - q->pipe, q->queue); + if (retval != 0) { + deallocate_hqd(dqm, q); + return retval; + } - dqm->dev->kfd2kgd->set_scratch_backing_va( - dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", + q->pipe, + q->queue); - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, - q->process->mm); - if (retval) - goto out_uninit_mqd; + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, (uint32_t __user *) q->properties.write_ptr); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } return 0; - -out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); -out_deallocate_hqd: - deallocate_hqd(dqm, q); - - return retval; } static int destroy_queue_nocpsch(struct device_queue_manager *dqm, @@ -295,8 +299,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd; + BUG_ON(!dqm || !q || !q->mqd || !qpd); + retval = 0; + pr_debug("kfd: In Func %s\n", __func__); + mutex_lock(&dqm->lock); if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { @@ -315,7 +323,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); } else { - pr_debug("q->properties.type %d is invalid\n", + pr_debug("q->properties.type is invalid (%d)\n", q->properties.type); retval = -EINVAL; goto out; @@ -326,7 +334,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); - if (retval) + if (retval != 0) goto out; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -356,12 +364,14 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) struct mqd_manager *mqd; bool prev_active = false; + BUG_ON(!dqm || !q || !q->mqd); + mutex_lock(&dqm->lock); 
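	/*
	 * (Editor's aside, not part of the patch: throughout these hunks the
	 * revert trades single-exit "goto out_unlock" error handling for an
	 * explicit unlock-and-return at every failure site. A minimal sketch
	 * of the two patterns, with hypothetical names (lock, do_work,
	 * more_work), for comparison:
	 *
	 *	// single-exit style (the newer code being reverted away from)
	 *	mutex_lock(&lock);
	 *	retval = do_work();
	 *	if (retval)
	 *		goto out_unlock;
	 *	more_work();
	 * out_unlock:
	 *	mutex_unlock(&lock);
	 *	return retval;
	 *
	 *	// explicit-unlock style (what this revert restores)
	 *	mutex_lock(&lock);
	 *	retval = do_work();
	 *	if (retval) {
	 *		mutex_unlock(&lock);
	 *		return retval;
	 *	}
	 *	more_work();
	 *	mutex_unlock(&lock);
	 *	return retval;
	 * )
	 */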
mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { - retval = -ENOMEM; - goto out_unlock; + if (mqd == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; } if (q->properties.is_active) @@ -375,13 +385,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if ((q->properties.is_active) && (!prev_active)) dqm->queue_count++; - else if (!q->properties.is_active && prev_active) + else if ((!q->properties.is_active) && (prev_active)) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = execute_queues_cpsch(dqm, false); -out_unlock: mutex_unlock(&dqm->lock); return retval; } @@ -391,16 +400,15 @@ static struct mqd_manager *get_mqd_manager_nocpsch( { struct mqd_manager *mqd; - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; + BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX); - pr_debug("mqd type %d\n", type); + pr_debug("kfd: In func %s mqd type %d\n", __func__, type); mqd = dqm->mqds[type]; if (!mqd) { mqd = mqd_manager_init(type, dqm->dev); - if (!mqd) - pr_err("mqd manager is NULL"); + if (mqd == NULL) + pr_err("kfd: mqd manager is NULL"); dqm->mqds[type] = mqd; } @@ -413,7 +421,11 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct device_process_node *n; int retval; - n = kzalloc(sizeof(*n), GFP_KERNEL); + BUG_ON(!dqm || !qpd); + + pr_debug("kfd: In func %s\n", __func__); + + n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL); if (!n) return -ENOMEM; @@ -437,6 +449,10 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm, int retval; struct device_process_node *cur, *next; + BUG_ON(!dqm || !qpd); + + pr_debug("In func %s\n", __func__); + pr_debug("qpd->queues_list is %s\n", list_empty(&qpd->queues_list) ? 
"empty" : "not empty"); @@ -477,39 +493,51 @@ static void init_interrupts(struct device_queue_manager *dqm) { unsigned int i; + BUG_ON(dqm == NULL); + for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) if (is_pipe_enabled(dqm, 0, i)) dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); } +static int init_scheduler(struct device_queue_manager *dqm) +{ + int retval = 0; + + BUG_ON(!dqm); + + pr_debug("kfd: In %s\n", __func__); + + return retval; +} + static int initialize_nocpsch(struct device_queue_manager *dqm) { - int pipe, queue; + int i; - pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); + BUG_ON(!dqm); - dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), - sizeof(unsigned int), GFP_KERNEL); - if (!dqm->allocated_queues) - return -ENOMEM; + pr_debug("kfd: In func %s num of pipes: %d\n", + __func__, get_pipes_per_mec(dqm)); mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; - - for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { - int pipe_offset = pipe * get_queues_per_pipe(dqm); - - for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) - if (test_bit(pipe_offset + queue, - dqm->dev->shared_resources.queue_bitmap)) - dqm->allocated_queues[pipe] |= 1 << queue; + dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), + sizeof(unsigned int), GFP_KERNEL); + if (!dqm->allocated_queues) { + mutex_destroy(&dqm->lock); + return -ENOMEM; } + for (i = 0; i < get_pipes_per_mec(dqm); i++) + dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1; + dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + init_scheduler(dqm); return 0; } @@ -517,7 +545,9 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) { int i; - WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + BUG_ON(!dqm); + + BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) @@ -574,34 +604,33 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, return -ENOMEM; retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval) + if (retval != 0) return retval; q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; - pr_debug("SDMA id is: %d\n", q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("kfd: sdma id is: %d\n", q->sdma_id); + pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); + pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_sdma_queue; + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + return retval; + } - retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); - if (retval) - goto out_uninit_mqd; + retval = mqd->load_mqd(mqd, q->mqd, 0, + 0, NULL); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } return 0; - -out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); -out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); - - return retval; } /* @@ -613,6 +642,10 @@ static int set_sched_resources(struct device_queue_manager *dqm) int i, mec; struct 
scheduling_resources res; + BUG_ON(!dqm); + + pr_debug("kfd: In func %s\n", __func__); + res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; res.vmid_mask <<= KFD_VMID_START_OFFSET; @@ -630,8 +663,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) /* This situation may be hit in the future if a new HW * generation exposes more than 64 queues. If so, the - * definition of res.queue_mask needs updating - */ + * definition of res.queue_mask needs updating */ if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { pr_err("Invalid queue enabled by amdgpu: %d\n", i); break; @@ -642,9 +674,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) res.gws_mask = res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; - pr_debug("Scheduling resources:\n" - "vmid mask: 0x%8X\n" - "queue mask: 0x%8llX\n", + pr_debug("kfd: scheduling resources:\n" + " vmid mask: 0x%8X\n" + " queue mask: 0x%8llX\n", res.vmid_mask, res.queue_mask); return pm_send_set_resources(&dqm->packets, &res); @@ -654,7 +686,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) { int retval; - pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); + BUG_ON(!dqm); + + pr_debug("kfd: In func %s num of pipes: %d\n", + __func__, get_pipes_per_mec(dqm)); mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); @@ -662,9 +697,13 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->sdma_queue_count = 0; dqm->active_runlist = false; retval = dqm->ops_asic_specific.initialize(dqm); - if (retval) - mutex_destroy(&dqm->lock); + if (retval != 0) + goto fail_init_pipelines; + return 0; + +fail_init_pipelines: + mutex_destroy(&dqm->lock); return retval; } @@ -673,23 +712,25 @@ static int start_cpsch(struct device_queue_manager *dqm) struct device_process_node *node; int retval; + BUG_ON(!dqm); + retval = 0; retval = pm_init(&dqm->packets, dqm); - if (retval) + if (retval != 0) goto fail_packet_manager_init; retval = set_sched_resources(dqm); - if (retval) + if (retval != 0) goto fail_set_sched_resources; - pr_debug("Allocating fence memory\n"); + pr_debug("kfd: allocating fence memory\n"); /* allocate fence memory on the gart */ retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), &dqm->fence_mem); - if (retval) + if (retval != 0) goto fail_allocate_vidmem; dqm->fence_addr = dqm->fence_mem->cpu_ptr; @@ -717,6 +758,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) struct device_process_node *node; struct kfd_process_device *pdd; + BUG_ON(!dqm); + destroy_queues_cpsch(dqm, true, true); list_for_each_entry(node, &dqm->queues, list) { @@ -733,9 +776,13 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { + BUG_ON(!dqm || !kq || !qpd); + + pr_debug("kfd: In func %s\n", __func__); + mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("Can't create new kernel queue because %d queues were already created\n", + pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n", dqm->total_queue_count); mutex_unlock(&dqm->lock); return -EPERM; @@ -762,6 +809,10 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { + BUG_ON(!dqm || !kq); + + pr_debug("kfd: In %s\n", __func__); + mutex_lock(&dqm->lock); /* here we actually preempt the DIQ */ destroy_queues_cpsch(dqm, true, false); @@ -793,6 +844,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, 
struct queue *q, int retval; struct mqd_manager *mqd; + BUG_ON(!dqm || !q || !qpd); + retval = 0; if (allocate_vmid) @@ -801,7 +854,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("Can't create new usermode queue because %d queues were already created\n", + pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; goto out; @@ -813,15 +866,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { - retval = -ENOMEM; - goto out; + if (mqd == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; } dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval) + if (retval != 0) goto out; list_add(&q->list, &qpd->queues_list); @@ -831,7 +884,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; + dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -850,11 +903,12 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout) { + BUG_ON(!fence_addr); timeout += jiffies; while (*fence_addr != fence_value) { if (time_after(jiffies, timeout)) { - pr_err("qcm fence wait loop timeout expired\n"); + pr_err("kfd: qcm fence wait loop timeout expired\n"); return -ETIME; } schedule(); @@ -878,6 +932,8 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, enum kfd_preempt_type_filter preempt_type; struct kfd_process_device *pdd; + BUG_ON(!dqm); + retval = 0; if (lock) @@ -885,7 +941,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) goto out; - pr_debug("Before destroying queues, sdma queue count is : %u\n", + pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", dqm->sdma_queue_count); if (dqm->sdma_queue_count > 0) { @@ -899,7 +955,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, preempt_type, 0, false, 0); - if (retval) + if (retval != 0) goto out; *dqm->fence_addr = KFD_FENCE_INIT; @@ -908,7 +964,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval) { + if (retval != 0) { pdd = kfd_get_process_device_data(dqm->dev, kfd_get_process(current)); pdd->reset_wavefronts = true; @@ -927,12 +983,14 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) { int retval; + BUG_ON(!dqm); + if (lock) mutex_lock(&dqm->lock); retval = destroy_queues_cpsch(dqm, false, false); - if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + if (retval != 0) { + pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); goto out; } @@ -947,8 +1005,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) } retval = pm_send_runlist(&dqm->packets, &dqm->queues); - if (retval) { - pr_err("failed to execute 
runlist"); + if (retval != 0) { + pr_err("kfd: failed to execute runlist"); goto out; } dqm->active_runlist = true; @@ -967,6 +1025,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd; bool preempt_all_queues; + BUG_ON(!dqm || !qpd || !q); + preempt_all_queues = false; retval = 0; @@ -1038,6 +1098,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, { bool retval; + pr_debug("kfd: In func %s\n", __func__); + mutex_lock(&dqm->lock); if (alternate_aperture_size == 0) { @@ -1058,11 +1120,14 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, uint64_t base = (uintptr_t)alternate_aperture_base; uint64_t limit = base + alternate_aperture_size - 1; - if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || - (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { - retval = false; + if (limit <= base) + goto out; + + if ((base & APE1_FIXED_BITS_MASK) != 0) + goto out; + + if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) goto out; - } qpd->sh_mem_ape1_base = base >> 16; qpd->sh_mem_ape1_limit = limit >> 16; @@ -1079,22 +1144,27 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); - pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", + pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", qpd->sh_mem_config, qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit); -out: mutex_unlock(&dqm->lock); return retval; + +out: + mutex_unlock(&dqm->lock); + return false; } struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; - pr_debug("Loading device queue manager\n"); + BUG_ON(!dev); - dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); + pr_debug("kfd: loading device queue manager\n"); + + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); if (!dqm) return NULL; @@ -1132,8 +1202,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: - pr_err("Invalid scheduling policy %d\n", sched_policy); - goto out_free; + BUG(); + break; } switch (dev->device_info->asic_family) { @@ -1146,16 +1216,18 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; } - if (!dqm->ops.initialize(dqm)) - return dqm; + if (dqm->ops.initialize(dqm) != 0) { + kfree(dqm); + return NULL; + } -out_free: - kfree(dqm); - return NULL; + return dqm; } void device_queue_manager_uninit(struct device_queue_manager *dqm) { + BUG_ON(!dqm); + dqm->ops.uninitialize(dqm); kfree(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 72c3cba..48dc056 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -24,7 +24,6 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" #include "oss/oss_2_4_sh_mask.h" -#include "gca/gfx_7_2_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -66,7 +65,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. 
*/ - WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return PRIVATE_BASE(top_address_nybble << 12) | @@ -105,6 +104,8 @@ static int register_process_cik(struct device_queue_manager *dqm, struct kfd_process_device *pdd; unsigned int temp; + BUG_ON(!dqm || !qpd); + pdd = qpd_to_pdd(qpd); /* check if sh_mem_config register already configured */ @@ -124,10 +125,9 @@ static int register_process_cik(struct device_queue_manager *dqm, } else { temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); - qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } - pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 40e9ddd..7e9cae9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. */ - WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return top_address_nybble << 12 | @@ -110,6 +110,8 @@ static int register_process_vi(struct device_queue_manager *dqm, struct kfd_process_device *pdd; unsigned int temp; + BUG_ON(!dqm || !qpd); + pdd = qpd_to_pdd(qpd); /* check if sh_mem_config register already configured */ @@ -135,11 +137,9 @@ static int register_process_vi(struct device_queue_manager *dqm, qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; - qpd->sh_mem_config |= 1 << - SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } - pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index acf4d2a..453c5d6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -59,7 +59,7 @@ static inline size_t doorbell_process_allocation(void) } /* Doorbell calculations for device init. 
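 * (Editor's aside, not part of the patch: in the reverted code each
 *  process appears to own one doorbell page, sized by
 *  doorbell_process_allocation(), and kernel queues borrow the page of
 *  the reserved KERNEL_DOORBELL_PASID; a kernel doorbell's dword offset
 *  is computed later in this file as
 *  KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / sizeof(u32)) + inx.)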
*/ -int kfd_doorbell_init(struct kfd_dev *kfd) +void kfd_doorbell_init(struct kfd_dev *kfd) { size_t doorbell_start_offset; size_t doorbell_aperture_size; @@ -95,35 +95,26 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, doorbell_process_allocation()); - if (!kfd->doorbell_kernel_ptr) - return -ENOMEM; + BUG_ON(!kfd->doorbell_kernel_ptr); - pr_debug("Doorbell initialization:\n"); - pr_debug("doorbell base == 0x%08lX\n", + pr_debug("kfd: doorbell initialization:\n"); + pr_debug("kfd: doorbell base == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("doorbell_id_offset == 0x%08lX\n", + pr_debug("kfd: doorbell_id_offset == 0x%08lX\n", kfd->doorbell_id_offset); - pr_debug("doorbell_process_limit == 0x%08lX\n", + pr_debug("kfd: doorbell_process_limit == 0x%08lX\n", doorbell_process_limit); - pr_debug("doorbell_kernel_offset == 0x%08lX\n", + pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("doorbell aperture size == 0x%08lX\n", + pr_debug("kfd: doorbell aperture size == 0x%08lX\n", kfd->shared_resources.doorbell_aperture_size); - pr_debug("doorbell kernel address == 0x%08lX\n", + pr_debug("kfd: doorbell kernel address == 0x%08lX\n", (uintptr_t)kfd->doorbell_kernel_ptr); - - return 0; -} - -void kfd_doorbell_fini(struct kfd_dev *kfd) -{ - if (kfd->doorbell_kernel_ptr) - iounmap(kfd->doorbell_kernel_ptr); } int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) @@ -140,7 +131,7 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) /* Find kfd device according to gpu id */ dev = kfd_device_by_id(vma->vm_pgoff); - if (!dev) + if (dev == NULL) return -EINVAL; /* Calculate physical address of doorbell */ @@ -151,11 +142,12 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("Mapping doorbell page\n" + pr_debug("kfd: mapping doorbell page in %s\n" " target user address == 0x%08llX\n" " physical address == 0x%08llX\n" " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", + __func__, (unsigned long long) vma->vm_start, address, vma->vm_flags, doorbell_process_allocation()); @@ -174,6 +166,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, { u32 inx; + BUG_ON(!kfd || !doorbell_off); + mutex_lock(&kfd->doorbell_mutex); inx = find_first_zero_bit(kfd->doorbell_available_index, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); @@ -191,7 +185,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / sizeof(u32)) + inx; - pr_debug("Get kernel queue doorbell\n" + pr_debug("kfd: get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" " kernel address == 0x%08lX\n", *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); @@ -203,6 +197,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) { unsigned int inx; + BUG_ON(!kfd || !db_addr); + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); mutex_lock(&kfd->doorbell_mutex); @@ -214,7 +210,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) { if (db) { writel(value, db); - pr_debug("Writing %d to doorbell address 0x%p\n", value, db); + pr_debug("writing %d to doorbell address 0x%p\n", value, db); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 5979158..d1ce83d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -110,7 +110,7 @@ static bool allocate_free_slot(struct kfd_process *process, *out_page = page; *out_slot_index = slot; - pr_debug("Allocated event signal slot in page %p, slot %d\n", + pr_debug("allocated event signal slot in page %p, slot %d\n", page, slot); return true; @@ -155,9 +155,9 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) struct signal_page, event_pages)->page_index + 1; - pr_debug("Allocated new event signal page at %p, for process %p\n", + pr_debug("allocated new event signal page at %p, for process %p\n", page, p); - pr_debug("Page index is %d\n", page->page_index); + pr_debug("page index is %d\n", page->page_index); list_add(&page->event_pages, &p->signal_event_pages); @@ -194,8 +194,7 @@ static void release_event_notification_slot(struct signal_page *page, page->free_slots++; /* We don't free signal pages, they are retained by the process - * and reused until it exits. - */ + * and reused until it exits. */ } static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, @@ -247,7 +246,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) for (id = p->next_nonsignal_event_id; id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); + lookup_event_by_id(p, id) != NULL; id++) ; @@ -266,7 +265,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); + lookup_event_by_id(p, id) != NULL; id++) ; @@ -292,13 +291,13 @@ static int create_signal_event(struct file *devkfd, struct kfd_event *ev) { if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { - pr_warn("Signal event wasn't created because limit was reached\n"); + pr_warn("amdkfd: Signal event wasn't created because limit was reached\n"); return -ENOMEM; } if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, &ev->signal_slot_index)) { - pr_warn("Signal event wasn't created because out of kernel memory\n"); + pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n"); return -ENOMEM; } @@ -310,7 +309,11 @@ static int create_signal_event(struct file *devkfd, ev->event_id = make_signal_event_id(ev->signal_page, ev->signal_slot_index); - pr_debug("Signal event number %zu created with id %d, address %p\n", + pr_debug("signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, + ev->user_signal_address); + + pr_debug("signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, ev->user_signal_address); @@ -342,7 +345,7 @@ void kfd_event_init_process(struct kfd_process *p) static void destroy_event(struct kfd_process *p, struct kfd_event *ev) { - if (ev->signal_page) { + if (ev->signal_page != NULL) { release_event_notification_slot(ev->signal_page, ev->signal_slot_index); p->signal_event_count--; @@ -581,7 +584,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, * search faster. 
*/ struct signal_page *page; - unsigned int i; + unsigned i; list_for_each_entry(page, &p->signal_event_pages, event_pages) for (i = 0; i < SLOTS_PER_PAGE; i++) @@ -813,7 +816,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) /* check required size is logical */ if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != get_order(vma->vm_end - vma->vm_start)) { - pr_err("Event page mmap requested illegal size\n"); + pr_err("amdkfd: event page mmap requested illegal size\n"); return -EINVAL; } @@ -822,7 +825,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) page = lookup_signal_page_by_index(p, page_index); if (!page) { /* Probably KFD bug, but mmap is user-accessible. */ - pr_debug("Signal page could not be found for page_index %u\n", + pr_debug("signal page could not be found for page_index %u\n", page_index); return -EINVAL; } @@ -833,7 +836,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; - pr_debug("Mapping signal page\n"); + pr_debug("mapping signal page\n"); pr_debug(" start user address == 0x%08lx\n", vma->vm_start); pr_debug(" end user address == 0x%08lx\n", vma->vm_end); pr_debug(" pfn == 0x%016lX\n", pfn); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index c59384b..2b65510 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -304,7 +304,7 @@ int kfd_init_apertures(struct kfd_process *process) id < NUM_OF_SUPPORTED_GPUS) { pdd = kfd_create_process_device_data(dev, process); - if (!pdd) { + if (pdd == NULL) { pr_err("Failed to create process device data\n"); return -1; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 70b3a99c..7f134aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -179,7 +179,7 @@ static void interrupt_wq(struct work_struct *work) bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { /* integer and bitwise OR so there is no boolean short-circuiting */ - unsigned int wanted = 0; + unsigned wanted = 0; wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, ih_ring_entry); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 681b639..d135cd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -41,11 +41,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, int retval; union PM4_MES_TYPE_3_HEADER nop; - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) - return false; + BUG_ON(!kq || !dev); + BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); - pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, - queue_size); + pr_debug("amdkfd: In func %s initializing queue type %d size %d\n", + __func__, KFD_QUEUE_TYPE_HIQ, queue_size); memset(&prop, 0, sizeof(prop)); memset(&nop, 0, sizeof(nop)); @@ -63,23 +63,23 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, KFD_MQD_TYPE_HIQ); break; default: - pr_err("Invalid queue type %d\n", type); - return false; + BUG(); + break; } - if (!kq->mqd) + if (kq->mqd == NULL) return false; prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); - if (!prop.doorbell_ptr) { - pr_err("Failed 
to initialize doorbell"); + if (prop.doorbell_ptr == NULL) { + pr_err("amdkfd: error init doorbell"); goto err_get_kernel_doorbell; } retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); if (retval != 0) { - pr_err("Failed to init pq queues size %d\n", queue_size); + pr_err("amdkfd: error init pq queues size (%d)\n", queue_size); goto err_pq_allocate_vidmem; } @@ -87,7 +87,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_gpu_addr = kq->pq->gpu_addr; retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); - if (!retval) + if (retval == false) goto err_eop_allocate_vidmem; retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), @@ -139,12 +139,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, /* assign HIQ to HQD */ if (type == KFD_QUEUE_TYPE_HIQ) { - pr_debug("Assigning hiq to hqd\n"); + pr_debug("assigning hiq to hqd\n"); kq->queue->pipe = KFD_CIK_HIQ_PIPE; kq->queue->queue = KFD_CIK_HIQ_QUEUE; kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, - kq->queue->queue, &kq->queue->properties, - NULL); + kq->queue->queue, NULL); } else { /* allocate fence for DIQ */ @@ -181,6 +180,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize(struct kernel_queue *kq) { + BUG_ON(!kq); + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) kq->mqd->destroy_mqd(kq->mqd, NULL, @@ -210,6 +211,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, uint32_t wptr, rptr; unsigned int *queue_address; + BUG_ON(!kq || !buffer_ptr); + rptr = *kq->rptr_kernel; wptr = *kq->wptr_kernel; queue_address = (unsigned int *)kq->pq_kernel_addr; @@ -249,7 +252,11 @@ static void submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; +#endif + + BUG_ON(!kq); +#ifdef DEBUG for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) { pr_debug("0x%2X ", kq->pq_kernel_addr[i]); if (i % 15 == 0) @@ -265,6 +272,7 @@ static void submit_packet(struct kernel_queue *kq) static void rollback_packet(struct kernel_queue *kq) { + BUG_ON(!kq); kq->pending_wptr = *kq->queue->properties.write_ptr; } @@ -273,7 +281,9 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, { struct kernel_queue *kq; - kq = kzalloc(sizeof(*kq), GFP_KERNEL); + BUG_ON(!dev); + + kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL); if (!kq) return NULL; @@ -294,7 +304,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, } if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) { - pr_err("Failed to init kernel queue\n"); + pr_err("amdkfd: failed to init kernel queue\n"); kfree(kq); return NULL; } @@ -303,37 +313,32 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, void kernel_queue_uninit(struct kernel_queue *kq) { + BUG_ON(!kq); + kq->ops.uninitialize(kq); kfree(kq); } -/* FIXME: Can this test be removed? 
*/ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) { struct kernel_queue *kq; uint32_t *buffer, i; int retval; - pr_err("Starting kernel queue test\n"); + BUG_ON(!dev); + + pr_err("amdkfd: starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); - if (unlikely(!kq)) { - pr_err(" Failed to initialize HIQ\n"); - pr_err("Kernel queue test failed\n"); - return; - } + BUG_ON(!kq); retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); - if (unlikely(retval != 0)) { - pr_err(" Failed to acquire packet buffer\n"); - pr_err("Kernel queue test failed\n"); - return; - } + BUG_ON(retval != 0); for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; kq->ops.submit_packet(kq); - pr_err("Ending kernel queue test\n"); + pr_err("amdkfd: ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 0d73bea..850a562 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -61,8 +61,7 @@ MODULE_PARM_DESC(send_sigterm, static int amdkfd_init_completed; -int kgd2kfd_init(unsigned int interface_version, - const struct kgd2kfd_calls **g2f) +int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) { if (!amdkfd_init_completed) return -EPROBE_DEFER; @@ -91,7 +90,7 @@ static int __init kfd_module_init(void) /* Verify module parameters */ if ((sched_policy < KFD_SCHED_POLICY_HWS) || (sched_policy > KFD_SCHED_POLICY_NO_HWS)) { - pr_err("sched_policy has invalid value\n"); + pr_err("kfd: sched_policy has invalid value\n"); return -1; } @@ -99,13 +98,13 @@ static int __init kfd_module_init(void) if ((max_num_of_queues_per_device < 1) || (max_num_of_queues_per_device > KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) { - pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); + pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); return -1; } err = kfd_pasid_init(); if (err < 0) - return err; + goto err_pasid; err = kfd_chardev_init(); if (err < 0) @@ -127,6 +126,7 @@ static int __init kfd_module_init(void) kfd_chardev_exit(); err_ioctl: kfd_pasid_exit(); +err_pasid: return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 1f3a6ba..213a71e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -67,8 +67,7 @@ struct mqd_manager { int (*load_mqd)(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, - struct queue_properties *p, - struct mm_struct *mms); + uint32_t __user *wptr); int (*update_mqd)(struct mqd_manager *mm, void *mqd, struct queue_properties *q); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 164fa4b..e492692 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -44,6 +44,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, struct cik_mqd *m; int retval; + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), mqd_mem_obj); @@ -97,7 +101,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_iq_rptr = AQL_ENABLE; *mqd = m; - if (gart_addr) + if (gart_addr != NULL) *gart_addr = addr; retval = mm->update_mqd(mm, m, q); @@ -111,6 +115,8 @@ static int init_mqd_sdma(struct mqd_manager 
*mm, void **mqd, int retval; struct cik_sdma_rlc_registers *m; + BUG_ON(!mm || !mqd || !mqd_mem_obj); + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_sdma_rlc_registers), mqd_mem_obj); @@ -123,7 +129,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); *mqd = m; - if (gart_addr) + if (gart_addr != NULL) *gart_addr = (*mqd_mem_obj)->gpu_addr; retval = mm->update_mqd(mm, m, q); @@ -134,31 +140,27 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { + BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { + BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, - uint32_t queue_id, struct queue_properties *p, - struct mm_struct *mms) + uint32_t queue_id, uint32_t __user *wptr) { - /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ - uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); - - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, - (uint32_t __user *)p->write_ptr, - wptr_shift, wptr_mask, mms); + return mm->dev->kfd2kgd->hqd_load + (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); } static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, - struct queue_properties *p, struct mm_struct *mms) + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr) { return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); } @@ -168,6 +170,10 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, { struct cik_mqd *m; + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; @@ -182,17 +188,21 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); m->cp_hqd_vmid = q->vmid; - if (q->format == KFD_QUEUE_FORMAT_AQL) + if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + } + m->cp_hqd_active = 0; q->is_active = false; if (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0) { + m->cp_hqd_active = 1; q->is_active = true; } @@ -204,6 +214,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, { struct cik_sdma_rlc_registers *m; + BUG_ON(!mm || !mqd || !q); + m = get_sdma_mqd(mqd); m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | @@ -242,7 +254,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, pipe_id, queue_id); } @@ -289,6 +301,10 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct cik_mqd *m; int retval; + BUG_ON(!mm || !q || !mqd || 
!mqd_mem_obj); + + pr_debug("kfd: In func %s\n", __func__); + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), mqd_mem_obj); @@ -343,6 +359,10 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, { struct cik_mqd *m; + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | @@ -380,6 +400,8 @@ struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) { struct cik_sdma_rlc_registers *m; + BUG_ON(!mqd); + m = (struct cik_sdma_rlc_registers *)mqd; return m; @@ -390,10 +412,12 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 73cbfe1..a9b9882 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -85,7 +85,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_iq_rptr = 1; *mqd = m; - if (gart_addr) + if (gart_addr != NULL) *gart_addr = addr; retval = mm->update_mqd(mm, m, q); @@ -94,15 +94,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, - struct queue_properties *p, struct mm_struct *mms) + uint32_t __user *wptr) { - /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ - uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); - - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, - (uint32_t __user *)p->write_ptr, - wptr_shift, wptr_mask, mms); + return mm->dev->kfd2kgd->hqd_load + (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); } static int __update_mqd(struct mqd_manager *mm, void *mqd, @@ -111,6 +106,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, { struct vi_mqd *m; + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + m = get_mqd(mqd); m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | @@ -118,7 +117,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); @@ -127,9 +126,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_doorbell_control = + 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | q->doorbell_off << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; - pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control); m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT | @@ -139,15 +139,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT; - /* - * HW does not clamp this field correctly. Maximum EOP queue size - * is constrained by per-SE EOP done signal count, which is 8-bit. - * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit - * more than (EOP entry count - 1) so a queue size of 0x800 dwords - * is safe, giving a maximum field value of 0xA. 
- */ - m->cp_hqd_eop_control |= min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + m->cp_hqd_eop_control |= + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = @@ -163,10 +156,12 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } + m->cp_hqd_active = 0; q->is_active = false; if (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0) { + m->cp_hqd_active = 1; q->is_active = true; } @@ -186,13 +181,14 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->kgd, type, timeout, pipe_id, queue_id); } static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { + BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } @@ -242,10 +238,12 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 308571b..a470019 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,6 +26,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" +#include "kfd_pm4_headers.h" #include "kfd_pm4_headers_vi.h" #include "kfd_pm4_opcodes.h" @@ -34,8 +35,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, { unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t); - WARN((temp * sizeof(uint32_t)) > buffer_size_bytes, - "Runlist IB overflow"); + BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes); *wptr = temp; } @@ -43,12 +43,12 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) { union PM4_MES_TYPE_3_HEADER header; - header.u32All = 0; + header.u32all = 0; header.opcode = opcode; header.count = packet_size/sizeof(uint32_t) - 2; header.type = PM4_TYPE_3; - return header.u32All; + return header.u32all; } static void pm_calc_rlib_size(struct packet_manager *pm, @@ -58,6 +58,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int process_count, queue_count; unsigned int map_queue_size; + BUG_ON(!pm || !rlib_size || !over_subscription); + process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; @@ -65,12 +67,15 @@ static void pm_calc_rlib_size(struct packet_manager *pm, *over_subscription = false; if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { *over_subscription = true; - pr_debug("Over subscribed runlist\n"); + pr_debug("kfd: over subscribed runlist\n"); } - map_queue_size = sizeof(struct pm4_mes_map_queues); + map_queue_size = + (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ? 
+ sizeof(struct pm4_mes_map_queues) : + sizeof(struct pm4_map_queues); /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + + *rlib_size = process_count * sizeof(struct pm4_map_process) + queue_count * map_queue_size; /* @@ -78,9 +83,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_mes_runlist); + *rlib_size += sizeof(struct pm4_runlist); - pr_debug("runlist ib size %d\n", *rlib_size); + pr_debug("kfd: runlist ib size %d\n", *rlib_size); } static int pm_allocate_runlist_ib(struct packet_manager *pm, @@ -91,16 +96,17 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, { int retval; - if (WARN_ON(pm->allocated)) - return -EINVAL; + BUG_ON(!pm); + BUG_ON(pm->allocated); + BUG_ON(is_over_subscription == NULL); pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); - if (retval) { - pr_err("Failed to allocate runlist IB\n"); + if (retval != 0) { + pr_err("kfd: failed to allocate runlist IB\n"); return retval; } @@ -115,16 +121,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { - struct pm4_mes_runlist *packet; + struct pm4_runlist *packet; - if (WARN_ON(!ib)) - return -EFAULT; + BUG_ON(!pm || !buffer || !ib); - packet = (struct pm4_mes_runlist *)buffer; + packet = (struct pm4_runlist *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); + memset(buffer, 0, sizeof(struct pm4_runlist)); + packet->header.u32all = build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_runlist)); packet->bitfields4.ib_size = ib_size_in_dwords; packet->bitfields4.chain = chain ? 1 : 0; @@ -139,16 +144,20 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { - struct pm4_mes_map_process *packet; + struct pm4_map_process *packet; struct queue *cur; uint32_t num_queues; - packet = (struct pm4_mes_map_process *)buffer; + BUG_ON(!pm || !buffer || !qpd); + + packet = (struct pm4_map_process *)buffer; + + pr_debug("kfd: In func %s\n", __func__); - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + memset(buffer, 0, sizeof(struct pm4_map_process)); - packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); + packet->header.u32all = build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; packet->bitfields2.process_quantum = 1; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -174,17 +183,21 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, +static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { struct pm4_mes_map_queues *packet; bool use_static = is_static; + BUG_ON(!pm || !buffer || !q); + + pr_debug("kfd: In func %s\n", __func__); + packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + memset(buffer, 0, sizeof(struct pm4_map_queues)); packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); + sizeof(struct pm4_map_queues)); packet->bitfields2.alloc_format = alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; @@ -212,8 +225,10 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, use_static = false; /* no static queues under SDMA */ break; default: - WARN(1, "queue type %d", q->properties.type); - return -EINVAL; + pr_err("kfd: in %s queue type %d\n", __func__, + q->properties.type); + BUG(); + break; } packet->bitfields3.doorbell_offset = q->properties.doorbell_off; @@ -233,6 +248,68 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, return 0; } +static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_map_queues *packet; + bool use_static = is_static; + + BUG_ON(!pm || !buffer || !q); + + pr_debug("kfd: In func %s\n", __func__); + + packet = (struct pm4_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_map_queues)); + + packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots; + + packet->bitfields2.vidmem = (q->properties.is_interop) ? + vidmem__mes_map_queues__uses_video_memory : + vidmem__mes_map_queues__uses_no_video_memory; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__sdma0; + use_static = false; /* no static queues under SDMA */ + break; + default: + BUG(); + break; + } + + packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mes_map_queues_ordinals[0].bitfields3.is_static = + (use_static) ? 
1 : 0; + + packet->mes_map_queues_ordinals[0].mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mes_map_queues_ordinals[0].mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->mes_map_queues_ordinals[0].wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->mes_map_queues_ordinals[0].wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + static int pm_create_runlist_ib(struct packet_manager *pm, struct list_head *queues, uint64_t *rl_gpu_addr, @@ -247,16 +324,19 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct kernel_queue *kq; bool is_over_subscription; + BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr); + rl_wptr = retval = proccesses_mapped = 0; retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, &alloc_size_bytes, &is_over_subscription); - if (retval) + if (retval != 0) return retval; *rl_size_bytes = alloc_size_bytes; - pr_debug("Building runlist ib process count: %d queues count %d\n", + pr_debug("kfd: In func %s\n", __func__); + pr_debug("kfd: building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->queue_count); /* build the run list ib packet */ @@ -264,35 +344,42 @@ static int pm_create_runlist_ib(struct packet_manager *pm, qpd = cur->qpd; /* build map process packet */ if (proccesses_mapped >= pm->dqm->processes_count) { - pr_debug("Not enough space left in runlist IB\n"); + pr_debug("kfd: not enough space left in runlist IB\n"); pm_release_ib(pm); return -ENOMEM; } retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); - if (retval) + if (retval != 0) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), + inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { if (!kq->queue->properties.is_active) continue; - pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", + pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + if (pm->dqm->dev->device_info->asic_family == + CHIP_CARRIZO) + retval = pm_create_map_queue_vi(pm, + &rl_buffer[rl_wptr], + kq->queue, + qpd->is_debug); + else + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); - if (retval) + if (retval != 0) return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + sizeof(struct pm4_map_queues), alloc_size_bytes); } @@ -300,44 +387,51 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (!q->properties.is_active) continue; - pr_debug("static_queue, mapping user queue %d, is debug status %d\n", + pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + if (pm->dqm->dev->device_info->asic_family == + CHIP_CARRIZO) + retval = pm_create_map_queue_vi(pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); + else + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); - if (retval) + if (retval != 0) return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + sizeof(struct pm4_map_queues), alloc_size_bytes); } } - pr_debug("Finished map process and queues to runlist\n"); + pr_debug("kfd: finished map process and queues to runlist\n"); if (is_over_subscription) - retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], - *rl_gpu_addr, - alloc_size_bytes / sizeof(uint32_t), - true); + pm_create_runlist(pm, 
&rl_buffer[rl_wptr], *rl_gpu_addr, + alloc_size_bytes / sizeof(uint32_t), true); for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); pr_debug("\n"); - return retval; + return 0; } int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { + BUG_ON(!dqm); + pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); - if (!pm->priv_queue) { + if (pm->priv_queue == NULL) { mutex_destroy(&pm->lock); return -ENOMEM; } @@ -348,6 +442,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) void pm_uninit(struct packet_manager *pm) { + BUG_ON(!pm); + mutex_destroy(&pm->lock); kernel_queue_uninit(pm->priv_queue); } @@ -355,22 +451,25 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_mes_set_resources *packet; - int retval = 0; + struct pm4_set_resources *packet; + + BUG_ON(!pm || !res); + + pr_debug("kfd: In func %s\n", __func__); mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (!packet) { - pr_err("Failed to allocate buffer on kernel queue\n"); - retval = -ENOMEM; - goto out; + (unsigned int **)&packet); + if (packet == NULL) { + mutex_unlock(&pm->lock); + pr_err("kfd: failed to allocate buffer on kernel queue\n"); + return -ENOMEM; } - memset(packet, 0, sizeof(struct pm4_mes_set_resources)); - packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); + memset(packet, 0, sizeof(struct pm4_set_resources)); + packet->header.u32all = build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_set_resources)); packet->bitfields2.queue_type = queue_type__mes_set_resources__hsa_interface_queue_hiq; @@ -388,10 +487,9 @@ int pm_send_set_resources(struct packet_manager *pm, pm->priv_queue->ops.submit_packet(pm->priv_queue); -out: mutex_unlock(&pm->lock); - return retval; + return 0; } int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) @@ -401,24 +499,26 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) size_t rl_ib_size, packet_size_dwords; int retval; + BUG_ON(!pm || !dqm_queues); + retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, &rl_ib_size); - if (retval) + if (retval != 0) goto fail_create_runlist_ib; - pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); + packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); - if (retval) + if (retval != 0) goto fail_acquire_packet_buffer; retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); - if (retval) + if (retval != 0) goto fail_create_runlist; pm->priv_queue->ops.submit_packet(pm->priv_queue); @@ -432,7 +532,8 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: - pm_release_ib(pm); + if (pm->allocated) + pm_release_ib(pm); return retval; } @@ -440,21 +541,20 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { int retval; - struct pm4_mes_query_status *packet; + 
struct pm4_query_status *packet; - if (WARN_ON(!fence_address)) - return -EFAULT; + BUG_ON(!pm || !fence_address); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, - sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), + sizeof(struct pm4_query_status) / sizeof(uint32_t), (unsigned int **)&packet); - if (retval) + if (retval != 0) goto fail_acquire_packet_buffer; - packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); + packet->header.u32all = build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_query_status)); packet->bitfields2.context_id = 0; packet->bitfields2.interrupt_sel = @@ -468,6 +568,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, packet->data_lo = lower_32_bits((uint64_t)fence_value); pm->priv_queue->ops.submit_packet(pm->priv_queue); + mutex_unlock(&pm->lock); + + return 0; fail_acquire_packet_buffer: mutex_unlock(&pm->lock); @@ -481,22 +584,24 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, { int retval; uint32_t *buffer; - struct pm4_mes_unmap_queues *packet; + struct pm4_unmap_queues *packet; + + BUG_ON(!pm); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, - sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), + sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), &buffer); - if (retval) + if (retval != 0) goto err_acquire_packet_buffer; - packet = (struct pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", + packet = (struct pm4_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_unmap_queues)); + pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", mode, reset, type); - packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); + packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_unmap_queues)); switch (type) { case KFD_QUEUE_TYPE_COMPUTE: case KFD_QUEUE_TYPE_DIQ: @@ -508,9 +613,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; default: - WARN(1, "queue type %d", type); - retval = -EINVAL; - goto err_invalid; + BUG(); + break; } if (reset) @@ -534,17 +638,16 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, break; case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; break; case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: /* in this case, we do not preempt static queues */ packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; break; default: - WARN(1, "filter %d", mode); - retval = -EINVAL; - goto err_invalid; + BUG(); + break; } pm->priv_queue->ops.submit_packet(pm->priv_queue); @@ -552,8 +655,6 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, mutex_unlock(&pm->lock); return 0; -err_invalid: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); err_acquire_packet_buffer: mutex_unlock(&pm->lock); return retval; @@ -561,6 +662,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm) { + 
BUG_ON(!pm); + mutex_lock(&pm->lock); if (pm->allocated) { kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 1e06de0..6cfe7f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,8 +32,7 @@ int kfd_pasid_init(void) { pasid_limit = KFD_MAX_NUM_OF_PROCESSES; - pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), - GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; @@ -92,6 +91,6 @@ unsigned int kfd_pasid_alloc(void) void kfd_pasid_free(unsigned int pasid) { - if (!WARN_ON(pasid == 0 || pasid >= pasid_limit)) - clear_bit(pasid, pasid_bitmap); + BUG_ON(pasid == 0 || pasid >= pasid_limit); + clear_bit(pasid, pasid_bitmap); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h index e50f73d..5b393f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -28,19 +28,112 @@ #define PM4_MES_HEADER_DEFINED union PM4_MES_TYPE_3_HEADER { struct { - /* reserved */ - uint32_t reserved1:8; - /* IT opcode */ - uint32_t opcode:8; - /* number of DWORDs - 1 in the information body */ - uint32_t count:14; - /* packet identifier. It should be 3 for type 3 packets */ - uint32_t type:2; + uint32_t reserved1:8; /* < reserved */ + uint32_t opcode:8; /* < IT opcode */ + uint32_t count:14; /* < number of DWORDs - 1 + * in the information body. + */ + uint32_t type:2; /* < packet identifier. + * It should be 3 for type 3 packets + */ }; uint32_t u32all; }; #endif /* PM4_MES_HEADER_DEFINED */ +/* --------------------MES_SET_RESOURCES-------------------- */ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + +struct pm4_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST-------------------- */ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t ib_base_hi:16; + uint32_t reserved2:16; + } bitfields3; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved3:1; + uint32_t valid:1; + uint32_t reserved4:8; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif 
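+
+/*
+ * Editorial sketch (not part of this revert): pm_create_runlist() above
+ * programs this packet before submission, roughly as shown. The ib_base
+ * field placement is inferred from the bitfield widths (bits 31:2 of
+ * ordinal2, bits 15:0 of ordinal3), so treat the two base-address lines
+ * as an assumption rather than a quote of the original code:
+ *
+ *	struct pm4_runlist rl = {0};
+ *
+ *	rl.header.u32all = build_pm4_header(IT_RUN_LIST,
+ *					sizeof(struct pm4_runlist));
+ *	rl.bitfields2.ib_base_lo = lower_32_bits(ib) >> 2;
+ *	rl.bitfields3.ib_base_hi = upper_32_bits(ib);
+ *	rl.bitfields4.ib_size = ib_size_in_dwords;
+ *	rl.bitfields4.chain = chain ? 1 : 0;
+ *	rl.bitfields4.valid = 1;
+ */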
/*--------------------MES_MAP_PROCESS-------------------- */ @@ -93,58 +186,217 @@ struct pm4_map_process { }; #endif -#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH -#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_DEFINED +#define PM4_MES_MAP_QUEUES_DEFINED +enum map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots = 0, + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1, + queue_sel__mes_map_queues__enable_process_queues = 2 +}; -struct pm4_map_process_scratch_kv { +enum map_queues_vidmem_enum { + vidmem__mes_map_queues__uses_no_video_memory = 0, + vidmem__mes_map_queues__uses_video_memory = 1 +}; + +enum map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe = 0, + alloc_format__mes_map_queues__all_on_one_pipe = 1 +}; + +enum map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute = 0, + engine_sel__mes_map_queues__sdma0 = 2, + engine_sel__mes_map_queues__sdma1 = 3 +}; + +struct pm4_map_queues { union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; }; union { struct { - uint32_t pasid:16; - uint32_t reserved1:8; - uint32_t diq_enable:1; - uint32_t process_quantum:7; + uint32_t reserved1:4; + enum map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:2; + uint32_t vmid:4; + uint32_t reserved3:4; + enum map_queues_vidmem_enum vidmem:2; + uint32_t reserved4:6; + enum map_queues_alloc_format_enum alloc_format:2; + enum map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; } bitfields2; uint32_t ordinal2; }; + struct { + union { + struct { + uint32_t is_static:1; + uint32_t reserved5:1; + uint32_t doorbell_offset:21; + uint32_t reserved6:3; + uint32_t queue:6; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; + + } mes_map_queues_ordinals[1]; /* 1..N of these ordinal groups */ + +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + union { struct { - uint32_t page_table_base:28; - uint32_t reserved2:4; - } bitfields3; + uint32_t context_id:28; + enum query_status_interrupt_sel_enum interrupt_sel:2; + enum query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:21; + uint32_t reserved3:3; + enum query_status_engine_sel_enum engine_sel:3; + uint32_t reserved4:3; + } bitfields3b; 
uint32_t ordinal3; }; - uint32_t reserved3; - uint32_t sh_mem_bases; - uint32_t sh_mem_config; - uint32_t sh_mem_ape1_base; - uint32_t sh_mem_ape1_limit; - uint32_t sh_hidden_private_base_vmid; - uint32_t reserved4; - uint32_t reserved5; - uint32_t gds_addr_lo; - uint32_t gds_addr_hi; + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2 +}; + +enum unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 +}; + +enum unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdma1 = 3 +}; + +struct pm4_unmap_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:21; + uint32_t reserved5:9; + } bitfields3b; + uint32_t ordinal3; + }; union { struct { - uint32_t num_gws:6; uint32_t reserved6:2; - uint32_t num_oac:4; - uint32_t reserved7:4; - uint32_t gds_size:6; - uint32_t num_queues:10; - } bitfields14; - uint32_t ordinal14; + uint32_t doorbell_offset1:21; + uint32_t reserved7:9; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:21; + uint32_t reserved9:9; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:21; + uint32_t reserved11:9; + } bitfields6; + uint32_t ordinal6; }; - uint32_t completion_signal_lo32; -uint32_t completion_signal_hi32; }; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 7c8d9b3..08c7219 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -30,12 +30,10 @@ union PM4_MES_TYPE_3_HEADER { struct { uint32_t reserved1 : 8; /* < reserved */ uint32_t opcode : 8; /* < IT opcode */ - uint32_t count : 14;/* < Number of DWORDS - 1 in the - * information body - */ - uint32_t type : 2; /* < packet identifier - * It should be 3 for type 3 packets - */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + information body. */ + uint32_t type : 2; /* < packet identifier. 
+ It should be 3 for type 3 packets */ }; uint32_t u32All; }; @@ -126,10 +124,9 @@ struct pm4_mes_runlist { uint32_t ib_size:20; uint32_t chain:1; uint32_t offload_polling:1; - uint32_t reserved2:1; + uint32_t reserved3:1; uint32_t valid:1; - uint32_t process_cnt:4; - uint32_t reserved3:4; + uint32_t reserved4:8; } bitfields4; uint32_t ordinal4; }; @@ -144,8 +141,8 @@ struct pm4_mes_runlist { struct pm4_mes_map_process { union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; }; union { @@ -156,48 +153,36 @@ struct pm4_mes_map_process { uint32_t process_quantum:7; } bitfields2; uint32_t ordinal2; - }; +}; union { struct { uint32_t page_table_base:28; - uint32_t reserved3:4; + uint32_t reserved2:4; } bitfields3; uint32_t ordinal3; }; - uint32_t reserved; - uint32_t sh_mem_bases; - uint32_t sh_mem_config; uint32_t sh_mem_ape1_base; uint32_t sh_mem_ape1_limit; - - uint32_t sh_hidden_private_base_vmid; - - uint32_t reserved2; - uint32_t reserved3; - + uint32_t sh_mem_config; uint32_t gds_addr_lo; uint32_t gds_addr_hi; union { struct { uint32_t num_gws:6; - uint32_t reserved4:2; + uint32_t reserved3:2; uint32_t num_oac:4; - uint32_t reserved5:4; + uint32_t reserved4:4; uint32_t gds_size:6; uint32_t num_queues:10; } bitfields10; uint32_t ordinal10; }; - uint32_t completion_signal_lo; - uint32_t completion_signal_hi; - }; - #endif /*--------------------MES_MAP_QUEUES--------------------*/ @@ -350,7 +335,7 @@ enum mes_unmap_queues_engine_sel_enum { engine_sel__mes_unmap_queues__sdmal = 3 }; -struct pm4_mes_unmap_queues { +struct PM4_MES_UNMAP_QUEUES { union { union PM4_MES_TYPE_3_HEADER header; /* header */ uint32_t ordinal1; @@ -410,101 +395,4 @@ struct pm4_mes_unmap_queues { }; #endif -#ifndef PM4_MEC_RELEASE_MEM_DEFINED -#define PM4_MEC_RELEASE_MEM_DEFINED -enum RELEASE_MEM_event_index_enum { - event_index___release_mem__end_of_pipe = 5, - event_index___release_mem__shader_done = 6 -}; - -enum RELEASE_MEM_cache_policy_enum { - cache_policy___release_mem__lru = 0, - cache_policy___release_mem__stream = 1, - cache_policy___release_mem__bypass = 2 -}; - -enum RELEASE_MEM_dst_sel_enum { - dst_sel___release_mem__memory_controller = 0, - dst_sel___release_mem__tc_l2 = 1, - dst_sel___release_mem__queue_write_pointer_register = 2, - dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 -}; - -enum RELEASE_MEM_int_sel_enum { - int_sel___release_mem__none = 0, - int_sel___release_mem__send_interrupt_only = 1, - int_sel___release_mem__send_interrupt_after_write_confirm = 2, - int_sel___release_mem__send_data_after_write_confirm = 3 -}; - -enum RELEASE_MEM_data_sel_enum { - data_sel___release_mem__none = 0, - data_sel___release_mem__send_32_bit_low = 1, - data_sel___release_mem__send_64_bit_data = 2, - data_sel___release_mem__send_gpu_clock_counter = 3, - data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, - data_sel___release_mem__store_gds_data_to_memory = 5 -}; - -struct pm4_mec_release_mem { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int event_type:6; - unsigned int reserved1:2; - enum RELEASE_MEM_event_index_enum event_index:4; - unsigned int tcl1_vol_action_ena:1; - unsigned int tc_vol_action_ena:1; - unsigned int reserved2:1; - unsigned int tc_wb_action_ena:1; - unsigned int tcl1_action_ena:1; - unsigned int tc_action_ena:1; - unsigned int reserved3:6; - unsigned int atc:1; - enum RELEASE_MEM_cache_policy_enum 
cache_policy:2; - unsigned int reserved4:5; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int reserved5:16; - enum RELEASE_MEM_dst_sel_enum dst_sel:2; - unsigned int reserved6:6; - enum RELEASE_MEM_int_sel_enum int_sel:3; - unsigned int reserved7:2; - enum RELEASE_MEM_data_sel_enum data_sel:3; - } bitfields3; - unsigned int ordinal3; - }; - - union { - struct { - unsigned int reserved8:2; - unsigned int address_lo_32b:30; - } bitfields4; - struct { - unsigned int reserved9:3; - unsigned int address_lo_64b:29; - } bitfields5; - unsigned int ordinal4; - }; - - unsigned int address_hi; - - unsigned int data_lo; - - unsigned int data_hi; -}; -#endif - -enum { - CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 -}; - #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b397ec7..4750cab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -239,6 +239,11 @@ enum kfd_preempt_type_filter { KFD_PREEMPT_TYPE_FILTER_BY_PASID }; +enum kfd_preempt_type { + KFD_PREEMPT_TYPE_WAVEFRONT, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET +}; + /** * enum kfd_queue_type * @@ -289,13 +294,13 @@ enum kfd_queue_format { * @write_ptr: Defines the number of dwords written to the ring buffer. * * @doorbell_ptr: This field aim is to notify the H/W of new packet written to - * the queue ring buffer. This field should be similar to write_ptr and the - * user should update this field after he updated the write_ptr. + * the queue ring buffer. This field should be similar to write_ptr and the user + * should update this field after he updated the write_ptr. * * @doorbell_off: The doorbell offset in the doorbell pci-bar. * - * @is_interop: Defines if this is a interop queue. Interop queue means that - * the queue can access both graphics and compute resources. + * @is_interop: Defines if this is a interop queue. Interop queue means that the + * queue can access both graphics and compute resources. * * @is_active: Defines if the queue is active or not. * @@ -347,10 +352,9 @@ struct queue_properties { * @properties: The queue properties. * * @mec: Used only in no cp scheduling mode and identifies to micro engine id - * that the queue should be execute on. + * that the queue should be execute on. * - * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe - * id. + * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. * * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. * @@ -432,7 +436,6 @@ struct qcm_process_device { uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; - uint32_t sh_hidden_private_base; }; /* Data that is per-process-per device. */ @@ -517,8 +520,8 @@ struct kfd_process { struct mutex event_mutex; /* All events in process hashed by ID, linked on kfd_event.events. */ DECLARE_HASHTABLE(events, 4); - /* struct slot_page_header.event_pages */ - struct list_head signal_event_pages; + struct list_head signal_event_pages; /* struct slot_page_header. 
+ event_pages */ u32 next_nonsignal_event_id; size_t signal_event_count; }; @@ -556,10 +559,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); /* Process device data iterator */ -struct kfd_process_device *kfd_get_first_process_device_data( - struct kfd_process *p); -struct kfd_process_device *kfd_get_next_process_device_data( - struct kfd_process *p, +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd); bool kfd_has_process_device_data(struct kfd_process *p); @@ -572,8 +573,7 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ -int kfd_doorbell_init(struct kfd_dev *kfd); -void kfd_doorbell_fini(struct kfd_dev *kfd); +void kfd_doorbell_init(struct kfd_dev *kfd); int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c74cf22..035bbc9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -79,7 +79,9 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) { struct kfd_process *process; - if (!thread->mm) + BUG_ON(!kfd_process_wq); + + if (thread->mm == NULL) return ERR_PTR(-EINVAL); /* Only the pthreads threading model is supported. */ @@ -99,7 +101,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) /* A prior open of /dev/kfd could have already created the process. */ process = find_process(thread); if (process) - pr_debug("Process already found\n"); + pr_debug("kfd: process already found\n"); if (!process) process = create_process(thread); @@ -115,7 +117,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) { struct kfd_process *process; - if (!thread->mm) + if (thread->mm == NULL) return ERR_PTR(-EINVAL); /* Only the pthreads threading model is supported. */ @@ -200,8 +202,10 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) struct kfd_process_release_work *work; struct kfd_process *p; + BUG_ON(!kfd_process_wq); + p = container_of(rcu, struct kfd_process, rcu); - WARN_ON(atomic_read(&p->mm->mm_count) <= 0); + BUG_ON(atomic_read(&p->mm->mm_count) <= 0); mmdrop(p->mm); @@ -225,8 +229,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, * mmu_notifier srcu is read locked */ p = container_of(mn, struct kfd_process, mmu_notifier); - if (WARN_ON(p->mm != mm)) - return; + BUG_ON(p->mm != mm); mutex_lock(&kfd_processes_mutex); hash_del_rcu(&p->kfd_processes); @@ -247,7 +250,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, kfd_dbgmgr_destroy(pdd->dev->dbgmgr); if (pdd->reset_wavefronts) { - pr_warn("Resetting all wave fronts\n"); + pr_warn("amdkfd: Resetting all wave fronts\n"); dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; } @@ -404,6 +407,8 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) struct kfd_process *p; struct kfd_process_device *pdd; + BUG_ON(dev == NULL); + /* * Look for the process that matches the pasid. 
If there is no such * process, we either released it in amdkfd's own notifier, or there @@ -444,16 +449,14 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) mutex_unlock(&p->mutex); } -struct kfd_process_device *kfd_get_first_process_device_data( - struct kfd_process *p) +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) { return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list); } -struct kfd_process_device *kfd_get_next_process_device_data( - struct kfd_process *p, +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd) { if (list_is_last(&pdd->per_device_list, &p->per_device_data)) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 4142d63..97f7f7e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -32,9 +32,12 @@ static inline struct process_queue_node *get_queue_by_qid( { struct process_queue_node *pqn; + BUG_ON(!pqm); + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { - if ((pqn->q && pqn->q->properties.queue_id == qid) || - (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) + if (pqn->q && pqn->q->properties.queue_id == qid) + return pqn; + if (pqn->kq && pqn->kq->queue->properties.queue_id == qid) return pqn; } @@ -46,13 +49,17 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, { unsigned long found; + BUG_ON(!pqm || !qid); + + pr_debug("kfd: in %s\n", __func__); + found = find_first_zero_bit(pqm->queue_slot_bitmap, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); - pr_debug("The new slot id %lu\n", found); + pr_debug("kfd: the new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_info("Cannot open more queues for process with pasid %d\n", + pr_info("amdkfd: Can not open more queues for process with pasid %d\n", pqm->process->pasid); return -ENOMEM; } @@ -65,11 +72,13 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { + BUG_ON(!pqm); + INIT_LIST_HEAD(&pqm->queues); pqm->queue_slot_bitmap = kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_BYTE), GFP_KERNEL); - if (!pqm->queue_slot_bitmap) + if (pqm->queue_slot_bitmap == NULL) return -ENOMEM; pqm->process = p; @@ -81,6 +90,10 @@ void pqm_uninit(struct process_queue_manager *pqm) int retval; struct process_queue_node *pqn, *next; + BUG_ON(!pqm); + + pr_debug("In func %s\n", __func__); + list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { retval = pqm_destroy_queue( pqm, @@ -89,7 +102,7 @@ void pqm_uninit(struct process_queue_manager *pqm) pqn->kq->queue->properties.queue_id); if (retval != 0) { - pr_err("failed to destroy queue\n"); + pr_err("kfd: failed to destroy queue\n"); return; } } @@ -104,6 +117,8 @@ static int create_cp_queue(struct process_queue_manager *pqm, { int retval; + retval = 0; + /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; @@ -116,13 +131,16 @@ static int create_cp_queue(struct process_queue_manager *pqm, retval = init_queue(q, q_properties); if (retval != 0) - return retval; + goto err_init_queue; (*q)->device = dev; (*q)->process = pqm->process; - pr_debug("PQM After init queue"); + pr_debug("kfd: PQM After init queue"); + + return retval; +err_init_queue: return retval; } @@ -143,6 
+161,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, int num_queues = 0; struct queue *cur; + BUG_ON(!pqm || !dev || !properties || !qid); + memset(&q_properties, 0, sizeof(struct queue_properties)); memcpy(&q_properties, properties, sizeof(struct queue_properties)); q = NULL; @@ -165,7 +185,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, list_for_each_entry(cur, &pdd->qpd.queues_list, list) num_queues++; if (num_queues >= dev->device_info->max_no_of_hqd/2) - return -ENOSPC; + return (-ENOSPC); } retval = find_available_queue_slot(pqm, qid); @@ -177,7 +197,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } - pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); + pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); if (!pqn) { retval = -ENOMEM; goto err_allocate_pqn; @@ -208,7 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= VMID_PER_DEVICE) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { - pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } @@ -225,7 +245,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, break; case KFD_QUEUE_TYPE_DIQ: kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); - if (!kq) { + if (kq == NULL) { retval = -ENOMEM; goto err_create_queue; } @@ -236,22 +256,22 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq, &pdd->qpd); break; default: - WARN(1, "Invalid queue type %d", type); - retval = -EINVAL; + BUG(); + break; } if (retval != 0) { - pr_err("DQM create queue failed\n"); + pr_debug("Error dqm create queue\n"); goto err_create_queue; } - pr_debug("PQM After DQM create queue\n"); + pr_debug("kfd: PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); if (q) { *properties = q->properties; - pr_debug("PQM done creating queue\n"); + pr_debug("kfd: PQM done creating queue\n"); print_queue_properties(properties); } @@ -277,11 +297,14 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = NULL; + BUG_ON(!pqm); retval = 0; + pr_debug("kfd: In Func %s\n", __func__); + pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { - pr_err("Queue id does not match any known queue\n"); + if (pqn == NULL) { + pr_err("kfd: queue id does not match any known queue\n"); return -EINVAL; } @@ -290,8 +313,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dev = pqn->kq->dev; if (pqn->q) dev = pqn->q->device; - if (WARN_ON(!dev)) - return -ENODEV; + BUG_ON(!dev); pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { @@ -331,9 +353,12 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, int retval; struct process_queue_node *pqn; + BUG_ON(!pqm); + pqn = get_queue_by_qid(pqm, qid); if (!pqn) { - pr_debug("No queue %d exists for update operation\n", qid); + pr_debug("amdkfd: No queue %d exists for update operation\n", + qid); return -EFAULT; } @@ -356,6 +381,8 @@ struct kernel_queue *pqm_get_kernel_queue( { struct process_queue_node *pqn; + BUG_ON(!pqm); + pqn = get_queue_by_qid(pqm, qid); if (pqn && pqn->kq) return pqn->kq; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a5315d4..0ab1970 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -65,15 +65,17 @@ void print_queue(struct queue *q) int init_queue(struct queue **q, const struct queue_properties *properties) { - struct queue *tmp_q; + struct queue *tmp; - tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL); - if (!tmp_q) + BUG_ON(!q); + + tmp = kzalloc(sizeof(struct queue), GFP_KERNEL); + if (!tmp) return -ENOMEM; - memcpy(&tmp_q->properties, properties, sizeof(*properties)); + memcpy(&tmp->properties, properties, sizeof(struct queue_properties)); - *q = tmp_q; + *q = tmp; return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e0b78fd..8c6e47c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -108,6 +108,9 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { + BUG_ON(!dev); + BUG_ON(!cu); + dev->node_props.cpu_cores_count = cu->num_cpu_cores; dev->node_props.cpu_core_id_base = cu->processor_id_low; if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) @@ -120,6 +123,9 @@ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { + BUG_ON(!dev); + BUG_ON(!cu); + dev->node_props.simd_id_base = cu->processor_id_low; dev->node_props.simd_count = cu->num_simd_cores; dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; @@ -142,6 +148,8 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) struct kfd_topology_device *dev; int i = 0; + BUG_ON(!cu); + pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", cu->proximity_domain, cu->hsa_capability); list_for_each_entry(dev, &topology_device_list, list) { @@ -169,6 +177,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) struct kfd_topology_device *dev; int i = 0; + BUG_ON(!mem); + pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", mem->promixity_domain); list_for_each_entry(dev, &topology_device_list, list) { @@ -213,6 +223,8 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) struct kfd_topology_device *dev; uint32_t id; + BUG_ON(!cache); + id = cache->processor_id_low; pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); @@ -262,6 +274,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) uint32_t id_from; uint32_t id_to; + BUG_ON(!iolink); + id_from = iolink->proximity_domain_from; id_to = iolink->proximity_domain_to; @@ -309,6 +323,8 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) struct crat_subtype_iolink *iolink; int ret = 0; + BUG_ON(!sub_type_hdr); + switch (sub_type_hdr->type) { case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: cu = (struct crat_subtype_computeunit *)sub_type_hdr; @@ -352,6 +368,8 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + BUG_ON(!dev); + list_del(&dev->list); while (dev->mem_props.next != &dev->mem_props) { @@ -398,7 +416,7 @@ static struct kfd_topology_device *kfd_create_topology_device(void) struct kfd_topology_device *dev; dev = kfd_alloc_struct(dev); - if (!dev) { + if (dev == NULL) { pr_err("No memory to allocate a topology device"); return NULL; } @@ -657,7 +675,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, 
dev->node_props.simd_count); if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_info_once("mem_banks_count truncated from %d to %d\n", + pr_info_once("kfd: mem_banks_count truncated from %d to %d\n", dev->node_props.mem_banks_count, dev->mem_bank_count); sysfs_show_32bit_prop(buffer, "mem_banks_count", @@ -755,6 +773,8 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + BUG_ON(!dev); + if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) if (iolink->kobj) { @@ -809,12 +829,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, int ret; uint32_t i; - if (WARN_ON(dev->kobj_node)) - return -EEXIST; + BUG_ON(!dev); /* * Creating the sysfs folders */ + BUG_ON(dev->kobj_node); dev->kobj_node = kfd_alloc_struct(dev->kobj_node); if (!dev->kobj_node) return -ENOMEM; @@ -947,7 +967,7 @@ static int kfd_topology_update_sysfs(void) int ret; pr_info("Creating topology SYSFS entries\n"); - if (!sys_props.kobj_topology) { + if (sys_props.kobj_topology == NULL) { sys_props.kobj_topology = kfd_alloc_struct(sys_props.kobj_topology); if (!sys_props.kobj_topology) @@ -1107,8 +1127,10 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; + BUG_ON(!gpu); + list_for_each_entry(dev, &topology_device_list, list) - if (!dev->gpu && (dev->node_props.simd_count > 0)) { + if (dev->gpu == NULL && dev->node_props.simd_count > 0) { dev->gpu = gpu; out_dev = dev; break; @@ -1131,9 +1153,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu) struct kfd_topology_device *dev; int res; + BUG_ON(!gpu); + gpu_id = kfd_generate_gpu_id(gpu); - pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id); down_write(&topology_lock); /* @@ -1156,8 +1180,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * GPU vBIOS */ - /* Update the SYSFS tree, since we added another topology - * device + /* + * Update the SYSFS tree, since we added another topology device */ if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); @@ -1176,7 +1200,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; - pr_info("Adding doorbell packet type capability\n"); + pr_info("amdkfd: adding doorbell packet type capability\n"); } res = 0; @@ -1196,6 +1220,8 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) uint32_t gpu_id; int res = -ENODEV; + BUG_ON(!gpu); + down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 94277cb..36f3766 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -41,11 +41,6 @@ struct kgd_dev; struct kgd_mem; -enum kfd_preempt_type { - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, - KFD_PREEMPT_TYPE_WAVEFRONT_RESET, -}; - enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -87,17 +82,6 @@ struct kgd2kfd_shared_resources { size_t doorbell_start_offset; }; -struct tile_config { - uint32_t *tile_config_ptr; - uint32_t *macro_tile_config_ptr; - uint32_t num_tile_configs; - uint32_t num_macro_tile_configs; - - uint32_t gb_addr_config; - uint32_t num_banks; - 
uint32_t num_ranks; -}; - /** * struct kfd2kgd_calls * @@ -139,11 +123,6 @@ struct tile_config { * * @get_fw_version: Returns FW versions from the header * - * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID. - * Only used for no cp scheduling mode - * - * @get_tile_config: Returns GPU-specific tiling mode information - * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -174,16 +153,14 @@ struct kfd2kgd_calls { int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); + uint32_t queue_id, uint32_t __user *wptr); int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); - int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, + int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); @@ -215,9 +192,6 @@ struct kfd2kgd_calls { uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); - void (*set_scratch_backing_va)(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); - int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); }; /** diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index f6578c9..a2ab6dc 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -75,14 +75,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); + uint32_t queue_id, uint32_t __user *wptr); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -484,9 +482,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) + uint32_t queue_id, uint32_t __user *wptr) { uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; @@ -640,7 +636,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { @@ -789,8 +785,7 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset] - / 4; + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } static bool 
get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f7015aa..5bb2b45 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,34 +232,6 @@ struct kfd_ioctl_wait_events_args { __u32 wait_result; /* from KFD */ }; -struct kfd_ioctl_set_scratch_backing_va_args { - __u64 va_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - -struct kfd_ioctl_get_tile_config_args { - /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; - /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_tile_configs; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_macro_tile_configs; - - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ - /* struct size can be extended later if needed - * without breaking ABI compatibility - */ -}; #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) @@ -315,13 +287,7 @@ struct kfd_ioctl_get_tile_config_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) -#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ - AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) - -#define AMDKFD_IOC_GET_TILE_CONFIG \ - AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) - #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x13 +#define AMDKFD_COMMAND_END 0x11 #endif -- 2.7.4
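
Editor's note (illustrative sketch, not part of the patch): the most pervasive
change in this revert is swapping upstream's recoverable WARN_ON()-plus-error-
return style back to the older hard BUG_ON() assertions, as seen above in
pm_allocate_runlist_ib(), pm_create_runlist() and kfd_pasid_free(). The
minimal sketch below contrasts the two conventions; the kfd_precondition_*()
helpers are hypothetical names invented for illustration only:

	#include <linux/bug.h>
	#include <linux/errno.h>

	struct packet_manager;	/* opaque here; defined in kfd_priv.h */

	/* Upstream style being reverted away from: warn once, unwind, and
	 * let the ioctl path hand an error back to user space. */
	static int kfd_precondition_upstream(bool allocated)
	{
		if (WARN_ON(allocated))
			return -EINVAL;
		return 0;
	}

	/* Older hybrid/DKMS style restored by this revert: treat a broken
	 * precondition as a driver bug and oops immediately. */
	static void kfd_precondition_hybrid(struct packet_manager *pm,
					    bool allocated)
	{
		BUG_ON(!pm);
		BUG_ON(allocated);
	}

The BUG_ON() form is louder during bring-up but kills the calling context
outright, while the WARN_ON() form logs a backtrace and keeps the kernel
running; the revert deliberately restores the former wherever the two trees
diverged, to match the 4.12 hybrid baseline that later kfd patches expect.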