Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1121-drm-amdkfd-revert-kfd-part-to-a-previous-state.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1121-drm-amdkfd-revert-kfd-part-to-a-previous-state.patch | 5722
1 file changed, 5722 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1121-drm-amdkfd-revert-kfd-part-to-a-previous-state.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1121-drm-amdkfd-revert-kfd-part-to-a-previous-state.patch new file mode 100644 index 00000000..becf2699 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1121-drm-amdkfd-revert-kfd-part-to-a-previous-state.patch @@ -0,0 +1,5722 @@ +From e00ab6ceb6cc2d43225b57f76f63994bd4d49054 Mon Sep 17 00:00:00 2001 +From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +Date: Wed, 17 Oct 2018 19:04:35 +0530 +Subject: [PATCH 1121/4131] drm/amdkfd: revert kfd part to a previous state + +Revert following files to "2ba6b00 drm/amd/powerplay: add profile mode for vega10.": + + - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd* + - drivers/gpu/drm/amd/amdkfd/* + - drivers/gpu/drm/amd/include/kgd_kfd_interface.h + - include/uapi/linux/kfd_ioctl.h + - drivers/gpu/drm/radeon/radeon_kfd* + +Due to upstream, porting kfd patches to 4.13 all-open has many conflicts. +It's hard to elegantly fix these conflicts. So we revert the kfd part to a +previous commit, where we began to first port dkms patches in 4.12 hybrid. +Then sequentially port all kfd patches. + +Signed-off-by: Le.Ma <Le.Ma@amd.com> +Acked-by: Junwei Zhang <Jerry.Zhang@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 44 +-- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 18 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 189 ++---------- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 163 ++-------- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 183 ++++-------- + drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 102 ++++--- + drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 21 +- + drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 27 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 123 ++++---- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 316 ++++++++++++-------- + .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 8 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 8 +- + drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 40 ++- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 33 ++- + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 63 ++-- + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 10 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 62 ++-- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 46 ++- + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 289 ++++++++++++------ + drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 7 +- + drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 330 ++++++++++++++++++--- + drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 140 +-------- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 32 +- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 25 +- + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 71 +++-- + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 12 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 46 ++- + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 30 +- + drivers/gpu/drm/radeon/radeon_kfd.c | 15 +- + include/uapi/linux/kfd_ioctl.h | 36 +-- + 33 files changed, 1261 insertions(+), 1235 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index f7fa767..7ec1915 100644 +--- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -27,15 +27,16 @@ + #include "amdgpu_gfx.h" + #include <linux/module.h> + ++const struct kfd2kgd_calls *kfd2kgd; + const struct kgd2kfd_calls *kgd2kfd; +-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); ++bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + + int amdgpu_amdkfd_init(void) + { + int ret; + + #if defined(CONFIG_HSA_AMD_MODULE) +- int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); ++ int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + +@@ -60,21 +61,8 @@ int amdgpu_amdkfd_init(void) + return ret; + } + +-void amdgpu_amdkfd_fini(void) +-{ +- if (kgd2kfd) { +- kgd2kfd->exit(); +- symbol_put(kgd2kfd_init); +- } +-} +- +-void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) ++bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) + { +- const struct kfd2kgd_calls *kfd2kgd; +- +- if (!kgd2kfd) +- return; +- + switch (adev->asic_type) { + #ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_KAVERI: +@@ -85,12 +73,25 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) + kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); + break; + default: +- dev_info(adev->dev, "kfd not supported on this ASIC\n"); +- return; ++ return false; + } + +- adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, +- adev->pdev, kfd2kgd); ++ return true; ++} ++ ++void amdgpu_amdkfd_fini(void) ++{ ++ if (kgd2kfd) { ++ kgd2kfd->exit(); ++ symbol_put(kgd2kfd_init); ++ } ++} ++ ++void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) ++{ ++ if (kgd2kfd) ++ adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, ++ adev->pdev, kfd2kgd); + } + + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) +@@ -183,8 +184,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + return -ENOMEM; + + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, +- AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, +- &(*mem)->bo); ++ AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + if (r) { + dev_err(adev->dev, + "failed to allocate BO for amdkfd (%d)\n", r); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 1ef486b..6d3a10b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -27,7 +27,6 @@ + + #include <linux/types.h> + #include <linux/mm.h> +-#include <linux/mmu_context.h> + #include <kgd_kfd_interface.h> + + struct amdgpu_device; +@@ -41,6 +40,8 @@ struct kgd_mem { + int amdgpu_amdkfd_init(void); + void amdgpu_amdkfd_fini(void); + ++bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev); ++ + void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); + int amdgpu_amdkfd_resume(struct amdgpu_device *adev); + void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, +@@ -62,19 +63,4 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); + + uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); + +-#define read_user_wptr(mmptr, wptr, dst) \ +- ({ \ +- bool valid = false; \ +- if ((mmptr) && (wptr)) { \ +- if ((mmptr) == current->mm) { \ +- valid = !get_user((dst), (wptr)); \ +- } else if (current->mm == NULL) { \ +- use_mm(mmptr); \ +- valid = !get_user((dst), (wptr)); \ +- unuse_mm(mmptr); \ +- } \ +- } \ +- valid; \ +- }) +- + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index bdabaa3..5748504 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -39,12 +39,6 @@ + #include "gmc/gmc_7_1_sh_mask.h" + #include "cik_structs.h" + +-enum hqd_dequeue_request_type { +- NO_ACTION = 0, +- DRAIN_PIPE, +- RESET_WAVES +-}; +- + enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +@@ -102,15 +96,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr, +- uint32_t wptr_shift, uint32_t wptr_mask, +- struct mm_struct *mm); ++ uint32_t queue_id, uint32_t __user *wptr); + static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); + static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +- enum kfd_preempt_type reset_type, ++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); + static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +@@ -135,33 +126,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); + + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +-static void set_scratch_backing_va(struct kgd_dev *kgd, +- uint64_t va, uint32_t vmid); +- +-/* Because of REG_GET_FIELD() being used, we put this function in the +- * asic specific file. 
+- */ +-static int get_tile_config(struct kgd_dev *kgd, +- struct tile_config *config) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +- +- config->gb_addr_config = adev->gfx.config.gb_addr_config; +- config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, +- MC_ARB_RAMCFG, NOOFBANK); +- config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, +- MC_ARB_RAMCFG, NOOFRANKS); +- +- config->tile_config_ptr = adev->gfx.config.tile_mode_array; +- config->num_tile_configs = +- ARRAY_SIZE(adev->gfx.config.tile_mode_array); +- config->macro_tile_config_ptr = +- adev->gfx.config.macrotile_mode_array; +- config->num_macro_tile_configs = +- ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); +- +- return 0; +-} + + static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, +@@ -186,9 +150,7 @@ static const struct kfd2kgd_calls kfd2kgd = { + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, +- .get_fw_version = get_fw_version, +- .set_scratch_backing_va = set_scratch_backing_va, +- .get_tile_config = get_tile_config, ++ .get_fw_version = get_fw_version + }; + + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) +@@ -224,7 +186,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + +- uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; ++ uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +@@ -328,38 +290,20 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) + } + + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr, +- uint32_t wptr_shift, uint32_t wptr_mask, +- struct mm_struct *mm) ++ uint32_t queue_id, uint32_t __user *wptr) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ uint32_t wptr_shadow, is_wptr_shadow_valid; + struct cik_mqd *m; +- uint32_t *mqd_hqd; +- uint32_t reg, wptr_val, data; + + m = get_mqd(mqd); + +- acquire_queue(kgd, pipe_id, queue_id); +- +- /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ +- mqd_hqd = &m->cp_mqd_base_addr_lo; +- +- for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) +- WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); +- +- /* Copy userspace write pointer value to register. +- * Activate doorbell logic to monitor subsequent changes. 
+- */ +- data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, +- CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); +- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); +- +- if (read_user_wptr(mm, wptr, wptr_val)) +- WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); +- +- data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); +- WREG32(mmCP_HQD_ACTIVE, data); ++ is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); ++ if (is_wptr_shadow_valid) ++ m->cp_hqd_pq_wptr = wptr_shadow; + ++ acquire_queue(kgd, pipe_id, queue_id); ++ gfx_v7_0_mqd_commit(adev, m); + release_queue(kgd); + + return 0; +@@ -459,99 +403,30 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) + return false; + } + +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +- enum kfd_preempt_type reset_type, ++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; +- enum hqd_dequeue_request_type type; +- unsigned long flags, end_jiffies; +- int retry; ++ int timeout = utimeout; + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + +- switch (reset_type) { +- case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: +- type = DRAIN_PIPE; +- break; +- case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: +- type = RESET_WAVES; +- break; +- default: +- type = DRAIN_PIPE; +- break; +- } ++ WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + +- /* Workaround: If IQ timer is active and the wait time is close to or +- * equal to 0, dequeueing is not safe. Wait until either the wait time +- * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is +- * cleared before continuing. Also, ensure wait times are set to at +- * least 0x3. 
+- */ +- local_irq_save(flags); +- preempt_disable(); +- retry = 5000; /* wait for 500 usecs at maximum */ +- while (true) { +- temp = RREG32(mmCP_HQD_IQ_TIMER); +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { +- pr_debug("HW is processing IQ\n"); +- goto loop; +- } +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) +- == 3) /* SEM-rearm is safe */ +- break; +- /* Wait time 3 is safe for CP, but our MMIO read/write +- * time is close to 1 microsecond, so check for 10 to +- * leave more buffer room +- */ +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) +- >= 10) +- break; +- pr_debug("IQ timer is active\n"); +- } else +- break; +-loop: +- if (!retry) { +- pr_err("CP HQD IQ timer status time out\n"); +- break; +- } +- ndelay(100); +- --retry; +- } +- retry = 1000; +- while (true) { +- temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); +- if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) +- break; +- pr_debug("Dequeue request is pending\n"); +- +- if (!retry) { +- pr_err("CP HQD dequeue request time out\n"); +- break; +- } +- ndelay(100); +- --retry; +- } +- local_irq_restore(flags); +- preempt_enable(); +- +- WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); +- +- end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); +- if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) ++ if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + break; +- if (time_after(jiffies, end_jiffies)) { +- pr_err("cp queue preemption time out\n"); ++ if (timeout <= 0) { ++ pr_err("kfd: cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } +- usleep_range(500, 1000); ++ msleep(20); ++ timeout -= 20; + } + + release_queue(kgd); +@@ -702,16 +577,6 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + } + +-static void set_scratch_backing_va(struct kgd_dev *kgd, +- uint64_t va, uint32_t vmid) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +- +- lock_srbm(kgd, 0, 0, 0, vmid); +- WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); +- unlock_srbm(kgd); +-} +- + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + { + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +@@ -722,42 +587,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.pfp_fw->data; ++ adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.me_fw->data; ++ adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.ce_fw->data; ++ adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec_fw->data; ++ adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec2_fw->data; ++ adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.rlc_fw->data; ++ adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[0].fw->data; ++ adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[1].fw->data; ++ adev->sdma.instance[1].fw->data; + break; + + default: +diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index d1a32be..c5044d5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -39,12 +39,6 @@ + #include "vi_structs.h" + #include "vid.h" + +-enum hqd_dequeue_request_type { +- NO_ACTION = 0, +- DRAIN_PIPE, +- RESET_WAVES +-}; +- + struct cik_sdma_rlc_registers; + + /* +@@ -61,15 +55,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, uint32_t __user *wptr, +- uint32_t wptr_shift, uint32_t wptr_mask, +- struct mm_struct *mm); ++ uint32_t queue_id, uint32_t __user *wptr); + static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); + static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +- enum kfd_preempt_type reset_type, ++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); + static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +@@ -94,33 +85,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); + static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +-static void set_scratch_backing_va(struct kgd_dev *kgd, +- uint64_t va, uint32_t vmid); +- +-/* Because of REG_GET_FIELD() being used, we put this function in the +- * asic specific file. 
+- */ +-static int get_tile_config(struct kgd_dev *kgd, +- struct tile_config *config) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +- +- config->gb_addr_config = adev->gfx.config.gb_addr_config; +- config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, +- MC_ARB_RAMCFG, NOOFBANK); +- config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, +- MC_ARB_RAMCFG, NOOFRANKS); +- +- config->tile_config_ptr = adev->gfx.config.tile_mode_array; +- config->num_tile_configs = +- ARRAY_SIZE(adev->gfx.config.tile_mode_array); +- config->macro_tile_config_ptr = +- adev->gfx.config.macrotile_mode_array; +- config->num_macro_tile_configs = +- ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); +- +- return 0; +-} + + static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, +@@ -147,9 +111,7 @@ static const struct kfd2kgd_calls kfd2kgd = { + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, +- .get_fw_version = get_fw_version, +- .set_scratch_backing_va = set_scratch_backing_va, +- .get_tile_config = get_tile_config, ++ .get_fw_version = get_fw_version + }; + + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) +@@ -185,7 +147,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + +- uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; ++ uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +@@ -254,7 +216,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) + uint32_t mec; + uint32_t pipe; + +- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; ++ mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); +@@ -398,102 +360,29 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) + return false; + } + +-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +- enum kfd_preempt_type reset_type, ++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; +- enum hqd_dequeue_request_type type; +- unsigned long flags, end_jiffies; +- int retry; +- struct vi_mqd *m = get_mqd(mqd); ++ int timeout = utimeout; + + acquire_queue(kgd, pipe_id, queue_id); + +- if (m->cp_hqd_vmid == 0) +- WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); ++ WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + +- switch (reset_type) { +- case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: +- type = DRAIN_PIPE; +- break; +- case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: +- type = RESET_WAVES; +- break; +- default: +- type = DRAIN_PIPE; +- break; +- } +- +- /* Workaround: If IQ timer is active and the wait time is close to or +- * equal to 0, dequeueing is not safe. Wait until either the wait time +- * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is +- * cleared before continuing. Also, ensure wait times are set to at +- * least 0x3. 
+- */ +- local_irq_save(flags); +- preempt_disable(); +- retry = 5000; /* wait for 500 usecs at maximum */ +- while (true) { +- temp = RREG32(mmCP_HQD_IQ_TIMER); +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { +- pr_debug("HW is processing IQ\n"); +- goto loop; +- } +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) +- == 3) /* SEM-rearm is safe */ +- break; +- /* Wait time 3 is safe for CP, but our MMIO read/write +- * time is close to 1 microsecond, so check for 10 to +- * leave more buffer room +- */ +- if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) +- >= 10) +- break; +- pr_debug("IQ timer is active\n"); +- } else +- break; +-loop: +- if (!retry) { +- pr_err("CP HQD IQ timer status time out\n"); +- break; +- } +- ndelay(100); +- --retry; +- } +- retry = 1000; +- while (true) { +- temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); +- if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) +- break; +- pr_debug("Dequeue request is pending\n"); +- +- if (!retry) { +- pr_err("CP HQD dequeue request time out\n"); +- break; +- } +- ndelay(100); +- --retry; +- } +- local_irq_restore(flags); +- preempt_enable(); +- +- WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); +- +- end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); +- if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) ++ if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + break; +- if (time_after(jiffies, end_jiffies)) { +- pr_err("cp queue preemption time out.\n"); ++ if (timeout <= 0) { ++ pr_err("kfd: cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } +- usleep_range(500, 1000); ++ msleep(20); ++ timeout -= 20; + } + + release_queue(kgd); +@@ -607,16 +496,6 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + return 0; + } + +-static void set_scratch_backing_va(struct kgd_dev *kgd, +- uint64_t va, uint32_t vmid) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +- +- lock_srbm(kgd, 0, 0, 0, vmid); +- WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); +- unlock_srbm(kgd); +-} +- + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + { + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +@@ -627,42 +506,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.pfp_fw->data; ++ adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.me_fw->data; ++ adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.ce_fw->data; ++ adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec_fw->data; ++ adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.mec2_fw->data; ++ adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) +- adev->gfx.rlc_fw->data; ++ adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[0].fw->data; ++ adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) +- adev->sdma.instance[1].fw->data; ++ adev->sdma.instance[1].fw->data; + break; + + default: +diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index abdafb9..3016098 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -142,12 +142,12 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + struct kfd_ioctl_create_queue_args *args) + { + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { +- pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); ++ pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + return -EINVAL; + } + + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { +- pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); ++ pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + return -EINVAL; + } + +@@ -155,26 +155,26 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + (!access_ok(VERIFY_WRITE, + (const void __user *) args->ring_base_address, + sizeof(uint64_t)))) { +- pr_err("Can't access ring base address\n"); ++ pr_err("kfd: can't access ring base address\n"); + return -EFAULT; + } + + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { +- pr_err("Ring size must be a power of 2 or 0\n"); ++ pr_err("kfd: ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + + if (!access_ok(VERIFY_WRITE, + (const void __user *) args->read_pointer_address, + sizeof(uint32_t))) { +- pr_err("Can't access read pointer\n"); ++ pr_err("kfd: can't access read pointer\n"); + return -EFAULT; + } + + if (!access_ok(VERIFY_WRITE, + (const void __user *) args->write_pointer_address, + sizeof(uint32_t))) { +- pr_err("Can't access write pointer\n"); ++ pr_err("kfd: can't access write pointer\n"); + return -EFAULT; + } + +@@ -182,7 +182,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + !access_ok(VERIFY_WRITE, + (const void __user *) args->eop_buffer_address, + sizeof(uint32_t))) { +- pr_debug("Can't access eop buffer"); ++ pr_debug("kfd: can't access eop buffer"); + return -EFAULT; + } + +@@ -190,7 +190,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + !access_ok(VERIFY_WRITE, + (const void __user *) args->ctx_save_restore_address, + sizeof(uint32_t))) { +- pr_debug("Can't access ctx save restore buffer"); ++ pr_debug("kfd: can't access ctx save restore buffer"); + return -EFAULT; + } + +@@ -219,27 +219,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + else + q_properties->format = KFD_QUEUE_FORMAT_PM4; + +- pr_debug("Queue Percentage: %d, %d\n", ++ pr_debug("Queue Percentage (%d, %d)\n", + q_properties->queue_percent, args->queue_percentage); + +- pr_debug("Queue Priority: %d, %d\n", ++ pr_debug("Queue Priority (%d, %d)\n", + q_properties->priority, args->queue_priority); + +- pr_debug("Queue Address: 0x%llX, 0x%llX\n", ++ pr_debug("Queue Address (0x%llX, 0x%llX)\n", + q_properties->queue_address, args->ring_base_address); + +- pr_debug("Queue Size: 0x%llX, %u\n", ++ pr_debug("Queue Size (0x%llX, %u)\n", + q_properties->queue_size, args->ring_size); + +- pr_debug("Queue r/w Pointers: %p, %p\n", +- q_properties->read_ptr, +- q_properties->write_ptr); ++ pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n", ++ (uint64_t) q_properties->read_ptr, ++ (uint64_t) q_properties->write_ptr); + +- pr_debug("Queue Format: %d\n", q_properties->format); ++ pr_debug("Queue Format (%d)\n", q_properties->format); + +- pr_debug("Queue 
EOP: 0x%llX\n", q_properties->eop_ring_buffer_address); ++ pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address); + +- pr_debug("Queue CTX save area: 0x%llX\n", ++ pr_debug("Queue CTX save arex (0x%llX)\n", + q_properties->ctx_save_restore_area_address); + + return 0; +@@ -257,16 +257,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + + memset(&q_properties, 0, sizeof(struct queue_properties)); + +- pr_debug("Creating queue ioctl\n"); ++ pr_debug("kfd: creating queue ioctl\n"); + + err = set_queue_properties_from_user(&q_properties, args); + if (err) + return err; + +- pr_debug("Looking for gpu id 0x%x\n", args->gpu_id); ++ pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id); + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) { +- pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); ++ if (dev == NULL) { ++ pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id); + return -EINVAL; + } + +@@ -278,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + goto err_bind_process; + } + +- pr_debug("Creating queue for PASID %d on gpu 0x%x\n", ++ pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n", + p->pasid, + dev->id); + +@@ -296,15 +296,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + + mutex_unlock(&p->mutex); + +- pr_debug("Queue id %d was created successfully\n", args->queue_id); ++ pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); + +- pr_debug("Ring buffer address == 0x%016llX\n", ++ pr_debug("ring buffer address == 0x%016llX\n", + args->ring_base_address); + +- pr_debug("Read ptr address == 0x%016llX\n", ++ pr_debug("read ptr address == 0x%016llX\n", + args->read_pointer_address); + +- pr_debug("Write ptr address == 0x%016llX\n", ++ pr_debug("write ptr address == 0x%016llX\n", + args->write_pointer_address); + + return 0; +@@ -321,7 +321,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, + int retval; + struct kfd_ioctl_destroy_queue_args *args = data; + +- pr_debug("Destroying queue id %d for pasid %d\n", ++ pr_debug("kfd: destroying queue id %d for PASID %d\n", + args->queue_id, + p->pasid); + +@@ -341,12 +341,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, + struct queue_properties properties; + + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { +- pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); ++ pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + return -EINVAL; + } + + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { +- pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); ++ pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + return -EINVAL; + } + +@@ -354,12 +354,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, + (!access_ok(VERIFY_WRITE, + (const void __user *) args->ring_base_address, + sizeof(uint64_t)))) { +- pr_err("Can't access ring base address\n"); ++ pr_err("kfd: can't access ring base address\n"); + return -EFAULT; + } + + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { +- pr_err("Ring size must be a power of 2 or 0\n"); ++ pr_err("kfd: ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + +@@ -368,7 +368,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, + properties.queue_percent = args->queue_percentage; + properties.priority = 
args->queue_priority; + +- pr_debug("Updating queue id %d for pasid %d\n", ++ pr_debug("kfd: updating queue id %d for PASID %d\n", + args->queue_id, p->pasid); + + mutex_lock(&p->mutex); +@@ -400,7 +400,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep, + } + + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); +@@ -443,7 +443,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, + long status = 0; + + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { +@@ -460,11 +460,12 @@ static int kfd_ioctl_dbg_register(struct file *filep, + */ + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { +- status = PTR_ERR(pdd); +- goto out; ++ mutex_unlock(&p->mutex); ++ mutex_unlock(kfd_get_dbgmgr_mutex()); ++ return PTR_ERR(pdd); + } + +- if (!dev->dbgmgr) { ++ if (dev->dbgmgr == NULL) { + /* In case of a legal call, we have no dbgmgr yet */ + create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); + if (create_ok) { +@@ -479,7 +480,6 @@ static int kfd_ioctl_dbg_register(struct file *filep, + status = -EINVAL; + } + +-out: + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + +@@ -494,7 +494,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, + long status; + + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { +@@ -505,7 +505,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, + mutex_lock(kfd_get_dbgmgr_mutex()); + + status = kfd_dbgmgr_unregister(dev->dbgmgr, p); +- if (!status) { ++ if (status == 0) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } +@@ -539,7 +539,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, + memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); + + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { +@@ -580,8 +580,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, + args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; + + if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) { +- status = -EINVAL; +- goto out; ++ kfree(args_buff); ++ return -EINVAL; + } + + watch_mask_value = (uint64_t) args_buff[args_idx]; +@@ -604,8 +604,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, + } + + if (args_idx >= args->buf_size_in_bytes - sizeof(args)) { +- status = -EINVAL; +- goto out; ++ kfree(args_buff); ++ return -EINVAL; + } + + /* Currently HSA Event is not supported for DBG */ +@@ -617,7 +617,6 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, + + mutex_unlock(kfd_get_dbgmgr_mutex()); + +-out: + kfree(args_buff); + + return status; +@@ -647,7 +646,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep, + sizeof(wac_info.trapId); + + dev = kfd_device_by_id(args->gpu_id); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { +@@ -784,9 +783,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, + "scratch_limit %llX\n", pdd->scratch_limit); + + args->num_of_nodes++; +- +- pdd = kfd_get_next_process_device_data(p, pdd); +- } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); ++ } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && ++ (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + } + + 
mutex_unlock(&p->mutex); +@@ -849,84 +847,9 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, + + return err; + } +-static int kfd_ioctl_set_scratch_backing_va(struct file *filep, +- struct kfd_process *p, void *data) +-{ +- struct kfd_ioctl_set_scratch_backing_va_args *args = data; +- struct kfd_process_device *pdd; +- struct kfd_dev *dev; +- long err; +- +- dev = kfd_device_by_id(args->gpu_id); +- if (!dev) +- return -EINVAL; +- +- mutex_lock(&p->mutex); +- +- pdd = kfd_bind_process_to_device(dev, p); +- if (IS_ERR(pdd)) { +- err = PTR_ERR(pdd); +- goto bind_process_to_device_fail; +- } +- +- pdd->qpd.sh_hidden_private_base = args->va_addr; +- +- mutex_unlock(&p->mutex); +- +- if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) +- dev->kfd2kgd->set_scratch_backing_va( +- dev->kgd, args->va_addr, pdd->qpd.vmid); +- +- return 0; +- +-bind_process_to_device_fail: +- mutex_unlock(&p->mutex); +- return err; +-} +- +-static int kfd_ioctl_get_tile_config(struct file *filep, +- struct kfd_process *p, void *data) +-{ +- struct kfd_ioctl_get_tile_config_args *args = data; +- struct kfd_dev *dev; +- struct tile_config config; +- int err = 0; +- +- dev = kfd_device_by_id(args->gpu_id); +- +- dev->kfd2kgd->get_tile_config(dev->kgd, &config); +- +- args->gb_addr_config = config.gb_addr_config; +- args->num_banks = config.num_banks; +- args->num_ranks = config.num_ranks; +- +- if (args->num_tile_configs > config.num_tile_configs) +- args->num_tile_configs = config.num_tile_configs; +- err = copy_to_user((void __user *)args->tile_config_ptr, +- config.tile_config_ptr, +- args->num_tile_configs * sizeof(uint32_t)); +- if (err) { +- args->num_tile_configs = 0; +- return -EFAULT; +- } +- +- if (args->num_macro_tile_configs > config.num_macro_tile_configs) +- args->num_macro_tile_configs = +- config.num_macro_tile_configs; +- err = copy_to_user((void __user *)args->macro_tile_config_ptr, +- config.macro_tile_config_ptr, +- args->num_macro_tile_configs * sizeof(uint32_t)); +- if (err) { +- args->num_macro_tile_configs = 0; +- return -EFAULT; +- } +- +- return 0; +-} + + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ +- [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ +- .cmd_drv = 0, .name = #ioctl} ++ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + + /** Ioctl table */ + static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { +@@ -977,12 +900,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + kfd_ioctl_dbg_wave_control, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, +- kfd_ioctl_set_scratch_backing_va, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, +- kfd_ioctl_get_tile_config, 0) + }; + + #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +index 0aa021a..d5e19b5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +@@ -42,6 +42,8 @@ + + static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) + { ++ BUG_ON(!dev || !dev->kfd2kgd); ++ + dev->kfd2kgd->address_watch_disable(dev->kgd); + } + +@@ -60,8 +62,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + unsigned int *ib_packet_buff; + int status; + +- if (WARN_ON(!size_in_bytes)) +- return -EINVAL; ++ BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); + + kq = dbgdev->kq; 
+ +@@ -76,8 +77,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + status = kq->ops.acquire_packet_buffer(kq, + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); +- if (status) { +- pr_err("acquire_packet_buffer failed\n"); ++ if (status != 0) { ++ pr_err("amdkfd: acquire_packet_buffer failed\n"); + return status; + } + +@@ -114,8 +115,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), + &mem_obj); + +- if (status) { +- pr_err("Failed to allocate GART memory\n"); ++ if (status != 0) { ++ pr_err("amdkfd: Failed to allocate GART memory\n"); + kq->ops.rollback_packet(kq); + return status; + } +@@ -167,6 +168,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + + static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) + { ++ BUG_ON(!dbgdev); ++ + /* + * no action is needed in this case, + * just make sure diq will not be used +@@ -184,12 +187,14 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) + struct kernel_queue *kq = NULL; + int status; + ++ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); ++ + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, + &properties, 0, KFD_QUEUE_TYPE_DIQ, + &qid); + + if (status) { +- pr_err("Failed to create DIQ\n"); ++ pr_err("amdkfd: Failed to create DIQ\n"); + return status; + } + +@@ -197,8 +202,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) + + kq = pqm_get_kernel_queue(dbgdev->pqm, qid); + +- if (!kq) { +- pr_err("Error getting DIQ\n"); ++ if (kq == NULL) { ++ pr_err("amdkfd: Error getting DIQ\n"); + pqm_destroy_queue(dbgdev->pqm, qid); + return -EFAULT; + } +@@ -210,6 +215,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) + + static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) + { ++ BUG_ON(!dbgdev || !dbgdev->dev); ++ + /* disable watch address */ + dbgdev_address_watch_disable_nodiq(dbgdev->dev); + return 0; +@@ -220,6 +227,8 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) + /* todo - disable address watch */ + int status; + ++ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); ++ + status = pqm_destroy_queue(dbgdev->pqm, + dbgdev->kq->queue->properties.queue_id); + dbgdev->kq = NULL; +@@ -236,12 +245,14 @@ static void dbgdev_address_watch_set_registers( + { + union ULARGE_INTEGER addr; + ++ BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); ++ + addr.quad_part = 0; + addrHi->u32All = 0; + addrLo->u32All = 0; + cntl->u32All = 0; + +- if (adw_info->watch_mask) ++ if (adw_info->watch_mask != NULL) + cntl->bitfields.mask = + (uint32_t) (adw_info->watch_mask[index] & + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); +@@ -268,7 +279,7 @@ static void dbgdev_address_watch_set_registers( + } + + static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, +- struct dbg_address_watch_info *adw_info) ++ struct dbg_address_watch_info *adw_info) + { + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; +@@ -276,11 +287,13 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + struct kfd_process_device *pdd; + unsigned int i; + ++ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); ++ + /* taking the vmid for that process on the safe way using pdd */ + pdd = kfd_get_process_device_data(dbgdev->dev, + adw_info->process); + if (!pdd) { +- pr_err("Failed to get pdd for wave control no DIQ\n"); ++ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + +@@ -290,16 +303,17 @@ static int dbgdev_address_watch_nodiq(struct 
kfd_dbgdev *dbgdev, + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { +- pr_err("num_watch_points is invalid\n"); ++ pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + +- if (!adw_info->watch_mode || !adw_info->watch_address) { +- pr_err("adw_info fields are not valid\n"); ++ if ((adw_info->watch_mode == NULL) || ++ (adw_info->watch_address == NULL)) { ++ pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + +- for (i = 0; i < adw_info->num_watch_points; i++) { ++ for (i = 0 ; i < adw_info->num_watch_points ; i++) { + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, + &cntl, i, pdd->qpd.vmid); + +@@ -334,7 +348,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + } + + static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, +- struct dbg_address_watch_info *adw_info) ++ struct dbg_address_watch_info *adw_info) + { + struct pm4__set_config_reg *packets_vec; + union TCP_WATCH_ADDR_H_BITS addrHi; +@@ -349,25 +363,28 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + /* we do not control the vmid in DIQ mode, just a place holder */ + unsigned int vmid = 0; + ++ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); ++ + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { +- pr_err("num_watch_points is invalid\n"); ++ pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + +- if (!adw_info->watch_mode || !adw_info->watch_address) { +- pr_err("adw_info fields are not valid\n"); ++ if ((NULL == adw_info->watch_mode) || ++ (NULL == adw_info->watch_address)) { ++ pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + +- if (status) { +- pr_err("Failed to allocate GART memory\n"); ++ if (status != 0) { ++ pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + +@@ -425,6 +442,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + i, + ADDRESS_WATCH_REG_CNTL); + ++ aw_reg_add_dword /= sizeof(uint32_t); ++ + packets_vec[0].bitfields2.reg_offset = + aw_reg_add_dword - AMD_CONFIG_REG_BASE; + +@@ -436,6 +455,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + i, + ADDRESS_WATCH_REG_ADDR_HI); + ++ aw_reg_add_dword /= sizeof(uint32_t); ++ + packets_vec[1].bitfields2.reg_offset = + aw_reg_add_dword - AMD_CONFIG_REG_BASE; + packets_vec[1].reg_data[0] = addrHi.u32All; +@@ -446,6 +467,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + i, + ADDRESS_WATCH_REG_ADDR_LO); + ++ aw_reg_add_dword /= sizeof(uint32_t); ++ + packets_vec[2].bitfields2.reg_offset = + aw_reg_add_dword - AMD_CONFIG_REG_BASE; + packets_vec[2].reg_data[0] = addrLo.u32All; +@@ -462,6 +485,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + i, + ADDRESS_WATCH_REG_CNTL); + ++ aw_reg_add_dword /= sizeof(uint32_t); ++ + packets_vec[3].bitfields2.reg_offset = + aw_reg_add_dword - AMD_CONFIG_REG_BASE; + packets_vec[3].reg_data[0] = cntl.u32All; +@@ -473,8 +498,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + packet_buff_uint, + ib_size); + +- if (status) { +- pr_err("Failed to submit IB to DIQ\n"); ++ if (status != 0) { ++ pr_err("amdkfd: Failed to submit IB to DIQ\n"); + break; + } + } +@@ -493,6 +518,8 @@ static int dbgdev_wave_control_set_registers( + union GRBM_GFX_INDEX_BITS reg_gfx_index; + 
struct HsaDbgWaveMsgAMDGen2 *pMsg; + ++ BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); ++ + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; +@@ -593,16 +620,18 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + struct pm4__set_config_reg *packets_vec; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; + ++ BUG_ON(!dbgdev || !wac_info); ++ + reg_sq_cmd.u32All = 0; + + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index); + if (status) { +- pr_err("Failed to set wave control registers\n"); ++ pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + +- /* we do not control the VMID in DIQ, so reset it to a known value */ ++ /* we do not control the VMID in DIQ,so reset it to a known value */ + reg_sq_cmd.bits.vm_id = 0; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); +@@ -638,7 +667,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + + if (status != 0) { +- pr_err("Failed to allocate GART memory\n"); ++ pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + +@@ -690,8 +719,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + packet_buff_uint, + ib_size); + +- if (status) +- pr_err("Failed to submit IB to DIQ\n"); ++ if (status != 0) ++ pr_err("amdkfd: Failed to submit IB to DIQ\n"); + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + +@@ -706,19 +735,21 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + ++ BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); ++ + reg_sq_cmd.u32All = 0; + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); + + if (!pdd) { +- pr_err("Failed to get pdd for wave control no DIQ\n"); ++ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index); + if (status) { +- pr_err("Failed to set wave control registers\n"); ++ pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + +@@ -787,13 +818,12 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING +- * to check which VMID the current process is mapped to. +- */ ++ * to check which VMID the current process is mapped to. 
*/ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid)) { +- if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid ++ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid) == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid %d\n", + vmid, p->pasid); +@@ -803,7 +833,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) + } + + if (vmid > last_vmid_to_scan) { +- pr_err("Didn't find vmid for pasid %d\n", p->pasid); ++ pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); + return -EFAULT; + } + +@@ -830,6 +860,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) + void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type) + { ++ BUG_ON(!pdbgdev || !pdev); ++ + pdbgdev->dev = pdev; + pdbgdev->kq = NULL; + pdbgdev->type = type; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +index 3da25f7..56d6763 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +@@ -44,6 +44,8 @@ struct mutex *kfd_get_dbgmgr_mutex(void) + + static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) + { ++ BUG_ON(!pmgr); ++ + kfree(pmgr->dbgdev); + + pmgr->dbgdev = NULL; +@@ -53,7 +55,7 @@ static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) + + void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) + { +- if (pmgr) { ++ if (pmgr != NULL) { + kfd_dbgmgr_uninitialize(pmgr); + kfree(pmgr); + } +@@ -64,12 +66,12 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; + struct kfd_dbgmgr *new_buff; + +- if (WARN_ON(!pdev->init_complete)) +- return false; ++ BUG_ON(pdev == NULL); ++ BUG_ON(!pdev->init_complete); + + new_buff = kfd_alloc_struct(new_buff); + if (!new_buff) { +- pr_err("Failed to allocate dbgmgr instance\n"); ++ pr_err("amdkfd: Failed to allocate dbgmgr instance\n"); + return false; + } + +@@ -77,7 +79,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + new_buff->dev = pdev; + new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); + if (!new_buff->dbgdev) { +- pr_err("Failed to allocate dbgdev instance\n"); ++ pr_err("amdkfd: Failed to allocate dbgdev instance\n"); + kfree(new_buff); + return false; + } +@@ -94,6 +96,8 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) + + long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + { ++ BUG_ON(!p || !pmgr || !pmgr->dbgdev); ++ + if (pmgr->pasid != 0) { + pr_debug("H/W debugger is already active using pasid %d\n", + pmgr->pasid); +@@ -114,6 +118,8 @@ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + + long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + { ++ BUG_ON(!p || !pmgr || !pmgr->dbgdev); ++ + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != p->pasid) { + pr_debug("H/W debugger is not registered by calling pasid %d\n", +@@ -131,6 +137,8 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) + long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info) + { ++ BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info); ++ + /* Is the requests coming from the already registered process? 
*/ + if (pmgr->pasid != wac_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", +@@ -144,6 +152,9 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info) + { ++ BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); ++ ++ + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != adw_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +index a04a1fe..257a745 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +@@ -30,11 +30,13 @@ + #pragma pack(push, 4) + + enum HSA_DBG_WAVEOP { +- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ +- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ +- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ +- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ +- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ ++ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ ++ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ ++ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ ++ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter ++ debug mode */ ++ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take ++ a trap */ + HSA_DBG_NUM_WAVEOP = 5, + HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF + }; +@@ -79,13 +81,15 @@ struct HsaDbgWaveMsgAMDGen2 { + uint32_t UserData:8; /* user data */ + uint32_t ShaderArray:1; /* Shader array */ + uint32_t Priv:1; /* Privileged */ +- uint32_t Reserved0:4; /* Reserved, should be 0 */ ++ uint32_t Reserved0:4; /* This field is reserved, ++ should be 0 */ + uint32_t WaveId:4; /* wave id */ + uint32_t SIMD:2; /* SIMD id */ + uint32_t HSACU:4; /* Compute unit */ + uint32_t ShaderEngine:2;/* Shader engine */ + uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ +- uint32_t Reserved1:4; /* Reserved, should be 0 */ ++ uint32_t Reserved1:4; /* This field is reserved, ++ should be 0 */ + } ui32; + uint32_t Value; + }; +@@ -117,23 +121,20 @@ struct HsaDbgWaveMessage { + * in the user mode instruction stream. The OS scheduler event is typically + * associated and signaled by an interrupt issued by the GPU, but other HSA + * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced +- * by the KFD by this mechanism, too. +- */ ++ * by the KFD by this mechanism, too. */ + + /* these are the new definitions for events */ + enum HSA_EVENTTYPE { + HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ + HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ + HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change +- * (start/stop) +- */ ++ (start/stop) */ + HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ + HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ + HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ + HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ + HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state +- * (EOP pm4) +- */ ++ (EOP pm4) */ + /* ... 
*/ + HSA_EVENTTYPE_MAXID, + HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 61fff25..3f95f7c 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -26,7 +26,7 @@ + #include <linux/slab.h> + #include "kfd_priv.h" + #include "kfd_device_queue_manager.h" +-#include "kfd_pm4_headers_vi.h" ++#include "kfd_pm4_headers.h" + + #define MQD_SIZE_ALIGNED 768 + +@@ -98,14 +98,11 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) + + for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { + if (supported_devices[i].did == did) { +- WARN_ON(!supported_devices[i].device_info); ++ BUG_ON(supported_devices[i].device_info == NULL); + return supported_devices[i].device_info; + } + } + +- dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", +- did); +- + return NULL; + } + +@@ -117,10 +114,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + const struct kfd_device_info *device_info = + lookup_device_info(pdev->device); + +- if (!device_info) { +- dev_err(kfd_device, "kgd2kfd_probe failed\n"); ++ if (!device_info) + return NULL; +- } + + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); + if (!kfd) +@@ -157,16 +152,15 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { +- dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", ++ dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, +- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) +- != 0); ++ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); + return false; + } + + pasid_limit = min_t(unsigned int, +- (unsigned int)(1 << kfd->device_info->max_pasid_bits), ++ (unsigned int)1 << kfd->device_info->max_pasid_bits, + iommu_info.max_pasids); + /* + * last pasid is used for kernel queues doorbells +@@ -217,8 +211,9 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + flags); + + dev = kfd_device_by_pci_dev(pdev); +- if (!WARN_ON(!dev)) +- kfd_signal_iommu_event(dev, pasid, address, ++ BUG_ON(dev == NULL); ++ ++ kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +@@ -239,9 +234,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + * calculate max size of runlist packet. 
+ * There can be only 2 packets at once
+ */
+- size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
+- max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+- + sizeof(struct pm4_mes_runlist)) * 2;
++ size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
++ max_num_of_queues_per_device *
++ sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+ 
+ /* Add size of HIQ & DIQ */
+ size += KFD_KERNEL_QUEUE_SIZE * 2;
+ 
+@@ -252,37 +247,42 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ if (kfd->kfd2kgd->init_gtt_mem_allocation(
+ kfd->kgd, size, &kfd->gtt_mem,
+ &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
+- dev_err(kfd_device, "Could not allocate %d bytes\n", size);
++ dev_err(kfd_device,
++ "Could not allocate %d bytes for device (%x:%x)\n",
++ size, kfd->pdev->vendor, kfd->pdev->device);
+ goto out;
+ }
+ 
+- dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
++ dev_info(kfd_device,
++ "Allocated %d bytes on gart for device(%x:%x)\n",
++ size, kfd->pdev->vendor, kfd->pdev->device);
+ 
+ /* Initialize GTT sa with 512 byte chunk size */
+ if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
+- dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
++ dev_err(kfd_device,
++ "Error initializing gtt sub-allocator\n");
+ goto kfd_gtt_sa_init_error;
+ }
+ 
+- if (kfd_doorbell_init(kfd)) {
+- dev_err(kfd_device,
+- "Error initializing doorbell aperture\n");
+- goto kfd_doorbell_error;
+- }
++ kfd_doorbell_init(kfd);
+ 
+- if (kfd_topology_add_device(kfd)) {
+- dev_err(kfd_device, "Error adding device to topology\n");
++ if (kfd_topology_add_device(kfd) != 0) {
++ dev_err(kfd_device,
++ "Error adding device (%x:%x) to topology\n",
++ kfd->pdev->vendor, kfd->pdev->device);
+ goto kfd_topology_add_device_error;
+ }
+ 
+ if (kfd_interrupt_init(kfd)) {
+- dev_err(kfd_device, "Error initializing interrupts\n");
++ dev_err(kfd_device,
++ "Error initializing interrupts for device (%x:%x)\n",
++ kfd->pdev->vendor, kfd->pdev->device);
+ goto kfd_interrupt_error;
+ }
+ 
+ if (!device_iommu_pasid_init(kfd)) {
+ dev_err(kfd_device,
+- "Error initializing iommuv2 for device %x:%x\n",
++ "Error initializing iommuv2 for device (%x:%x)\n",
+ kfd->pdev->vendor, kfd->pdev->device);
+ goto device_iommu_pasid_error;
+ }
+@@ -292,13 +292,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ 
+ kfd->dqm = device_queue_manager_init(kfd);
+ if (!kfd->dqm) {
+- dev_err(kfd_device, "Error initializing queue manager\n");
++ dev_err(kfd_device,
++ "Error initializing queue manager for device (%x:%x)\n",
++ kfd->pdev->vendor, kfd->pdev->device);
+ goto device_queue_manager_error;
+ }
+ 
+- if (kfd->dqm->ops.start(kfd->dqm)) {
++ if (kfd->dqm->ops.start(kfd->dqm) != 0) {
+ dev_err(kfd_device,
+- "Error starting queue manager for device %x:%x\n",
++ "Error starting queue manager for device (%x:%x)\n",
+ kfd->pdev->vendor, kfd->pdev->device);
+ goto dqm_start_error;
+ }
+@@ -306,10 +308,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ kfd->dbgmgr = NULL;
+ 
+ kfd->init_complete = true;
+- dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
++ dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+ kfd->pdev->device);
+ 
+- pr_debug("Starting kfd with the following scheduling policy %d\n",
++ pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
+ sched_policy);
+ 
+ goto out;
+@@ -323,13 +325,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ kfd_interrupt_error:
+ kfd_topology_remove_device(kfd);
+ 
kfd_topology_add_device_error: +- kfd_doorbell_fini(kfd); +-kfd_doorbell_error: + kfd_gtt_sa_fini(kfd); + kfd_gtt_sa_init_error: + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + dev_err(kfd_device, +- "device %x:%x NOT added due to errors\n", ++ "device (%x:%x) NOT added due to errors\n", + kfd->pdev->vendor, kfd->pdev->device); + out: + return kfd->init_complete; +@@ -342,7 +342,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) + amd_iommu_free_device(kfd->pdev); + kfd_interrupt_exit(kfd); + kfd_topology_remove_device(kfd); +- kfd_doorbell_fini(kfd); + kfd_gtt_sa_fini(kfd); + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + } +@@ -352,6 +351,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) + + void kgd2kfd_suspend(struct kfd_dev *kfd) + { ++ BUG_ON(kfd == NULL); ++ + if (kfd->init_complete) { + kfd->dqm->ops.stop(kfd->dqm); + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); +@@ -365,15 +366,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) + unsigned int pasid_limit; + int err; + ++ BUG_ON(kfd == NULL); ++ + pasid_limit = kfd_get_pasid_limit(); + + if (kfd->init_complete) { + err = amd_iommu_init_device(kfd->pdev, pasid_limit); +- if (err < 0) { +- dev_err(kfd_device, "failed to initialize iommu\n"); ++ if (err < 0) + return -ENXIO; +- } +- + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); +@@ -402,27 +402,26 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size) + { +- unsigned int num_of_longs; ++ unsigned int num_of_bits; + +- if (WARN_ON(buf_size < chunk_size)) +- return -EINVAL; +- if (WARN_ON(buf_size == 0)) +- return -EINVAL; +- if (WARN_ON(chunk_size == 0)) +- return -EINVAL; ++ BUG_ON(!kfd); ++ BUG_ON(!kfd->gtt_mem); ++ BUG_ON(buf_size < chunk_size); ++ BUG_ON(buf_size == 0); ++ BUG_ON(chunk_size == 0); + + kfd->gtt_sa_chunk_size = chunk_size; + kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; + +- num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / +- BITS_PER_LONG; ++ num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; ++ BUG_ON(num_of_bits == 0); + +- kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); ++ kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); + + if (!kfd->gtt_sa_bitmap) + return -ENOMEM; + +- pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", ++ pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", + kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); + + mutex_init(&kfd->gtt_sa_lock); +@@ -456,6 +455,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + { + unsigned int found, start_search, cur_size; + ++ BUG_ON(!kfd); ++ + if (size == 0) + return -EINVAL; + +@@ -466,7 +467,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + if ((*mem_obj) == NULL) + return -ENOMEM; + +- pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); ++ pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); + + start_search = 0; + +@@ -478,7 +479,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + kfd->gtt_sa_num_of_chunks, + start_search); + +- pr_debug("Found = %d\n", found); ++ pr_debug("kfd: found = %d\n", found); + + /* If there wasn't any free chunk, bail out */ + if (found == kfd->gtt_sa_num_of_chunks) +@@ -496,12 +497,12 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + found, + 
kfd->gtt_sa_chunk_size); + +- pr_debug("gpu_addr = %p, cpu_addr = %p\n", ++ pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", + (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); + + /* If we need only one chunk, mark it as allocated and get out */ + if (size <= kfd->gtt_sa_chunk_size) { +- pr_debug("Single bit\n"); ++ pr_debug("kfd: single bit\n"); + set_bit(found, kfd->gtt_sa_bitmap); + goto kfd_gtt_out; + } +@@ -536,7 +537,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + + } while (cur_size > 0); + +- pr_debug("range_start = %d, range_end = %d\n", ++ pr_debug("kfd: range_start = %d, range_end = %d\n", + (*mem_obj)->range_start, (*mem_obj)->range_end); + + /* Mark the chunks as allocated */ +@@ -550,7 +551,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + return 0; + + kfd_gtt_no_free_chunk: +- pr_debug("Allocation failed with mem_obj = %p\n", mem_obj); ++ pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj); + mutex_unlock(&kfd->gtt_sa_lock); + kfree(mem_obj); + return -ENOMEM; +@@ -560,11 +561,13 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) + { + unsigned int bit; + ++ BUG_ON(!kfd); ++ + /* Act like kfree when trying to free a NULL object */ + if (!mem_obj) + return 0; + +- pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", ++ pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", + mem_obj, mem_obj->range_start, mem_obj->range_end); + + mutex_lock(&kfd->gtt_sa_lock); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 53a66e8..42de22b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -79,17 +79,20 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) + + unsigned int get_queues_num(struct device_queue_manager *dqm) + { ++ BUG_ON(!dqm || !dqm->dev); + return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, + KGD_MAX_QUEUES); + } + + unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) + { ++ BUG_ON(!dqm || !dqm->dev); + return dqm->dev->shared_resources.num_queue_per_pipe; + } + + unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) + { ++ BUG_ON(!dqm || !dqm->dev); + return dqm->dev->shared_resources.num_pipe_per_mec; + } + +@@ -118,7 +121,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, + + /* Kaveri kfd vmid's starts from vmid 8 */ + allocated_vmid = bit + KFD_VMID_START_OFFSET; +- pr_debug("vmid allocation %d\n", allocated_vmid); ++ pr_debug("kfd: vmid allocation %d\n", allocated_vmid); + qpd->vmid = allocated_vmid; + q->properties.vmid = allocated_vmid; + +@@ -149,38 +152,42 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + { + int retval; + ++ BUG_ON(!dqm || !q || !qpd || !allocated_vmid); ++ ++ pr_debug("kfd: In func %s\n", __func__); + print_queue(q); + + mutex_lock(&dqm->lock); + + if (dqm->total_queue_count >= max_num_of_queues_per_device) { +- pr_warn("Can't create new usermode queue because %d queues were already created\n", ++ pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", + dqm->total_queue_count); +- retval = -EPERM; +- goto out_unlock; ++ mutex_unlock(&dqm->lock); ++ return -EPERM; + } + + if (list_empty(&qpd->queues_list)) { + retval = allocate_vmid(dqm, qpd, q); +- if (retval) +- goto out_unlock; ++ if (retval != 0) { ++ mutex_unlock(&dqm->lock); ++ 
return retval; ++ } + } + *allocated_vmid = qpd->vmid; + q->properties.vmid = qpd->vmid; + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + retval = create_compute_queue_nocpsch(dqm, q, qpd); +- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) ++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + retval = create_sdma_queue_nocpsch(dqm, q, qpd); +- else +- retval = -EINVAL; + +- if (retval) { ++ if (retval != 0) { + if (list_empty(&qpd->queues_list)) { + deallocate_vmid(dqm, qpd, q); + *allocated_vmid = 0; + } +- goto out_unlock; ++ mutex_unlock(&dqm->lock); ++ return retval; + } + + list_add(&q->list, &qpd->queues_list); +@@ -198,9 +205,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + pr_debug("Total of %d queues are accountable so far\n", + dqm->total_queue_count); + +-out_unlock: + mutex_unlock(&dqm->lock); +- return retval; ++ return 0; + } + + static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) +@@ -210,8 +216,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) + + set = false; + +- for (pipe = dqm->next_pipe_to_allocate, i = 0; +- i < get_pipes_per_mec(dqm); ++ for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm); + pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { + + if (!is_pipe_enabled(dqm, 0, pipe)) +@@ -234,7 +239,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) + if (!set) + return -EBUSY; + +- pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); ++ pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", ++ __func__, q->pipe, q->queue); + /* horizontal hqd allocation */ + dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); + +@@ -254,38 +260,36 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + int retval; + struct mqd_manager *mqd; + ++ BUG_ON(!dqm || !q || !qpd); ++ + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); +- if (!mqd) ++ if (mqd == NULL) + return -ENOMEM; + + retval = allocate_hqd(dqm, q); +- if (retval) ++ if (retval != 0) + return retval; + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); +- if (retval) +- goto out_deallocate_hqd; +- +- pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", +- q->pipe, q->queue); ++ if (retval != 0) { ++ deallocate_hqd(dqm, q); ++ return retval; ++ } + +- dqm->dev->kfd2kgd->set_scratch_backing_va( +- dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); ++ pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", ++ q->pipe, ++ q->queue); + +- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, +- q->process->mm); +- if (retval) +- goto out_uninit_mqd; ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, ++ q->queue, (uint32_t __user *) q->properties.write_ptr); ++ if (retval != 0) { ++ deallocate_hqd(dqm, q); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); ++ return retval; ++ } + + return 0; +- +-out_uninit_mqd: +- mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +-out_deallocate_hqd: +- deallocate_hqd(dqm, q); +- +- return retval; + } + + static int destroy_queue_nocpsch(struct device_queue_manager *dqm, +@@ -295,8 +299,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + int retval; + struct mqd_manager *mqd; + ++ BUG_ON(!dqm || !q || !q->mqd || !qpd); ++ + retval = 0; + ++ pr_debug("kfd: In Func %s\n", __func__); ++ + mutex_lock(&dqm->lock); + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { +@@ -315,7 +323,7 @@ static int 
destroy_queue_nocpsch(struct device_queue_manager *dqm, + dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } else { +- pr_debug("q->properties.type %d is invalid\n", ++ pr_debug("q->properties.type is invalid (%d)\n", + q->properties.type); + retval = -EINVAL; + goto out; +@@ -326,7 +334,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + q->pipe, q->queue); + +- if (retval) ++ if (retval != 0) + goto out; + + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +@@ -356,12 +364,14 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + struct mqd_manager *mqd; + bool prev_active = false; + ++ BUG_ON(!dqm || !q || !q->mqd); ++ + mutex_lock(&dqm->lock); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd) { +- retval = -ENOMEM; +- goto out_unlock; ++ if (mqd == NULL) { ++ mutex_unlock(&dqm->lock); ++ return -ENOMEM; + } + + if (q->properties.is_active) +@@ -375,13 +385,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if ((q->properties.is_active) && (!prev_active)) + dqm->queue_count++; +- else if (!q->properties.is_active && prev_active) ++ else if ((!q->properties.is_active) && (prev_active)) + dqm->queue_count--; + + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + retval = execute_queues_cpsch(dqm, false); + +-out_unlock: + mutex_unlock(&dqm->lock); + return retval; + } +@@ -391,16 +400,15 @@ static struct mqd_manager *get_mqd_manager_nocpsch( + { + struct mqd_manager *mqd; + +- if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) +- return NULL; ++ BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX); + +- pr_debug("mqd type %d\n", type); ++ pr_debug("kfd: In func %s mqd type %d\n", __func__, type); + + mqd = dqm->mqds[type]; + if (!mqd) { + mqd = mqd_manager_init(type, dqm->dev); +- if (!mqd) +- pr_err("mqd manager is NULL"); ++ if (mqd == NULL) ++ pr_err("kfd: mqd manager is NULL"); + dqm->mqds[type] = mqd; + } + +@@ -413,7 +421,11 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, + struct device_process_node *n; + int retval; + +- n = kzalloc(sizeof(*n), GFP_KERNEL); ++ BUG_ON(!dqm || !qpd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ ++ n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL); + if (!n) + return -ENOMEM; + +@@ -437,6 +449,10 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm, + int retval; + struct device_process_node *cur, *next; + ++ BUG_ON(!dqm || !qpd); ++ ++ pr_debug("In func %s\n", __func__); ++ + pr_debug("qpd->queues_list is %s\n", + list_empty(&qpd->queues_list) ? 
"empty" : "not empty"); + +@@ -477,39 +493,51 @@ static void init_interrupts(struct device_queue_manager *dqm) + { + unsigned int i; + ++ BUG_ON(dqm == NULL); ++ + for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) + if (is_pipe_enabled(dqm, 0, i)) + dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); + } + ++static int init_scheduler(struct device_queue_manager *dqm) ++{ ++ int retval = 0; ++ ++ BUG_ON(!dqm); ++ ++ pr_debug("kfd: In %s\n", __func__); ++ ++ return retval; ++} ++ + static int initialize_nocpsch(struct device_queue_manager *dqm) + { +- int pipe, queue; ++ int i; + +- pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); ++ BUG_ON(!dqm); + +- dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), +- sizeof(unsigned int), GFP_KERNEL); +- if (!dqm->allocated_queues) +- return -ENOMEM; ++ pr_debug("kfd: In func %s num of pipes: %d\n", ++ __func__, get_pipes_per_mec(dqm)); + + mutex_init(&dqm->lock); + INIT_LIST_HEAD(&dqm->queues); + dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->sdma_queue_count = 0; +- +- for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { +- int pipe_offset = pipe * get_queues_per_pipe(dqm); +- +- for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) +- if (test_bit(pipe_offset + queue, +- dqm->dev->shared_resources.queue_bitmap)) +- dqm->allocated_queues[pipe] |= 1 << queue; ++ dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), ++ sizeof(unsigned int), GFP_KERNEL); ++ if (!dqm->allocated_queues) { ++ mutex_destroy(&dqm->lock); ++ return -ENOMEM; + } + ++ for (i = 0; i < get_pipes_per_mec(dqm); i++) ++ dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1; ++ + dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + ++ init_scheduler(dqm); + return 0; + } + +@@ -517,7 +545,9 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) + { + int i; + +- WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); ++ BUG_ON(!dqm); ++ ++ BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + + kfree(dqm->allocated_queues); + for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) +@@ -574,34 +604,33 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + return -ENOMEM; + + retval = allocate_sdma_queue(dqm, &q->sdma_id); +- if (retval) ++ if (retval != 0) + return retval; + + q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + +- pr_debug("SDMA id is: %d\n", q->sdma_id); +- pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); +- pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); ++ pr_debug("kfd: sdma id is: %d\n", q->sdma_id); ++ pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); ++ pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + + dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); +- if (retval) +- goto out_deallocate_sdma_queue; ++ if (retval != 0) { ++ deallocate_sdma_queue(dqm, q->sdma_id); ++ return retval; ++ } + +- retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); +- if (retval) +- goto out_uninit_mqd; ++ retval = mqd->load_mqd(mqd, q->mqd, 0, ++ 0, NULL); ++ if (retval != 0) { ++ deallocate_sdma_queue(dqm, q->sdma_id); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); ++ return retval; ++ } + + return 0; +- +-out_uninit_mqd: +- mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +-out_deallocate_sdma_queue: +- 
deallocate_sdma_queue(dqm, q->sdma_id); +- +- return retval; + } + + /* +@@ -613,6 +642,10 @@ static int set_sched_resources(struct device_queue_manager *dqm) + int i, mec; + struct scheduling_resources res; + ++ BUG_ON(!dqm); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; + res.vmid_mask <<= KFD_VMID_START_OFFSET; + +@@ -630,8 +663,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the +- * definition of res.queue_mask needs updating +- */ ++ * definition of res.queue_mask needs updating */ + if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { + pr_err("Invalid queue enabled by amdgpu: %d\n", i); + break; +@@ -642,9 +674,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) + res.gws_mask = res.oac_mask = res.gds_heap_base = + res.gds_heap_size = 0; + +- pr_debug("Scheduling resources:\n" +- "vmid mask: 0x%8X\n" +- "queue mask: 0x%8llX\n", ++ pr_debug("kfd: scheduling resources:\n" ++ " vmid mask: 0x%8X\n" ++ " queue mask: 0x%8llX\n", + res.vmid_mask, res.queue_mask); + + return pm_send_set_resources(&dqm->packets, &res); +@@ -654,7 +686,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) + { + int retval; + +- pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); ++ BUG_ON(!dqm); ++ ++ pr_debug("kfd: In func %s num of pipes: %d\n", ++ __func__, get_pipes_per_mec(dqm)); + + mutex_init(&dqm->lock); + INIT_LIST_HEAD(&dqm->queues); +@@ -662,9 +697,13 @@ static int initialize_cpsch(struct device_queue_manager *dqm) + dqm->sdma_queue_count = 0; + dqm->active_runlist = false; + retval = dqm->ops_asic_specific.initialize(dqm); +- if (retval) +- mutex_destroy(&dqm->lock); ++ if (retval != 0) ++ goto fail_init_pipelines; + ++ return 0; ++ ++fail_init_pipelines: ++ mutex_destroy(&dqm->lock); + return retval; + } + +@@ -673,23 +712,25 @@ static int start_cpsch(struct device_queue_manager *dqm) + struct device_process_node *node; + int retval; + ++ BUG_ON(!dqm); ++ + retval = 0; + + retval = pm_init(&dqm->packets, dqm); +- if (retval) ++ if (retval != 0) + goto fail_packet_manager_init; + + retval = set_sched_resources(dqm); +- if (retval) ++ if (retval != 0) + goto fail_set_sched_resources; + +- pr_debug("Allocating fence memory\n"); ++ pr_debug("kfd: allocating fence memory\n"); + + /* allocate fence memory on the gart */ + retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), + &dqm->fence_mem); + +- if (retval) ++ if (retval != 0) + goto fail_allocate_vidmem; + + dqm->fence_addr = dqm->fence_mem->cpu_ptr; +@@ -717,6 +758,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) + struct device_process_node *node; + struct kfd_process_device *pdd; + ++ BUG_ON(!dqm); ++ + destroy_queues_cpsch(dqm, true, true); + + list_for_each_entry(node, &dqm->queues, list) { +@@ -733,9 +776,13 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd) + { ++ BUG_ON(!dqm || !kq || !qpd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + mutex_lock(&dqm->lock); + if (dqm->total_queue_count >= max_num_of_queues_per_device) { +- pr_warn("Can't create new kernel queue because %d queues were already created\n", ++ pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n", + dqm->total_queue_count); + mutex_unlock(&dqm->lock); + return -EPERM; +@@ -762,6 +809,10 @@ static void 
destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd) + { ++ BUG_ON(!dqm || !kq); ++ ++ pr_debug("kfd: In %s\n", __func__); ++ + mutex_lock(&dqm->lock); + /* here we actually preempt the DIQ */ + destroy_queues_cpsch(dqm, true, false); +@@ -793,6 +844,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + int retval; + struct mqd_manager *mqd; + ++ BUG_ON(!dqm || !q || !qpd); ++ + retval = 0; + + if (allocate_vmid) +@@ -801,7 +854,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + mutex_lock(&dqm->lock); + + if (dqm->total_queue_count >= max_num_of_queues_per_device) { +- pr_warn("Can't create new usermode queue because %d queues were already created\n", ++ pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", + dqm->total_queue_count); + retval = -EPERM; + goto out; +@@ -813,15 +866,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + +- if (!mqd) { +- retval = -ENOMEM; +- goto out; ++ if (mqd == NULL) { ++ mutex_unlock(&dqm->lock); ++ return -ENOMEM; + } + + dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); +- if (retval) ++ if (retval != 0) + goto out; + + list_add(&q->list, &qpd->queues_list); +@@ -831,7 +884,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + } + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) +- dqm->sdma_queue_count++; ++ dqm->sdma_queue_count++; + /* + * Unconditionally increment this counter, regardless of the queue's + * type or whether the queue is active. 
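[Editor's note: the next hunk touches amdkfd_fence_wait_timeout(), whose reverted form busy-polls a fence value until the CP firmware writes the expected value or a jiffies deadline passes, yielding via schedule() on each pass. Below is a minimal user-space sketch of the same poll-with-deadline pattern, for illustration only; fence and now_ms() are hypothetical stand-ins for the kernel's GART-resident *fence_addr and the jiffies clock.]

#include <stdint.h>
#include <time.h>
#include <errno.h>

static volatile uint32_t fence;	/* written by another agent (the GPU CP in kfd) */

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u;
}

static int fence_wait_timeout(uint32_t wanted, uint64_t timeout_ms)
{
	uint64_t deadline = now_ms() + timeout_ms;

	while (fence != wanted) {
		if (now_ms() > deadline)
			return -ETIME;	/* same error the kfd path returns */
		/* the kernel loop calls schedule() here to yield the CPU */
	}
	return 0;
}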
+@@ -850,11 +903,12 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + unsigned int fence_value, + unsigned long timeout) + { ++ BUG_ON(!fence_addr); + timeout += jiffies; + + while (*fence_addr != fence_value) { + if (time_after(jiffies, timeout)) { +- pr_err("qcm fence wait loop timeout expired\n"); ++ pr_err("kfd: qcm fence wait loop timeout expired\n"); + return -ETIME; + } + schedule(); +@@ -878,6 +932,8 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_preempt_type_filter preempt_type; + struct kfd_process_device *pdd; + ++ BUG_ON(!dqm); ++ + retval = 0; + + if (lock) +@@ -885,7 +941,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, + if (!dqm->active_runlist) + goto out; + +- pr_debug("Before destroying queues, sdma queue count is : %u\n", ++ pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", + dqm->sdma_queue_count); + + if (dqm->sdma_queue_count > 0) { +@@ -899,7 +955,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, + + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, + preempt_type, 0, false, 0); +- if (retval) ++ if (retval != 0) + goto out; + + *dqm->fence_addr = KFD_FENCE_INIT; +@@ -908,7 +964,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, + /* should be timed out */ + retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, + QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); +- if (retval) { ++ if (retval != 0) { + pdd = kfd_get_process_device_data(dqm->dev, + kfd_get_process(current)); + pdd->reset_wavefronts = true; +@@ -927,12 +983,14 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) + { + int retval; + ++ BUG_ON(!dqm); ++ + if (lock) + mutex_lock(&dqm->lock); + + retval = destroy_queues_cpsch(dqm, false, false); +- if (retval) { +- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); ++ if (retval != 0) { ++ pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + goto out; + } + +@@ -947,8 +1005,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) + } + + retval = pm_send_runlist(&dqm->packets, &dqm->queues); +- if (retval) { +- pr_err("failed to execute runlist"); ++ if (retval != 0) { ++ pr_err("kfd: failed to execute runlist"); + goto out; + } + dqm->active_runlist = true; +@@ -967,6 +1025,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + struct mqd_manager *mqd; + bool preempt_all_queues; + ++ BUG_ON(!dqm || !qpd || !q); ++ + preempt_all_queues = false; + + retval = 0; +@@ -1038,6 +1098,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, + { + bool retval; + ++ pr_debug("kfd: In func %s\n", __func__); ++ + mutex_lock(&dqm->lock); + + if (alternate_aperture_size == 0) { +@@ -1058,11 +1120,14 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, + uint64_t base = (uintptr_t)alternate_aperture_base; + uint64_t limit = base + alternate_aperture_size - 1; + +- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || +- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { +- retval = false; ++ if (limit <= base) ++ goto out; ++ ++ if ((base & APE1_FIXED_BITS_MASK) != 0) ++ goto out; ++ ++ if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) + goto out; +- } + + qpd->sh_mem_ape1_base = base >> 16; + qpd->sh_mem_ape1_limit = limit >> 16; +@@ -1079,22 +1144,27 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm, + if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) + program_sh_mem_settings(dqm, qpd); + +- pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", ++ pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", + qpd->sh_mem_config, qpd->sh_mem_ape1_base, + qpd->sh_mem_ape1_limit); + +-out: + mutex_unlock(&dqm->lock); + return retval; ++ ++out: ++ mutex_unlock(&dqm->lock); ++ return false; + } + + struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + { + struct device_queue_manager *dqm; + +- pr_debug("Loading device queue manager\n"); ++ BUG_ON(!dev); + +- dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); ++ pr_debug("kfd: loading device queue manager\n"); ++ ++ dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); + if (!dqm) + return NULL; + +@@ -1132,8 +1202,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + break; + default: +- pr_err("Invalid scheduling policy %d\n", sched_policy); +- goto out_free; ++ BUG(); ++ break; + } + + switch (dev->device_info->asic_family) { +@@ -1146,16 +1216,18 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + break; + } + +- if (!dqm->ops.initialize(dqm)) +- return dqm; ++ if (dqm->ops.initialize(dqm) != 0) { ++ kfree(dqm); ++ return NULL; ++ } + +-out_free: +- kfree(dqm); +- return NULL; ++ return dqm; + } + + void device_queue_manager_uninit(struct device_queue_manager *dqm) + { ++ BUG_ON(!dqm); ++ + dqm->ops.uninitialize(dqm); + kfree(dqm); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +index 72c3cba..48dc056 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +@@ -24,7 +24,6 @@ + #include "kfd_device_queue_manager.h" + #include "cik_regs.h" + #include "oss/oss_2_4_sh_mask.h" +-#include "gca/gfx_7_2_sh_mask.h" + + static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, +@@ -66,7 +65,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) + * for LDS/Scratch and GPUVM. 
+ */ + +- WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || ++ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return PRIVATE_BASE(top_address_nybble << 12) | +@@ -105,6 +104,8 @@ static int register_process_cik(struct device_queue_manager *dqm, + struct kfd_process_device *pdd; + unsigned int temp; + ++ BUG_ON(!dqm || !qpd); ++ + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ +@@ -124,10 +125,9 @@ static int register_process_cik(struct device_queue_manager *dqm, + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); +- qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; + } + +- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", ++ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +index 40e9ddd..7e9cae9 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +@@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) + * for LDS/Scratch and GPUVM. + */ + +- WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || ++ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return top_address_nybble << 12 | +@@ -110,6 +110,8 @@ static int register_process_vi(struct device_queue_manager *dqm, + struct kfd_process_device *pdd; + unsigned int temp; + ++ BUG_ON(!dqm || !qpd); ++ + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ +@@ -135,11 +137,9 @@ static int register_process_vi(struct device_queue_manager *dqm, + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; +- qpd->sh_mem_config |= 1 << +- SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; + } + +- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", ++ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +index acf4d2a..453c5d6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +@@ -59,7 +59,7 @@ static inline size_t doorbell_process_allocation(void) + } + + /* Doorbell calculations for device init. 
*/ +-int kfd_doorbell_init(struct kfd_dev *kfd) ++void kfd_doorbell_init(struct kfd_dev *kfd) + { + size_t doorbell_start_offset; + size_t doorbell_aperture_size; +@@ -95,35 +95,26 @@ int kfd_doorbell_init(struct kfd_dev *kfd) + kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, + doorbell_process_allocation()); + +- if (!kfd->doorbell_kernel_ptr) +- return -ENOMEM; ++ BUG_ON(!kfd->doorbell_kernel_ptr); + +- pr_debug("Doorbell initialization:\n"); +- pr_debug("doorbell base == 0x%08lX\n", ++ pr_debug("kfd: doorbell initialization:\n"); ++ pr_debug("kfd: doorbell base == 0x%08lX\n", + (uintptr_t)kfd->doorbell_base); + +- pr_debug("doorbell_id_offset == 0x%08lX\n", ++ pr_debug("kfd: doorbell_id_offset == 0x%08lX\n", + kfd->doorbell_id_offset); + +- pr_debug("doorbell_process_limit == 0x%08lX\n", ++ pr_debug("kfd: doorbell_process_limit == 0x%08lX\n", + doorbell_process_limit); + +- pr_debug("doorbell_kernel_offset == 0x%08lX\n", ++ pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n", + (uintptr_t)kfd->doorbell_base); + +- pr_debug("doorbell aperture size == 0x%08lX\n", ++ pr_debug("kfd: doorbell aperture size == 0x%08lX\n", + kfd->shared_resources.doorbell_aperture_size); + +- pr_debug("doorbell kernel address == 0x%08lX\n", ++ pr_debug("kfd: doorbell kernel address == 0x%08lX\n", + (uintptr_t)kfd->doorbell_kernel_ptr); +- +- return 0; +-} +- +-void kfd_doorbell_fini(struct kfd_dev *kfd) +-{ +- if (kfd->doorbell_kernel_ptr) +- iounmap(kfd->doorbell_kernel_ptr); + } + + int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +@@ -140,7 +131,7 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) + + /* Find kfd device according to gpu id */ + dev = kfd_device_by_id(vma->vm_pgoff); +- if (!dev) ++ if (dev == NULL) + return -EINVAL; + + /* Calculate physical address of doorbell */ +@@ -151,11 +142,12 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + +- pr_debug("Mapping doorbell page\n" ++ pr_debug("kfd: mapping doorbell page in %s\n" + " target user address == 0x%08llX\n" + " physical address == 0x%08llX\n" + " vm_flags == 0x%04lX\n" + " size == 0x%04lX\n", ++ __func__, + (unsigned long long) vma->vm_start, address, vma->vm_flags, + doorbell_process_allocation()); + +@@ -174,6 +166,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + { + u32 inx; + ++ BUG_ON(!kfd || !doorbell_off); ++ + mutex_lock(&kfd->doorbell_mutex); + inx = find_first_zero_bit(kfd->doorbell_available_index, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); +@@ -191,7 +185,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / + sizeof(u32)) + inx; + +- pr_debug("Get kernel queue doorbell\n" ++ pr_debug("kfd: get kernel queue doorbell\n" + " doorbell offset == 0x%08X\n" + " kernel address == 0x%08lX\n", + *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); +@@ -203,6 +197,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) + { + unsigned int inx; + ++ BUG_ON(!kfd || !db_addr); ++ + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + + mutex_lock(&kfd->doorbell_mutex); +@@ -214,7 +210,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) + { + if (db) { + writel(value, db); +- pr_debug("Writing %d to doorbell address 0x%p\n", value, db); ++ pr_debug("writing %d to doorbell address 0x%p\n", value, db); + } + } + +diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 5979158..d1ce83d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -110,7 +110,7 @@ static bool allocate_free_slot(struct kfd_process *process, + *out_page = page; + *out_slot_index = slot; + +- pr_debug("Allocated event signal slot in page %p, slot %d\n", ++ pr_debug("allocated event signal slot in page %p, slot %d\n", + page, slot); + + return true; +@@ -155,9 +155,9 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) + struct signal_page, + event_pages)->page_index + 1; + +- pr_debug("Allocated new event signal page at %p, for process %p\n", ++ pr_debug("allocated new event signal page at %p, for process %p\n", + page, p); +- pr_debug("Page index is %d\n", page->page_index); ++ pr_debug("page index is %d\n", page->page_index); + + list_add(&page->event_pages, &p->signal_event_pages); + +@@ -194,8 +194,7 @@ static void release_event_notification_slot(struct signal_page *page, + page->free_slots++; + + /* We don't free signal pages, they are retained by the process +- * and reused until it exits. +- */ ++ * and reused until it exits. */ + } + + static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, +@@ -247,7 +246,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) + + for (id = p->next_nonsignal_event_id; + id < KFD_LAST_NONSIGNAL_EVENT_ID && +- lookup_event_by_id(p, id); ++ lookup_event_by_id(p, id) != NULL; + id++) + ; + +@@ -266,7 +265,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) + + for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; + id < KFD_LAST_NONSIGNAL_EVENT_ID && +- lookup_event_by_id(p, id); ++ lookup_event_by_id(p, id) != NULL; + id++) + ; + +@@ -292,13 +291,13 @@ static int create_signal_event(struct file *devkfd, + struct kfd_event *ev) + { + if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { +- pr_warn("Signal event wasn't created because limit was reached\n"); ++ pr_warn("amdkfd: Signal event wasn't created because limit was reached\n"); + return -ENOMEM; + } + + if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, + &ev->signal_slot_index)) { +- pr_warn("Signal event wasn't created because out of kernel memory\n"); ++ pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n"); + return -ENOMEM; + } + +@@ -310,7 +309,11 @@ static int create_signal_event(struct file *devkfd, + ev->event_id = make_signal_event_id(ev->signal_page, + ev->signal_slot_index); + +- pr_debug("Signal event number %zu created with id %d, address %p\n", ++ pr_debug("signal event number %zu created with id %d, address %p\n", ++ p->signal_event_count, ev->event_id, ++ ev->user_signal_address); ++ ++ pr_debug("signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, + ev->user_signal_address); + +@@ -342,7 +345,7 @@ void kfd_event_init_process(struct kfd_process *p) + + static void destroy_event(struct kfd_process *p, struct kfd_event *ev) + { +- if (ev->signal_page) { ++ if (ev->signal_page != NULL) { + release_event_notification_slot(ev->signal_page, + ev->signal_slot_index); + p->signal_event_count--; +@@ -581,7 +584,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + * search faster. 
+ */ + struct signal_page *page; +- unsigned int i; ++ unsigned i; + + list_for_each_entry(page, &p->signal_event_pages, event_pages) + for (i = 0; i < SLOTS_PER_PAGE; i++) +@@ -813,7 +816,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + /* check required size is logical */ + if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != + get_order(vma->vm_end - vma->vm_start)) { +- pr_err("Event page mmap requested illegal size\n"); ++ pr_err("amdkfd: event page mmap requested illegal size\n"); + return -EINVAL; + } + +@@ -822,7 +825,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + page = lookup_signal_page_by_index(p, page_index); + if (!page) { + /* Probably KFD bug, but mmap is user-accessible. */ +- pr_debug("Signal page could not be found for page_index %u\n", ++ pr_debug("signal page could not be found for page_index %u\n", + page_index); + return -EINVAL; + } +@@ -833,7 +836,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE + | VM_DONTDUMP | VM_PFNMAP; + +- pr_debug("Mapping signal page\n"); ++ pr_debug("mapping signal page\n"); + pr_debug(" start user address == 0x%08lx\n", vma->vm_start); + pr_debug(" end user address == 0x%08lx\n", vma->vm_end); + pr_debug(" pfn == 0x%016lX\n", pfn); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index c59384b..2b65510 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -304,7 +304,7 @@ int kfd_init_apertures(struct kfd_process *process) + id < NUM_OF_SUPPORTED_GPUS) { + + pdd = kfd_create_process_device_data(dev, process); +- if (!pdd) { ++ if (pdd == NULL) { + pr_err("Failed to create process device data\n"); + return -1; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +index 70b3a99c..7f134aa 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +@@ -179,7 +179,7 @@ static void interrupt_wq(struct work_struct *work) + bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) + { + /* integer and bitwise OR so there is no boolean short-circuiting */ +- unsigned int wanted = 0; ++ unsigned wanted = 0; + + wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, + ih_ring_entry); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index 681b639..d135cd0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -41,11 +41,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + int retval; + union PM4_MES_TYPE_3_HEADER nop; + +- if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) +- return false; ++ BUG_ON(!kq || !dev); ++ BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); + +- pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, +- queue_size); ++ pr_debug("amdkfd: In func %s initializing queue type %d size %d\n", ++ __func__, KFD_QUEUE_TYPE_HIQ, queue_size); + + memset(&prop, 0, sizeof(prop)); + memset(&nop, 0, sizeof(nop)); +@@ -63,23 +63,23 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + KFD_MQD_TYPE_HIQ); + break; + default: +- pr_err("Invalid queue type %d\n", type); +- return false; ++ BUG(); ++ break; + } + +- if (!kq->mqd) ++ if (kq->mqd 
== NULL) + return false; + + prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); + +- if (!prop.doorbell_ptr) { +- pr_err("Failed to initialize doorbell"); ++ if (prop.doorbell_ptr == NULL) { ++ pr_err("amdkfd: error init doorbell"); + goto err_get_kernel_doorbell; + } + + retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); + if (retval != 0) { +- pr_err("Failed to init pq queues size %d\n", queue_size); ++ pr_err("amdkfd: error init pq queues size (%d)\n", queue_size); + goto err_pq_allocate_vidmem; + } + +@@ -87,7 +87,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + kq->pq_gpu_addr = kq->pq->gpu_addr; + + retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); +- if (!retval) ++ if (retval == false) + goto err_eop_allocate_vidmem; + + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), +@@ -139,12 +139,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + + /* assign HIQ to HQD */ + if (type == KFD_QUEUE_TYPE_HIQ) { +- pr_debug("Assigning hiq to hqd\n"); ++ pr_debug("assigning hiq to hqd\n"); + kq->queue->pipe = KFD_CIK_HIQ_PIPE; + kq->queue->queue = KFD_CIK_HIQ_QUEUE; + kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, +- kq->queue->queue, &kq->queue->properties, +- NULL); ++ kq->queue->queue, NULL); + } else { + /* allocate fence for DIQ */ + +@@ -181,6 +180,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + + static void uninitialize(struct kernel_queue *kq) + { ++ BUG_ON(!kq); ++ + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) + kq->mqd->destroy_mqd(kq->mqd, + NULL, +@@ -210,6 +211,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, + uint32_t wptr, rptr; + unsigned int *queue_address; + ++ BUG_ON(!kq || !buffer_ptr); ++ + rptr = *kq->rptr_kernel; + wptr = *kq->wptr_kernel; + queue_address = (unsigned int *)kq->pq_kernel_addr; +@@ -249,7 +252,11 @@ static void submit_packet(struct kernel_queue *kq) + { + #ifdef DEBUG + int i; ++#endif ++ ++ BUG_ON(!kq); + ++#ifdef DEBUG + for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) { + pr_debug("0x%2X ", kq->pq_kernel_addr[i]); + if (i % 15 == 0) +@@ -265,6 +272,7 @@ static void submit_packet(struct kernel_queue *kq) + + static void rollback_packet(struct kernel_queue *kq) + { ++ BUG_ON(!kq); + kq->pending_wptr = *kq->queue->properties.write_ptr; + } + +@@ -273,7 +281,9 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + { + struct kernel_queue *kq; + +- kq = kzalloc(sizeof(*kq), GFP_KERNEL); ++ BUG_ON(!dev); ++ ++ kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL); + if (!kq) + return NULL; + +@@ -294,7 +304,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + } + + if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) { +- pr_err("Failed to init kernel queue\n"); ++ pr_err("amdkfd: failed to init kernel queue\n"); + kfree(kq); + return NULL; + } +@@ -303,37 +313,32 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + + void kernel_queue_uninit(struct kernel_queue *kq) + { ++ BUG_ON(!kq); ++ + kq->ops.uninitialize(kq); + kfree(kq); + } + +-/* FIXME: Can this test be removed? 
*/ + static __attribute__((unused)) void test_kq(struct kfd_dev *dev) + { + struct kernel_queue *kq; + uint32_t *buffer, i; + int retval; + +- pr_err("Starting kernel queue test\n"); ++ BUG_ON(!dev); ++ ++ pr_err("amdkfd: starting kernel queue test\n"); + + kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); +- if (unlikely(!kq)) { +- pr_err(" Failed to initialize HIQ\n"); +- pr_err("Kernel queue test failed\n"); +- return; +- } ++ BUG_ON(!kq); + + retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); +- if (unlikely(retval != 0)) { +- pr_err(" Failed to acquire packet buffer\n"); +- pr_err("Kernel queue test failed\n"); +- return; +- } ++ BUG_ON(retval != 0); + for (i = 0; i < 5; i++) + buffer[i] = kq->nop_packet; + kq->ops.submit_packet(kq); + +- pr_err("Ending kernel queue test\n"); ++ pr_err("amdkfd: ending kernel queue test\n"); + } + + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index 0d73bea..850a562 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -61,8 +61,7 @@ MODULE_PARM_DESC(send_sigterm, + + static int amdkfd_init_completed; + +-int kgd2kfd_init(unsigned int interface_version, +- const struct kgd2kfd_calls **g2f) ++int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) + { + if (!amdkfd_init_completed) + return -EPROBE_DEFER; +@@ -91,7 +90,7 @@ static int __init kfd_module_init(void) + /* Verify module parameters */ + if ((sched_policy < KFD_SCHED_POLICY_HWS) || + (sched_policy > KFD_SCHED_POLICY_NO_HWS)) { +- pr_err("sched_policy has invalid value\n"); ++ pr_err("kfd: sched_policy has invalid value\n"); + return -1; + } + +@@ -99,13 +98,13 @@ static int __init kfd_module_init(void) + if ((max_num_of_queues_per_device < 1) || + (max_num_of_queues_per_device > + KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) { +- pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); ++ pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); + return -1; + } + + err = kfd_pasid_init(); + if (err < 0) +- return err; ++ goto err_pasid; + + err = kfd_chardev_init(); + if (err < 0) +@@ -127,6 +126,7 @@ static int __init kfd_module_init(void) + kfd_chardev_exit(); + err_ioctl: + kfd_pasid_exit(); ++err_pasid: + return err; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +index 1f3a6ba..213a71e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +@@ -67,8 +67,7 @@ struct mqd_manager { + + int (*load_mqd)(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, +- struct queue_properties *p, +- struct mm_struct *mms); ++ uint32_t __user *wptr); + + int (*update_mqd)(struct mqd_manager *mm, void *mqd, + struct queue_properties *q); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 164fa4b..e492692 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -44,6 +44,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + struct cik_mqd *m; + int retval; + ++ BUG_ON(!mm || !q || !mqd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + +@@ -97,7 +101,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + m->cp_hqd_iq_rptr = AQL_ENABLE; + 
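[Editor's note: the init_mqd() hunks here follow a recurring shape in these mqd_manager changes: allocate GTT-backed memory with kfd_gtt_sa_allocate(), zero-fill and program the MQD struct, then (in the tail visible just below) hand the results back through out-parameters, with the GPU "gart" address optional. A standalone sketch of that shape follows; struct mqd_stub and alloc_pair() are hypothetical stand-ins for struct cik_mqd and kfd_gtt_sa_allocate(), not driver code.]

#include <stdint.h>
#include <stdlib.h>
#include <errno.h>

struct mqd_stub {
	uint32_t cp_hqd_pq_control;	/* placeholder hardware fields */
	uint32_t cp_hqd_iq_rptr;
};

/* toy allocator: returns a zeroed CPU pointer plus a fake GPU address */
static int alloc_pair(size_t size, void **cpu_ptr, uint64_t *gpu_addr)
{
	*cpu_ptr = calloc(1, size);
	if (!*cpu_ptr)
		return -ENOMEM;
	*gpu_addr = (uint64_t)(uintptr_t)*cpu_ptr;	/* stand-in for a GART offset */
	return 0;
}

static int init_mqd_stub(void **mqd, uint64_t *gart_addr)
{
	struct mqd_stub *m;
	uint64_t addr;
	int retval;

	retval = alloc_pair(sizeof(*m), (void **)&m, &addr);
	if (retval != 0)
		return retval;

	m->cp_hqd_iq_rptr = 1;		/* program hardware defaults here */

	*mqd = m;
	if (gart_addr != NULL)		/* GPU address is optional, as in the patch */
		*gart_addr = addr;
	return 0;
}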
+ *mqd = m; +- if (gart_addr) ++ if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + +@@ -111,6 +115,8 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + int retval; + struct cik_sdma_rlc_registers *m; + ++ BUG_ON(!mm || !mqd || !mqd_mem_obj); ++ + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct cik_sdma_rlc_registers), + mqd_mem_obj); +@@ -123,7 +129,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); + + *mqd = m; +- if (gart_addr) ++ if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); +@@ -134,31 +140,27 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) + { ++ BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } + + static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) + { ++ BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } + + static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, +- uint32_t queue_id, struct queue_properties *p, +- struct mm_struct *mms) ++ uint32_t queue_id, uint32_t __user *wptr) + { +- /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ +- uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); +- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); +- +- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +- (uint32_t __user *)p->write_ptr, +- wptr_shift, wptr_mask, mms); ++ return mm->dev->kfd2kgd->hqd_load ++ (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); + } + + static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, +- uint32_t pipe_id, uint32_t queue_id, +- struct queue_properties *p, struct mm_struct *mms) ++ uint32_t pipe_id, uint32_t queue_id, ++ uint32_t __user *wptr) + { + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); + } +@@ -168,6 +170,10 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + { + struct cik_mqd *m; + ++ BUG_ON(!mm || !q || !mqd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; +@@ -182,17 +188,21 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); +- m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); ++ m->cp_hqd_pq_doorbell_control = DOORBELL_EN | ++ DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + +- if (q->format == KFD_QUEUE_FORMAT_AQL) ++ if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= NO_UPDATE_RPTR; ++ } + ++ m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { ++ m->cp_hqd_active = 1; + q->is_active = true; + } + +@@ -204,6 +214,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + { + struct cik_sdma_rlc_registers *m; + ++ BUG_ON(!mm || !mqd || !q); ++ + m = get_sdma_mqd(mqd); + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | +@@ -242,7 +254,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, + unsigned int timeout, 
uint32_t pipe_id, + uint32_t queue_id) + { +- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, ++ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, + pipe_id, queue_id); + } + +@@ -289,6 +301,10 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct cik_mqd *m; + int retval; + ++ BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + +@@ -343,6 +359,10 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + { + struct cik_mqd *m; + ++ BUG_ON(!mm || !q || !mqd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | +@@ -380,6 +400,8 @@ struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) + { + struct cik_sdma_rlc_registers *m; + ++ BUG_ON(!mqd); ++ + m = (struct cik_sdma_rlc_registers *)mqd; + + return m; +@@ -390,10 +412,12 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + { + struct mqd_manager *mqd; + +- if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) +- return NULL; ++ BUG_ON(!dev); ++ BUG_ON(type >= KFD_MQD_TYPE_MAX); ++ ++ pr_debug("kfd: In func %s\n", __func__); + +- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); ++ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index 73cbfe1..a9b9882 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -85,7 +85,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + m->cp_hqd_iq_rptr = 1; + + *mqd = m; +- if (gart_addr) ++ if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + +@@ -94,15 +94,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + + static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, +- struct queue_properties *p, struct mm_struct *mms) ++ uint32_t __user *wptr) + { +- /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ +- uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0); +- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); +- +- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +- (uint32_t __user *)p->write_ptr, +- wptr_shift, wptr_mask, mms); ++ return mm->dev->kfd2kgd->hqd_load ++ (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); + } + + static int __update_mqd(struct mqd_manager *mm, void *mqd, +@@ -111,6 +106,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + { + struct vi_mqd *m; + ++ BUG_ON(!mm || !q || !mqd); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | +@@ -118,7 +117,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; + m->cp_hqd_pq_control |= + ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; +- pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); ++ pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); +@@ -127,9 +126,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + + m->cp_hqd_pq_doorbell_control = ++ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; +- pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", ++ pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT | +@@ -139,15 +139,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT; + +- /* +- * HW does not clamp this field correctly. Maximum EOP queue size +- * is constrained by per-SE EOP done signal count, which is 8-bit. +- * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit +- * more than (EOP entry count - 1) so a queue size of 0x800 dwords +- * is safe, giving a maximum field value of 0xA. 
+- */ +- m->cp_hqd_eop_control |= min(0xA, +- ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); ++ m->cp_hqd_eop_control |= ++ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = +@@ -163,10 +156,12 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; + } + ++ m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { ++ m->cp_hqd_active = 1; + q->is_active = true; + } + +@@ -186,13 +181,14 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, + uint32_t queue_id) + { + return mm->dev->kfd2kgd->hqd_destroy +- (mm->dev->kgd, mqd, type, timeout, ++ (mm->dev->kgd, type, timeout, + pipe_id, queue_id); + } + + static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) + { ++ BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } + +@@ -242,10 +238,12 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + { + struct mqd_manager *mqd; + +- if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) +- return NULL; ++ BUG_ON(!dev); ++ BUG_ON(type >= KFD_MQD_TYPE_MAX); ++ ++ pr_debug("kfd: In func %s\n", __func__); + +- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); ++ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index 308571b..a470019 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -26,6 +26,7 @@ + #include "kfd_device_queue_manager.h" + #include "kfd_kernel_queue.h" + #include "kfd_priv.h" ++#include "kfd_pm4_headers.h" + #include "kfd_pm4_headers_vi.h" + #include "kfd_pm4_opcodes.h" + +@@ -34,8 +35,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, + { + unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t); + +- WARN((temp * sizeof(uint32_t)) > buffer_size_bytes, +- "Runlist IB overflow"); ++ BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes); + *wptr = temp; + } + +@@ -43,12 +43,12 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) + { + union PM4_MES_TYPE_3_HEADER header; + +- header.u32All = 0; ++ header.u32all = 0; + header.opcode = opcode; + header.count = packet_size/sizeof(uint32_t) - 2; + header.type = PM4_TYPE_3; + +- return header.u32All; ++ return header.u32all; + } + + static void pm_calc_rlib_size(struct packet_manager *pm, +@@ -58,6 +58,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + unsigned int process_count, queue_count; + unsigned int map_queue_size; + ++ BUG_ON(!pm || !rlib_size || !over_subscription); ++ + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; + +@@ -65,12 +67,15 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + *over_subscription = false; + if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { + *over_subscription = true; +- pr_debug("Over subscribed runlist\n"); ++ pr_debug("kfd: over subscribed runlist\n"); + } + +- map_queue_size = sizeof(struct pm4_mes_map_queues); ++ map_queue_size = ++ (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ? 
++ sizeof(struct pm4_mes_map_queues) : ++ sizeof(struct pm4_map_queues); + /* calculate run list ib allocation size */ +- *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + ++ *rlib_size = process_count * sizeof(struct pm4_map_process) + + queue_count * map_queue_size; + + /* +@@ -78,9 +83,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + * when over subscription + */ + if (*over_subscription) +- *rlib_size += sizeof(struct pm4_mes_runlist); ++ *rlib_size += sizeof(struct pm4_runlist); + +- pr_debug("runlist ib size %d\n", *rlib_size); ++ pr_debug("kfd: runlist ib size %d\n", *rlib_size); + } + + static int pm_allocate_runlist_ib(struct packet_manager *pm, +@@ -91,16 +96,17 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, + { + int retval; + +- if (WARN_ON(pm->allocated)) +- return -EINVAL; ++ BUG_ON(!pm); ++ BUG_ON(pm->allocated); ++ BUG_ON(is_over_subscription == NULL); + + pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, + &pm->ib_buffer_obj); + +- if (retval) { +- pr_err("Failed to allocate runlist IB\n"); ++ if (retval != 0) { ++ pr_err("kfd: failed to allocate runlist IB\n"); + return retval; + } + +@@ -115,16 +121,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, + static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) + { +- struct pm4_mes_runlist *packet; ++ struct pm4_runlist *packet; + +- if (WARN_ON(!ib)) +- return -EFAULT; ++ BUG_ON(!pm || !buffer || !ib); + +- packet = (struct pm4_mes_runlist *)buffer; ++ packet = (struct pm4_runlist *)buffer; + +- memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +- packet->header.u32All = build_pm4_header(IT_RUN_LIST, +- sizeof(struct pm4_mes_runlist)); ++ memset(buffer, 0, sizeof(struct pm4_runlist)); ++ packet->header.u32all = build_pm4_header(IT_RUN_LIST, ++ sizeof(struct pm4_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; +@@ -139,16 +144,20 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, + static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) + { +- struct pm4_mes_map_process *packet; ++ struct pm4_map_process *packet; + struct queue *cur; + uint32_t num_queues; + +- packet = (struct pm4_mes_map_process *)buffer; ++ BUG_ON(!pm || !buffer || !qpd); ++ ++ packet = (struct pm4_map_process *)buffer; ++ ++ pr_debug("kfd: In func %s\n", __func__); + +- memset(buffer, 0, sizeof(struct pm4_mes_map_process)); ++ memset(buffer, 0, sizeof(struct pm4_map_process)); + +- packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, +- sizeof(struct pm4_mes_map_process)); ++ packet->header.u32all = build_pm4_header(IT_MAP_PROCESS, ++ sizeof(struct pm4_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; +@@ -174,17 +183,21 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, ++static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) + { + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + ++ BUG_ON(!pm || !buffer || !q); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ + packet = (struct pm4_mes_map_queues *)buffer; +- memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); ++ memset(buffer, 0, sizeof(struct pm4_map_queues)); + + packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, +- sizeof(struct pm4_mes_map_queues)); ++ sizeof(struct pm4_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; +@@ -212,8 +225,10 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, + use_static = false; /* no static queues under SDMA */ + break; + default: +- WARN(1, "queue type %d", q->properties.type); +- return -EINVAL; ++ pr_err("kfd: in %s queue type %d\n", __func__, ++ q->properties.type); ++ BUG(); ++ break; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; +@@ -233,6 +248,68 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + ++static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, ++ struct queue *q, bool is_static) ++{ ++ struct pm4_map_queues *packet; ++ bool use_static = is_static; ++ ++ BUG_ON(!pm || !buffer || !q); ++ ++ pr_debug("kfd: In func %s\n", __func__); ++ ++ packet = (struct pm4_map_queues *)buffer; ++ memset(buffer, 0, sizeof(struct pm4_map_queues)); ++ ++ packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, ++ sizeof(struct pm4_map_queues)); ++ packet->bitfields2.alloc_format = ++ alloc_format__mes_map_queues__one_per_pipe; ++ packet->bitfields2.num_queues = 1; ++ packet->bitfields2.queue_sel = ++ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots; ++ ++ packet->bitfields2.vidmem = (q->properties.is_interop) ? ++ vidmem__mes_map_queues__uses_video_memory : ++ vidmem__mes_map_queues__uses_no_video_memory; ++ ++ switch (q->properties.type) { ++ case KFD_QUEUE_TYPE_COMPUTE: ++ case KFD_QUEUE_TYPE_DIQ: ++ packet->bitfields2.engine_sel = ++ engine_sel__mes_map_queues__compute; ++ break; ++ case KFD_QUEUE_TYPE_SDMA: ++ packet->bitfields2.engine_sel = ++ engine_sel__mes_map_queues__sdma0; ++ use_static = false; /* no static queues under SDMA */ ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ++ packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = ++ q->properties.doorbell_off; ++ ++ packet->mes_map_queues_ordinals[0].bitfields3.is_static = ++ (use_static) ? 
1 : 0; ++ ++ packet->mes_map_queues_ordinals[0].mqd_addr_lo = ++ lower_32_bits(q->gart_mqd_addr); ++ ++ packet->mes_map_queues_ordinals[0].mqd_addr_hi = ++ upper_32_bits(q->gart_mqd_addr); ++ ++ packet->mes_map_queues_ordinals[0].wptr_addr_lo = ++ lower_32_bits((uint64_t)q->properties.write_ptr); ++ ++ packet->mes_map_queues_ordinals[0].wptr_addr_hi = ++ upper_32_bits((uint64_t)q->properties.write_ptr); ++ ++ return 0; ++} ++ + static int pm_create_runlist_ib(struct packet_manager *pm, + struct list_head *queues, + uint64_t *rl_gpu_addr, +@@ -247,16 +324,19 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + struct kernel_queue *kq; + bool is_over_subscription; + ++ BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr); ++ + rl_wptr = retval = proccesses_mapped = 0; + + retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, + &alloc_size_bytes, &is_over_subscription); +- if (retval) ++ if (retval != 0) + return retval; + + *rl_size_bytes = alloc_size_bytes; + +- pr_debug("Building runlist ib process count: %d queues count %d\n", ++ pr_debug("kfd: In func %s\n", __func__); ++ pr_debug("kfd: building runlist ib process count: %d queues count %d\n", + pm->dqm->processes_count, pm->dqm->queue_count); + + /* build the run list ib packet */ +@@ -264,35 +344,42 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + qpd = cur->qpd; + /* build map process packet */ + if (proccesses_mapped >= pm->dqm->processes_count) { +- pr_debug("Not enough space left in runlist IB\n"); ++ pr_debug("kfd: not enough space left in runlist IB\n"); + pm_release_ib(pm); + return -ENOMEM; + } + + retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); +- if (retval) ++ if (retval != 0) + return retval; + + proccesses_mapped++; +- inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), ++ inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), + alloc_size_bytes); + + list_for_each_entry(kq, &qpd->priv_queue_list, list) { + if (!kq->queue->properties.is_active) + continue; + +- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", ++ pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", + kq->queue->queue, qpd->is_debug); + +- retval = pm_create_map_queue(pm, ++ if (pm->dqm->dev->device_info->asic_family == ++ CHIP_CARRIZO) ++ retval = pm_create_map_queue_vi(pm, ++ &rl_buffer[rl_wptr], ++ kq->queue, ++ qpd->is_debug); ++ else ++ retval = pm_create_map_queue(pm, + &rl_buffer[rl_wptr], + kq->queue, + qpd->is_debug); +- if (retval) ++ if (retval != 0) + return retval; + + inc_wptr(&rl_wptr, +- sizeof(struct pm4_mes_map_queues), ++ sizeof(struct pm4_map_queues), + alloc_size_bytes); + } + +@@ -300,44 +387,51 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + if (!q->properties.is_active) + continue; + +- pr_debug("static_queue, mapping user queue %d, is debug status %d\n", ++ pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", + q->queue, qpd->is_debug); + +- retval = pm_create_map_queue(pm, ++ if (pm->dqm->dev->device_info->asic_family == ++ CHIP_CARRIZO) ++ retval = pm_create_map_queue_vi(pm, ++ &rl_buffer[rl_wptr], ++ q, ++ qpd->is_debug); ++ else ++ retval = pm_create_map_queue(pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); + +- if (retval) ++ if (retval != 0) + return retval; + + inc_wptr(&rl_wptr, +- sizeof(struct pm4_mes_map_queues), ++ sizeof(struct pm4_map_queues), + alloc_size_bytes); + } + } + +- pr_debug("Finished map process and queues to runlist\n"); ++ pr_debug("kfd: finished map process and queues to 
runlist\n"); + + if (is_over_subscription) +- retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], +- *rl_gpu_addr, +- alloc_size_bytes / sizeof(uint32_t), +- true); ++ pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, ++ alloc_size_bytes / sizeof(uint32_t), true); + + for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) + pr_debug("0x%2X ", rl_buffer[i]); + pr_debug("\n"); + +- return retval; ++ return 0; + } + + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) + { ++ BUG_ON(!dqm); ++ + pm->dqm = dqm; + mutex_init(&pm->lock); + pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); +- if (!pm->priv_queue) { ++ if (pm->priv_queue == NULL) { + mutex_destroy(&pm->lock); + return -ENOMEM; + } +@@ -348,6 +442,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) + + void pm_uninit(struct packet_manager *pm) + { ++ BUG_ON(!pm); ++ + mutex_destroy(&pm->lock); + kernel_queue_uninit(pm->priv_queue); + } +@@ -355,22 +451,25 @@ void pm_uninit(struct packet_manager *pm) + int pm_send_set_resources(struct packet_manager *pm, + struct scheduling_resources *res) + { +- struct pm4_mes_set_resources *packet; +- int retval = 0; ++ struct pm4_set_resources *packet; ++ ++ BUG_ON(!pm || !res); ++ ++ pr_debug("kfd: In func %s\n", __func__); + + mutex_lock(&pm->lock); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + sizeof(*packet) / sizeof(uint32_t), +- (unsigned int **)&packet); +- if (!packet) { +- pr_err("Failed to allocate buffer on kernel queue\n"); +- retval = -ENOMEM; +- goto out; ++ (unsigned int **)&packet); ++ if (packet == NULL) { ++ mutex_unlock(&pm->lock); ++ pr_err("kfd: failed to allocate buffer on kernel queue\n"); ++ return -ENOMEM; + } + +- memset(packet, 0, sizeof(struct pm4_mes_set_resources)); +- packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, +- sizeof(struct pm4_mes_set_resources)); ++ memset(packet, 0, sizeof(struct pm4_set_resources)); ++ packet->header.u32all = build_pm4_header(IT_SET_RESOURCES, ++ sizeof(struct pm4_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; +@@ -388,10 +487,9 @@ int pm_send_set_resources(struct packet_manager *pm, + + pm->priv_queue->ops.submit_packet(pm->priv_queue); + +-out: + mutex_unlock(&pm->lock); + +- return retval; ++ return 0; + } + + int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) +@@ -401,24 +499,26 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + size_t rl_ib_size, packet_size_dwords; + int retval; + ++ BUG_ON(!pm || !dqm_queues); ++ + retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, + &rl_ib_size); +- if (retval) ++ if (retval != 0) + goto fail_create_runlist_ib; + +- pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); ++ pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + +- packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); ++ packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); + mutex_lock(&pm->lock); + + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + packet_size_dwords, &rl_buffer); +- if (retval) ++ if (retval != 0) + goto fail_acquire_packet_buffer; + + retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + rl_ib_size / sizeof(uint32_t), false); +- if (retval) ++ if (retval != 0) + goto fail_create_runlist; + + pm->priv_queue->ops.submit_packet(pm->priv_queue); +@@ -432,7 +532,8 @@ int pm_send_runlist(struct packet_manager *pm, 
struct list_head *dqm_queues) + fail_acquire_packet_buffer: + mutex_unlock(&pm->lock); + fail_create_runlist_ib: +- pm_release_ib(pm); ++ if (pm->allocated) ++ pm_release_ib(pm); + return retval; + } + +@@ -440,21 +541,20 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + uint32_t fence_value) + { + int retval; +- struct pm4_mes_query_status *packet; ++ struct pm4_query_status *packet; + +- if (WARN_ON(!fence_address)) +- return -EFAULT; ++ BUG_ON(!pm || !fence_address); + + mutex_lock(&pm->lock); + retval = pm->priv_queue->ops.acquire_packet_buffer( + pm->priv_queue, +- sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), ++ sizeof(struct pm4_query_status) / sizeof(uint32_t), + (unsigned int **)&packet); +- if (retval) ++ if (retval != 0) + goto fail_acquire_packet_buffer; + +- packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, +- sizeof(struct pm4_mes_query_status)); ++ packet->header.u32all = build_pm4_header(IT_QUERY_STATUS, ++ sizeof(struct pm4_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = +@@ -468,6 +568,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + pm->priv_queue->ops.submit_packet(pm->priv_queue); ++ mutex_unlock(&pm->lock); ++ ++ return 0; + + fail_acquire_packet_buffer: + mutex_unlock(&pm->lock); +@@ -481,22 +584,24 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + { + int retval; + uint32_t *buffer; +- struct pm4_mes_unmap_queues *packet; ++ struct pm4_unmap_queues *packet; ++ ++ BUG_ON(!pm); + + mutex_lock(&pm->lock); + retval = pm->priv_queue->ops.acquire_packet_buffer( + pm->priv_queue, +- sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), ++ sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), + &buffer); +- if (retval) ++ if (retval != 0) + goto err_acquire_packet_buffer; + +- packet = (struct pm4_mes_unmap_queues *)buffer; +- memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); +- pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", ++ packet = (struct pm4_unmap_queues *)buffer; ++ memset(buffer, 0, sizeof(struct pm4_unmap_queues)); ++ pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", + mode, reset, type); +- packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, +- sizeof(struct pm4_mes_unmap_queues)); ++ packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, ++ sizeof(struct pm4_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: +@@ -508,9 +613,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: +- WARN(1, "queue type %d", type); +- retval = -EINVAL; +- goto err_invalid; ++ BUG(); ++ break; + } + + if (reset) +@@ -534,17 +638,16 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + break; + case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = +- queue_sel__mes_unmap_queues__unmap_all_queues; ++ queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; + break; + case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = +- queue_sel__mes_unmap_queues__unmap_all_non_static_queues; ++ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; + break; + default: +- WARN(1, "filter %d", mode); +- 
retval = -EINVAL; +- goto err_invalid; ++ BUG(); ++ break; + } + + pm->priv_queue->ops.submit_packet(pm->priv_queue); +@@ -552,8 +655,6 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + mutex_unlock(&pm->lock); + return 0; + +-err_invalid: +- pm->priv_queue->ops.rollback_packet(pm->priv_queue); + err_acquire_packet_buffer: + mutex_unlock(&pm->lock); + return retval; +@@ -561,6 +662,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + + void pm_release_ib(struct packet_manager *pm) + { ++ BUG_ON(!pm); ++ + mutex_lock(&pm->lock); + if (pm->allocated) { + kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +index 1e06de0..6cfe7f1 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +@@ -32,8 +32,7 @@ int kfd_pasid_init(void) + { + pasid_limit = KFD_MAX_NUM_OF_PROCESSES; + +- pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), +- GFP_KERNEL); ++ pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); + if (!pasid_bitmap) + return -ENOMEM; + +@@ -92,6 +91,6 @@ unsigned int kfd_pasid_alloc(void) + + void kfd_pasid_free(unsigned int pasid) + { +- if (!WARN_ON(pasid == 0 || pasid >= pasid_limit)) +- clear_bit(pasid, pasid_bitmap); ++ BUG_ON(pasid == 0 || pasid >= pasid_limit); ++ clear_bit(pasid, pasid_bitmap); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +index e50f73d..5b393f3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +@@ -28,19 +28,112 @@ + #define PM4_MES_HEADER_DEFINED + union PM4_MES_TYPE_3_HEADER { + struct { +- /* reserved */ +- uint32_t reserved1:8; +- /* IT opcode */ +- uint32_t opcode:8; +- /* number of DWORDs - 1 in the information body */ +- uint32_t count:14; +- /* packet identifier. It should be 3 for type 3 packets */ +- uint32_t type:2; ++ uint32_t reserved1:8; /* < reserved */ ++ uint32_t opcode:8; /* < IT opcode */ ++ uint32_t count:14; /* < number of DWORDs - 1 ++ * in the information body. ++ */ ++ uint32_t type:2; /* < packet identifier. 
++ * It should be 3 for type 3 packets ++ */ + }; + uint32_t u32all; + }; + #endif /* PM4_MES_HEADER_DEFINED */ + ++/* --------------------MES_SET_RESOURCES-------------------- */ ++ ++#ifndef PM4_MES_SET_RESOURCES_DEFINED ++#define PM4_MES_SET_RESOURCES_DEFINED ++enum set_resources_queue_type_enum { ++ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, ++ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, ++ queue_type__mes_set_resources__hsa_debug_interface_queue = 4 ++}; ++ ++struct pm4_set_resources { ++ union { ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; ++ }; ++ ++ union { ++ struct { ++ uint32_t vmid_mask:16; ++ uint32_t unmap_latency:8; ++ uint32_t reserved1:5; ++ enum set_resources_queue_type_enum queue_type:3; ++ } bitfields2; ++ uint32_t ordinal2; ++ }; ++ ++ uint32_t queue_mask_lo; ++ uint32_t queue_mask_hi; ++ uint32_t gws_mask_lo; ++ uint32_t gws_mask_hi; ++ ++ union { ++ struct { ++ uint32_t oac_mask:16; ++ uint32_t reserved2:16; ++ } bitfields7; ++ uint32_t ordinal7; ++ }; ++ ++ union { ++ struct { ++ uint32_t gds_heap_base:6; ++ uint32_t reserved3:5; ++ uint32_t gds_heap_size:6; ++ uint32_t reserved4:15; ++ } bitfields8; ++ uint32_t ordinal8; ++ }; ++ ++}; ++#endif ++ ++/*--------------------MES_RUN_LIST-------------------- */ ++ ++#ifndef PM4_MES_RUN_LIST_DEFINED ++#define PM4_MES_RUN_LIST_DEFINED ++ ++struct pm4_runlist { ++ union { ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; ++ }; ++ ++ union { ++ struct { ++ uint32_t reserved1:2; ++ uint32_t ib_base_lo:30; ++ } bitfields2; ++ uint32_t ordinal2; ++ }; ++ ++ union { ++ struct { ++ uint32_t ib_base_hi:16; ++ uint32_t reserved2:16; ++ } bitfields3; ++ uint32_t ordinal3; ++ }; ++ ++ union { ++ struct { ++ uint32_t ib_size:20; ++ uint32_t chain:1; ++ uint32_t offload_polling:1; ++ uint32_t reserved3:1; ++ uint32_t valid:1; ++ uint32_t reserved4:8; ++ } bitfields4; ++ uint32_t ordinal4; ++ }; ++ ++}; ++#endif + + /*--------------------MES_MAP_PROCESS-------------------- */ + +@@ -93,58 +186,217 @@ struct pm4_map_process { + }; + #endif + +-#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH +-#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH ++/*--------------------MES_MAP_QUEUES--------------------*/ ++ ++#ifndef PM4_MES_MAP_QUEUES_DEFINED ++#define PM4_MES_MAP_QUEUES_DEFINED ++enum map_queues_queue_sel_enum { ++ queue_sel__mes_map_queues__map_to_specified_queue_slots = 0, ++ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1, ++ queue_sel__mes_map_queues__enable_process_queues = 2 ++}; + +-struct pm4_map_process_scratch_kv { ++enum map_queues_vidmem_enum { ++ vidmem__mes_map_queues__uses_no_video_memory = 0, ++ vidmem__mes_map_queues__uses_video_memory = 1 ++}; ++ ++enum map_queues_alloc_format_enum { ++ alloc_format__mes_map_queues__one_per_pipe = 0, ++ alloc_format__mes_map_queues__all_on_one_pipe = 1 ++}; ++ ++enum map_queues_engine_sel_enum { ++ engine_sel__mes_map_queues__compute = 0, ++ engine_sel__mes_map_queues__sdma0 = 2, ++ engine_sel__mes_map_queues__sdma1 = 3 ++}; ++ ++struct pm4_map_queues { + union { +- union PM4_MES_TYPE_3_HEADER header; /* header */ +- uint32_t ordinal1; ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; + }; + + union { + struct { +- uint32_t pasid:16; +- uint32_t reserved1:8; +- uint32_t diq_enable:1; +- uint32_t process_quantum:7; ++ uint32_t reserved1:4; ++ enum map_queues_queue_sel_enum queue_sel:2; ++ uint32_t reserved2:2; ++ uint32_t vmid:4; ++ uint32_t reserved3:4; ++ enum 
map_queues_vidmem_enum vidmem:2; ++ uint32_t reserved4:6; ++ enum map_queues_alloc_format_enum alloc_format:2; ++ enum map_queues_engine_sel_enum engine_sel:3; ++ uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + ++ struct { ++ union { ++ struct { ++ uint32_t is_static:1; ++ uint32_t reserved5:1; ++ uint32_t doorbell_offset:21; ++ uint32_t reserved6:3; ++ uint32_t queue:6; ++ } bitfields3; ++ uint32_t ordinal3; ++ }; ++ ++ uint32_t mqd_addr_lo; ++ uint32_t mqd_addr_hi; ++ uint32_t wptr_addr_lo; ++ uint32_t wptr_addr_hi; ++ ++ } mes_map_queues_ordinals[1]; /* 1..N of these ordinal groups */ ++ ++}; ++#endif ++ ++/*--------------------MES_QUERY_STATUS--------------------*/ ++ ++#ifndef PM4_MES_QUERY_STATUS_DEFINED ++#define PM4_MES_QUERY_STATUS_DEFINED ++enum query_status_interrupt_sel_enum { ++ interrupt_sel__mes_query_status__completion_status = 0, ++ interrupt_sel__mes_query_status__process_status = 1, ++ interrupt_sel__mes_query_status__queue_status = 2 ++}; ++ ++enum query_status_command_enum { ++ command__mes_query_status__interrupt_only = 0, ++ command__mes_query_status__fence_only_immediate = 1, ++ command__mes_query_status__fence_only_after_write_ack = 2, ++ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 ++}; ++ ++enum query_status_engine_sel_enum { ++ engine_sel__mes_query_status__compute = 0, ++ engine_sel__mes_query_status__sdma0_queue = 2, ++ engine_sel__mes_query_status__sdma1_queue = 3 ++}; ++ ++struct pm4_query_status { ++ union { ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; ++ }; ++ + union { + struct { +- uint32_t page_table_base:28; +- uint32_t reserved2:4; +- } bitfields3; ++ uint32_t context_id:28; ++ enum query_status_interrupt_sel_enum interrupt_sel:2; ++ enum query_status_command_enum command:2; ++ } bitfields2; ++ uint32_t ordinal2; ++ }; ++ ++ union { ++ struct { ++ uint32_t pasid:16; ++ uint32_t reserved1:16; ++ } bitfields3a; ++ struct { ++ uint32_t reserved2:2; ++ uint32_t doorbell_offset:21; ++ uint32_t reserved3:3; ++ enum query_status_engine_sel_enum engine_sel:3; ++ uint32_t reserved4:3; ++ } bitfields3b; + uint32_t ordinal3; + }; + +- uint32_t reserved3; +- uint32_t sh_mem_bases; +- uint32_t sh_mem_config; +- uint32_t sh_mem_ape1_base; +- uint32_t sh_mem_ape1_limit; +- uint32_t sh_hidden_private_base_vmid; +- uint32_t reserved4; +- uint32_t reserved5; +- uint32_t gds_addr_lo; +- uint32_t gds_addr_hi; ++ uint32_t addr_lo; ++ uint32_t addr_hi; ++ uint32_t data_lo; ++ uint32_t data_hi; ++}; ++#endif ++ ++/*--------------------MES_UNMAP_QUEUES--------------------*/ ++ ++#ifndef PM4_MES_UNMAP_QUEUES_DEFINED ++#define PM4_MES_UNMAP_QUEUES_DEFINED ++enum unmap_queues_action_enum { ++ action__mes_unmap_queues__preempt_queues = 0, ++ action__mes_unmap_queues__reset_queues = 1, ++ action__mes_unmap_queues__disable_process_queues = 2 ++}; ++ ++enum unmap_queues_queue_sel_enum { ++ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, ++ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, ++ queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, ++ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 ++}; ++ ++enum unmap_queues_engine_sel_enum { ++ engine_sel__mes_unmap_queues__compute = 0, ++ engine_sel__mes_unmap_queues__sdma0 = 2, ++ engine_sel__mes_unmap_queues__sdma1 = 3 ++}; ++ ++struct pm4_unmap_queues { ++ union { ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; ++ }; ++ ++ union { ++ struct { ++ enum 
unmap_queues_action_enum action:2; ++ uint32_t reserved1:2; ++ enum unmap_queues_queue_sel_enum queue_sel:2; ++ uint32_t reserved2:20; ++ enum unmap_queues_engine_sel_enum engine_sel:3; ++ uint32_t num_queues:3; ++ } bitfields2; ++ uint32_t ordinal2; ++ }; ++ ++ union { ++ struct { ++ uint32_t pasid:16; ++ uint32_t reserved3:16; ++ } bitfields3a; ++ struct { ++ uint32_t reserved4:2; ++ uint32_t doorbell_offset0:21; ++ uint32_t reserved5:9; ++ } bitfields3b; ++ uint32_t ordinal3; ++ }; + + union { + struct { +- uint32_t num_gws:6; + uint32_t reserved6:2; +- uint32_t num_oac:4; +- uint32_t reserved7:4; +- uint32_t gds_size:6; +- uint32_t num_queues:10; +- } bitfields14; +- uint32_t ordinal14; ++ uint32_t doorbell_offset1:21; ++ uint32_t reserved7:9; ++ } bitfields4; ++ uint32_t ordinal4; ++ }; ++ ++ union { ++ struct { ++ uint32_t reserved8:2; ++ uint32_t doorbell_offset2:21; ++ uint32_t reserved9:9; ++ } bitfields5; ++ uint32_t ordinal5; ++ }; ++ ++ union { ++ struct { ++ uint32_t reserved10:2; ++ uint32_t doorbell_offset3:21; ++ uint32_t reserved11:9; ++ } bitfields6; ++ uint32_t ordinal6; + }; + +- uint32_t completion_signal_lo32; +-uint32_t completion_signal_hi32; + }; + #endif + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +index 7c8d9b3..08c7219 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +@@ -30,12 +30,10 @@ union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ +- uint32_t count : 14;/* < Number of DWORDS - 1 in the +- * information body +- */ +- uint32_t type : 2; /* < packet identifier +- * It should be 3 for type 3 packets +- */ ++ uint32_t count : 14;/* < number of DWORDs - 1 in the ++ information body. */ ++ uint32_t type : 2; /* < packet identifier. 
++ It should be 3 for type 3 packets */ + }; + uint32_t u32All; + }; +@@ -126,10 +124,9 @@ struct pm4_mes_runlist { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; +- uint32_t reserved2:1; ++ uint32_t reserved3:1; + uint32_t valid:1; +- uint32_t process_cnt:4; +- uint32_t reserved3:4; ++ uint32_t reserved4:8; + } bitfields4; + uint32_t ordinal4; + }; +@@ -144,8 +141,8 @@ struct pm4_mes_runlist { + + struct pm4_mes_map_process { + union { +- union PM4_MES_TYPE_3_HEADER header; /* header */ +- uint32_t ordinal1; ++ union PM4_MES_TYPE_3_HEADER header; /* header */ ++ uint32_t ordinal1; + }; + + union { +@@ -156,48 +153,36 @@ struct pm4_mes_map_process { + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; +- }; ++}; + + union { + struct { + uint32_t page_table_base:28; +- uint32_t reserved3:4; ++ uint32_t reserved2:4; + } bitfields3; + uint32_t ordinal3; + }; + +- uint32_t reserved; +- + uint32_t sh_mem_bases; +- uint32_t sh_mem_config; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; +- +- uint32_t sh_hidden_private_base_vmid; +- +- uint32_t reserved2; +- uint32_t reserved3; +- ++ uint32_t sh_mem_config; + uint32_t gds_addr_lo; + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; +- uint32_t reserved4:2; ++ uint32_t reserved3:2; + uint32_t num_oac:4; +- uint32_t reserved5:4; ++ uint32_t reserved4:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields10; + uint32_t ordinal10; + }; + +- uint32_t completion_signal_lo; +- uint32_t completion_signal_hi; +- + }; +- + #endif + + /*--------------------MES_MAP_QUEUES--------------------*/ +@@ -350,7 +335,7 @@ enum mes_unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__sdmal = 3 + }; + +-struct pm4_mes_unmap_queues { ++struct PM4_MES_UNMAP_QUEUES { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; +@@ -410,101 +395,4 @@ struct pm4_mes_unmap_queues { + }; + #endif + +-#ifndef PM4_MEC_RELEASE_MEM_DEFINED +-#define PM4_MEC_RELEASE_MEM_DEFINED +-enum RELEASE_MEM_event_index_enum { +- event_index___release_mem__end_of_pipe = 5, +- event_index___release_mem__shader_done = 6 +-}; +- +-enum RELEASE_MEM_cache_policy_enum { +- cache_policy___release_mem__lru = 0, +- cache_policy___release_mem__stream = 1, +- cache_policy___release_mem__bypass = 2 +-}; +- +-enum RELEASE_MEM_dst_sel_enum { +- dst_sel___release_mem__memory_controller = 0, +- dst_sel___release_mem__tc_l2 = 1, +- dst_sel___release_mem__queue_write_pointer_register = 2, +- dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 +-}; +- +-enum RELEASE_MEM_int_sel_enum { +- int_sel___release_mem__none = 0, +- int_sel___release_mem__send_interrupt_only = 1, +- int_sel___release_mem__send_interrupt_after_write_confirm = 2, +- int_sel___release_mem__send_data_after_write_confirm = 3 +-}; +- +-enum RELEASE_MEM_data_sel_enum { +- data_sel___release_mem__none = 0, +- data_sel___release_mem__send_32_bit_low = 1, +- data_sel___release_mem__send_64_bit_data = 2, +- data_sel___release_mem__send_gpu_clock_counter = 3, +- data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, +- data_sel___release_mem__store_gds_data_to_memory = 5 +-}; +- +-struct pm4_mec_release_mem { +- union { +- union PM4_MES_TYPE_3_HEADER header; /*header */ +- unsigned int ordinal1; +- }; +- +- union { +- struct { +- unsigned int event_type:6; +- unsigned int reserved1:2; +- enum RELEASE_MEM_event_index_enum event_index:4; +- unsigned int tcl1_vol_action_ena:1; +- unsigned int tc_vol_action_ena:1; +- unsigned int 
reserved2:1; +- unsigned int tc_wb_action_ena:1; +- unsigned int tcl1_action_ena:1; +- unsigned int tc_action_ena:1; +- unsigned int reserved3:6; +- unsigned int atc:1; +- enum RELEASE_MEM_cache_policy_enum cache_policy:2; +- unsigned int reserved4:5; +- } bitfields2; +- unsigned int ordinal2; +- }; +- +- union { +- struct { +- unsigned int reserved5:16; +- enum RELEASE_MEM_dst_sel_enum dst_sel:2; +- unsigned int reserved6:6; +- enum RELEASE_MEM_int_sel_enum int_sel:3; +- unsigned int reserved7:2; +- enum RELEASE_MEM_data_sel_enum data_sel:3; +- } bitfields3; +- unsigned int ordinal3; +- }; +- +- union { +- struct { +- unsigned int reserved8:2; +- unsigned int address_lo_32b:30; +- } bitfields4; +- struct { +- unsigned int reserved9:3; +- unsigned int address_lo_64b:29; +- } bitfields5; +- unsigned int ordinal4; +- }; +- +- unsigned int address_hi; +- +- unsigned int data_lo; +- +- unsigned int data_hi; +-}; +-#endif +- +-enum { +- CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +-}; +- + #endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index b397ec7..4750cab 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -239,6 +239,11 @@ enum kfd_preempt_type_filter { + KFD_PREEMPT_TYPE_FILTER_BY_PASID + }; + ++enum kfd_preempt_type { ++ KFD_PREEMPT_TYPE_WAVEFRONT, ++ KFD_PREEMPT_TYPE_WAVEFRONT_RESET ++}; ++ + /** + * enum kfd_queue_type + * +@@ -289,13 +294,13 @@ enum kfd_queue_format { + * @write_ptr: Defines the number of dwords written to the ring buffer. + * + * @doorbell_ptr: This field aim is to notify the H/W of new packet written to +- * the queue ring buffer. This field should be similar to write_ptr and the +- * user should update this field after he updated the write_ptr. ++ * the queue ring buffer. This field should be similar to write_ptr and the user ++ * should update this field after he updated the write_ptr. + * + * @doorbell_off: The doorbell offset in the doorbell pci-bar. + * +- * @is_interop: Defines if this is a interop queue. Interop queue means that +- * the queue can access both graphics and compute resources. ++ * @is_interop: Defines if this is a interop queue. Interop queue means that the ++ * queue can access both graphics and compute resources. + * + * @is_active: Defines if the queue is active or not. + * +@@ -347,10 +352,9 @@ struct queue_properties { + * @properties: The queue properties. + * + * @mec: Used only in no cp scheduling mode and identifies to micro engine id +- * that the queue should be execute on. ++ * that the queue should be execute on. + * +- * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe +- * id. ++ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. + * + * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. + * +@@ -432,7 +436,6 @@ struct qcm_process_device { + uint32_t gds_size; + uint32_t num_gws; + uint32_t num_oac; +- uint32_t sh_hidden_private_base; + }; + + /* Data that is per-process-per device. */ +@@ -517,8 +520,8 @@ struct kfd_process { + struct mutex event_mutex; + /* All events in process hashed by ID, linked on kfd_event.events. */ + DECLARE_HASHTABLE(events, 4); +- /* struct slot_page_header.event_pages */ +- struct list_head signal_event_pages; ++ struct list_head signal_event_pages; /* struct slot_page_header. 
++ event_pages */ + u32 next_nonsignal_event_id; + size_t signal_event_count; + }; +@@ -556,10 +559,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); + + /* Process device data iterator */ +-struct kfd_process_device *kfd_get_first_process_device_data( +- struct kfd_process *p); +-struct kfd_process_device *kfd_get_next_process_device_data( +- struct kfd_process *p, ++struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); ++struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, + struct kfd_process_device *pdd); + bool kfd_has_process_device_data(struct kfd_process *p); + +@@ -572,8 +573,7 @@ unsigned int kfd_pasid_alloc(void); + void kfd_pasid_free(unsigned int pasid); + + /* Doorbells */ +-int kfd_doorbell_init(struct kfd_dev *kfd); +-void kfd_doorbell_fini(struct kfd_dev *kfd); ++void kfd_doorbell_init(struct kfd_dev *kfd); + int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); + u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + unsigned int *doorbell_off); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index c74cf22..035bbc9 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -79,7 +79,9 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) + { + struct kfd_process *process; + +- if (!thread->mm) ++ BUG_ON(!kfd_process_wq); ++ ++ if (thread->mm == NULL) + return ERR_PTR(-EINVAL); + + /* Only the pthreads threading model is supported. */ +@@ -99,7 +101,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) + /* A prior open of /dev/kfd could have already created the process. */ + process = find_process(thread); + if (process) +- pr_debug("Process already found\n"); ++ pr_debug("kfd: process already found\n"); + + if (!process) + process = create_process(thread); +@@ -115,7 +117,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) + { + struct kfd_process *process; + +- if (!thread->mm) ++ if (thread->mm == NULL) + return ERR_PTR(-EINVAL); + + /* Only the pthreads threading model is supported. */ +@@ -200,8 +202,10 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) + struct kfd_process_release_work *work; + struct kfd_process *p; + ++ BUG_ON(!kfd_process_wq); ++ + p = container_of(rcu, struct kfd_process, rcu); +- WARN_ON(atomic_read(&p->mm->mm_count) <= 0); ++ BUG_ON(atomic_read(&p->mm->mm_count) <= 0); + + mmdrop(p->mm); + +@@ -225,8 +229,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, + * mmu_notifier srcu is read locked + */ + p = container_of(mn, struct kfd_process, mmu_notifier); +- if (WARN_ON(p->mm != mm)) +- return; ++ BUG_ON(p->mm != mm); + + mutex_lock(&kfd_processes_mutex); + hash_del_rcu(&p->kfd_processes); +@@ -247,7 +250,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, + kfd_dbgmgr_destroy(pdd->dev->dbgmgr); + + if (pdd->reset_wavefronts) { +- pr_warn("Resetting all wave fronts\n"); ++ pr_warn("amdkfd: Resetting all wave fronts\n"); + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + } +@@ -404,6 +407,8 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) + struct kfd_process *p; + struct kfd_process_device *pdd; + ++ BUG_ON(dev == NULL); ++ + /* + * Look for the process that matches the pasid. 
If there is no such + * process, we either released it in amdkfd's own notifier, or there +@@ -444,16 +449,14 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) + mutex_unlock(&p->mutex); + } + +-struct kfd_process_device *kfd_get_first_process_device_data( +- struct kfd_process *p) ++struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) + { + return list_first_entry(&p->per_device_data, + struct kfd_process_device, + per_device_list); + } + +-struct kfd_process_device *kfd_get_next_process_device_data( +- struct kfd_process *p, ++struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, + struct kfd_process_device *pdd) + { + if (list_is_last(&pdd->per_device_list, &p->per_device_data)) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index 4142d63..97f7f7e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -32,9 +32,12 @@ static inline struct process_queue_node *get_queue_by_qid( + { + struct process_queue_node *pqn; + ++ BUG_ON(!pqm); ++ + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { +- if ((pqn->q && pqn->q->properties.queue_id == qid) || +- (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) ++ if (pqn->q && pqn->q->properties.queue_id == qid) ++ return pqn; ++ if (pqn->kq && pqn->kq->queue->properties.queue_id == qid) + return pqn; + } + +@@ -46,13 +49,17 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, + { + unsigned long found; + ++ BUG_ON(!pqm || !qid); ++ ++ pr_debug("kfd: in %s\n", __func__); ++ + found = find_first_zero_bit(pqm->queue_slot_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + +- pr_debug("The new slot id %lu\n", found); ++ pr_debug("kfd: the new slot id %lu\n", found); + + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { +- pr_info("Cannot open more queues for process with pasid %d\n", ++ pr_info("amdkfd: Can not open more queues for process with pasid %d\n", + pqm->process->pasid); + return -ENOMEM; + } +@@ -65,11 +72,13 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, + + int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) + { ++ BUG_ON(!pqm); ++ + INIT_LIST_HEAD(&pqm->queues); + pqm->queue_slot_bitmap = + kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_BYTE), GFP_KERNEL); +- if (!pqm->queue_slot_bitmap) ++ if (pqm->queue_slot_bitmap == NULL) + return -ENOMEM; + pqm->process = p; + +@@ -81,6 +90,10 @@ void pqm_uninit(struct process_queue_manager *pqm) + int retval; + struct process_queue_node *pqn, *next; + ++ BUG_ON(!pqm); ++ ++ pr_debug("In func %s\n", __func__); ++ + list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { + retval = pqm_destroy_queue( + pqm, +@@ -89,7 +102,7 @@ void pqm_uninit(struct process_queue_manager *pqm) + pqn->kq->queue->properties.queue_id); + + if (retval != 0) { +- pr_err("failed to destroy queue\n"); ++ pr_err("kfd: failed to destroy queue\n"); + return; + } + } +@@ -104,6 +117,8 @@ static int create_cp_queue(struct process_queue_manager *pqm, + { + int retval; + ++ retval = 0; ++ + /* Doorbell initialized in user space*/ + q_properties->doorbell_ptr = NULL; + +@@ -116,13 +131,16 @@ static int create_cp_queue(struct process_queue_manager *pqm, + + retval = init_queue(q, q_properties); + if (retval != 0) +- return retval; ++ goto err_init_queue; + + (*q)->device = 
dev; + (*q)->process = pqm->process; + +- pr_debug("PQM After init queue"); ++ pr_debug("kfd: PQM After init queue"); ++ ++ return retval; + ++err_init_queue: + return retval; + } + +@@ -143,6 +161,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, + int num_queues = 0; + struct queue *cur; + ++ BUG_ON(!pqm || !dev || !properties || !qid); ++ + memset(&q_properties, 0, sizeof(struct queue_properties)); + memcpy(&q_properties, properties, sizeof(struct queue_properties)); + q = NULL; +@@ -165,7 +185,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + list_for_each_entry(cur, &pdd->qpd.queues_list, list) + num_queues++; + if (num_queues >= dev->device_info->max_no_of_hqd/2) +- return -ENOSPC; ++ return (-ENOSPC); + } + + retval = find_available_queue_slot(pqm, qid); +@@ -177,7 +197,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); + } + +- pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); ++ pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); + if (!pqn) { + retval = -ENOMEM; + goto err_allocate_pqn; +@@ -208,7 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + ((dev->dqm->processes_count >= VMID_PER_DEVICE) || + (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { +- pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); ++ pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + retval = -EPERM; + goto err_create_queue; + } +@@ -225,7 +245,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + break; + case KFD_QUEUE_TYPE_DIQ: + kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); +- if (!kq) { ++ if (kq == NULL) { + retval = -ENOMEM; + goto err_create_queue; + } +@@ -236,22 +256,22 @@ int pqm_create_queue(struct process_queue_manager *pqm, + kq, &pdd->qpd); + break; + default: +- WARN(1, "Invalid queue type %d", type); +- retval = -EINVAL; ++ BUG(); ++ break; + } + + if (retval != 0) { +- pr_err("DQM create queue failed\n"); ++ pr_debug("Error dqm create queue\n"); + goto err_create_queue; + } + +- pr_debug("PQM After DQM create queue\n"); ++ pr_debug("kfd: PQM After DQM create queue\n"); + + list_add(&pqn->process_queue_list, &pqm->queues); + + if (q) { + *properties = q->properties; +- pr_debug("PQM done creating queue\n"); ++ pr_debug("kfd: PQM done creating queue\n"); + print_queue_properties(properties); + } + +@@ -277,11 +297,14 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) + + dqm = NULL; + ++ BUG_ON(!pqm); + retval = 0; + ++ pr_debug("kfd: In Func %s\n", __func__); ++ + pqn = get_queue_by_qid(pqm, qid); +- if (!pqn) { +- pr_err("Queue id does not match any known queue\n"); ++ if (pqn == NULL) { ++ pr_err("kfd: queue id does not match any known queue\n"); + return -EINVAL; + } + +@@ -290,8 +313,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) + dev = pqn->kq->dev; + if (pqn->q) + dev = pqn->q->device; +- if (WARN_ON(!dev)) +- return -ENODEV; ++ BUG_ON(!dev); + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { +@@ -331,9 +353,12 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, + int retval; + struct process_queue_node *pqn; + ++ BUG_ON(!pqm); ++ + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { +- pr_debug("No queue %d exists for update operation\n", qid); ++ pr_debug("amdkfd: No queue %d exists for update operation\n", ++ qid); + return -EFAULT; 
+ }
+
+@@ -356,6 +381,8 @@ struct kernel_queue *pqm_get_kernel_queue(
+ {
+ 	struct process_queue_node *pqn;
+
++	BUG_ON(!pqm);
++
+ 	pqn = get_queue_by_qid(pqm, qid);
+ 	if (pqn && pqn->kq)
+ 		return pqn->kq;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+index a5315d4..0ab1970 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+@@ -65,15 +65,17 @@ void print_queue(struct queue *q)
+
+ int init_queue(struct queue **q, const struct queue_properties *properties)
+ {
+-	struct queue *tmp_q;
++	struct queue *tmp;
+
+-	tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL);
+-	if (!tmp_q)
++	BUG_ON(!q);
++
++	tmp = kzalloc(sizeof(struct queue), GFP_KERNEL);
++	if (!tmp)
+ 		return -ENOMEM;
+
+-	memcpy(&tmp_q->properties, properties, sizeof(*properties));
++	memcpy(&tmp->properties, properties, sizeof(struct queue_properties));
+
+-	*q = tmp_q;
++	*q = tmp;
+ 	return 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+index e0b78fd..8c6e47c 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -108,6 +108,9 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
+ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
+ 		struct crat_subtype_computeunit *cu)
+ {
++	BUG_ON(!dev);
++	BUG_ON(!cu);
++
+ 	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
+ 	dev->node_props.cpu_core_id_base = cu->processor_id_low;
+ 	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
+@@ -120,6 +123,9 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+ 		struct crat_subtype_computeunit *cu)
+ {
++	BUG_ON(!dev);
++	BUG_ON(!cu);
++
+ 	dev->node_props.simd_id_base = cu->processor_id_low;
+ 	dev->node_props.simd_count = cu->num_simd_cores;
+ 	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
+@@ -142,6 +148,8 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
+ 	struct kfd_topology_device *dev;
+ 	int i = 0;
+
++	BUG_ON(!cu);
++
+ 	pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
+ 		cu->proximity_domain, cu->hsa_capability);
+ 	list_for_each_entry(dev, &topology_device_list, list) {
+@@ -169,6 +177,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
+ 	struct kfd_topology_device *dev;
+ 	int i = 0;
+
++	BUG_ON(!mem);
++
+ 	pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
+ 		mem->promixity_domain);
+ 	list_for_each_entry(dev, &topology_device_list, list) {
+@@ -213,6 +223,8 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
+ 	struct kfd_topology_device *dev;
+ 	uint32_t id;
+
++	BUG_ON(!cache);
++
+ 	id = cache->processor_id_low;
+
+ 	pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
+@@ -262,6 +274,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
+ 	uint32_t id_from;
+ 	uint32_t id_to;
+
++	BUG_ON(!iolink);
++
+ 	id_from = iolink->proximity_domain_from;
+ 	id_to = iolink->proximity_domain_to;
+
+@@ -309,6 +323,8 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
+ 	struct crat_subtype_iolink *iolink;
+ 	int ret = 0;
+
++	BUG_ON(!sub_type_hdr);
++
+ 	switch (sub_type_hdr->type) {
+ 	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+ 		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+@@ -352,6 +368,8 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
+ 	struct kfd_cache_properties *cache;
+ 	struct kfd_iolink_properties *iolink;
+
++	BUG_ON(!dev);
++
+ 	list_del(&dev->list);
+
+ 	while (dev->mem_props.next != &dev->mem_props) {
+@@ -398,7 +416,7 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
+ 	struct kfd_topology_device *dev;
+
+ 	dev = kfd_alloc_struct(dev);
+-	if (!dev) {
++	if (dev == NULL) {
+ 		pr_err("No memory to allocate a topology device");
+ 		return NULL;
+ 	}
+@@ -657,7 +675,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
+ 			dev->node_props.simd_count);
+
+ 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
+-		pr_info_once("mem_banks_count truncated from %d to %d\n",
++		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
+ 				dev->node_props.mem_banks_count,
+ 				dev->mem_bank_count);
+ 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
+@@ -755,6 +773,8 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
+ 	struct kfd_cache_properties *cache;
+ 	struct kfd_mem_properties *mem;
+
++	BUG_ON(!dev);
++
+ 	if (dev->kobj_iolink) {
+ 		list_for_each_entry(iolink, &dev->io_link_props, list)
+ 			if (iolink->kobj) {
+@@ -809,12 +829,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
+ 	int ret;
+ 	uint32_t i;
+
+-	if (WARN_ON(dev->kobj_node))
+-		return -EEXIST;
++	BUG_ON(!dev);
+
+ 	/*
+ 	 * Creating the sysfs folders
+ 	 */
++	BUG_ON(dev->kobj_node);
+ 	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
+ 	if (!dev->kobj_node)
+ 		return -ENOMEM;
+@@ -947,7 +967,7 @@ static int kfd_topology_update_sysfs(void)
+ 	int ret;
+
+ 	pr_info("Creating topology SYSFS entries\n");
+-	if (!sys_props.kobj_topology) {
++	if (sys_props.kobj_topology == NULL) {
+ 		sys_props.kobj_topology =
+ 				kfd_alloc_struct(sys_props.kobj_topology);
+ 		if (!sys_props.kobj_topology)
+@@ -1107,8 +1127,10 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
+ 	struct kfd_topology_device *dev;
+ 	struct kfd_topology_device *out_dev = NULL;
+
++	BUG_ON(!gpu);
++
+ 	list_for_each_entry(dev, &topology_device_list, list)
+-		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
++		if (dev->gpu == NULL && dev->node_props.simd_count > 0) {
+ 			dev->gpu = gpu;
+ 			out_dev = dev;
+ 			break;
+@@ -1131,9 +1153,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ 	struct kfd_topology_device *dev;
+ 	int res;
+
++	BUG_ON(!gpu);
++
+ 	gpu_id = kfd_generate_gpu_id(gpu);
+
+-	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
++	pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+
+ 	down_write(&topology_lock);
+ 	/*
+@@ -1156,8 +1180,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ 	 * GPU vBIOS
+ 	 */
+
+-	/* Update the SYSFS tree, since we added another topology
+-	 * device
++	/*
++	 * Update the SYSFS tree, since we added another topology device
+ 	 */
+ 	if (kfd_topology_update_sysfs() < 0)
+ 		kfd_topology_release_sysfs();
+@@ -1176,7 +1200,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+
+ 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ 		dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
+-		pr_info("Adding doorbell packet type capability\n");
++		pr_info("amdkfd: adding doorbell packet type capability\n");
+ 	}
+
+ 	res = 0;
+@@ -1196,6 +1220,8 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
+ 	uint32_t gpu_id;
+ 	int res = -ENODEV;
+
++	BUG_ON(!gpu);
++
+ 	down_write(&topology_lock);
+
+ 	list_for_each_entry(dev, &topology_device_list, list)
+diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+index 94277cb..36f3766 100644
+--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+@@ -41,11 +41,6 @@ struct kgd_dev;
+
+ struct kgd_mem;
+
+-enum kfd_preempt_type {
+-	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
+-	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+-};
+-
+ enum kgd_memory_pool {
+ 	KGD_POOL_SYSTEM_CACHEABLE = 1,
+ 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
+@@ -87,17 +82,6 @@ struct kgd2kfd_shared_resources {
+ 	size_t doorbell_start_offset;
+ };
+
+-struct tile_config {
+-	uint32_t *tile_config_ptr;
+-	uint32_t *macro_tile_config_ptr;
+-	uint32_t num_tile_configs;
+-	uint32_t num_macro_tile_configs;
+-
+-	uint32_t gb_addr_config;
+-	uint32_t num_banks;
+-	uint32_t num_ranks;
+-};
+-
+ /**
+  * struct kfd2kgd_calls
+  *
+@@ -139,11 +123,6 @@ struct tile_config {
+  *
+  * @get_fw_version: Returns FW versions from the header
+  *
+- * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID.
+- * Only used for no cp scheduling mode
+- *
+- * @get_tile_config: Returns GPU-specific tiling mode information
+- *
+  * This structure contains function pointers to services that the kgd driver
+  * provides to amdkfd driver.
+  *
+@@ -174,16 +153,14 @@ struct kfd2kgd_calls {
+ 	int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
+
+ 	int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+-			uint32_t queue_id, uint32_t __user *wptr,
+-			uint32_t wptr_shift, uint32_t wptr_mask,
+-			struct mm_struct *mm);
++			uint32_t queue_id, uint32_t __user *wptr);
+
+ 	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
+
+ 	bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
+ 				uint32_t pipe_id, uint32_t queue_id);
+
+-	int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
++	int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
+ 				unsigned int timeout, uint32_t pipe_id,
+ 				uint32_t queue_id);
+
+@@ -215,9 +192,6 @@ struct kfd2kgd_calls {
+
+ 	uint16_t (*get_fw_version)(struct kgd_dev *kgd,
+ 				enum kgd_engine_type type);
+-	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
+-				uint64_t va, uint32_t vmid);
+-	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
+ };
+
+ /**
+diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
+index f6578c9..a2ab6dc 100644
+--- a/drivers/gpu/drm/radeon/radeon_kfd.c
++++ b/drivers/gpu/drm/radeon/radeon_kfd.c
+@@ -75,14 +75,12 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ 		uint32_t hpd_size, uint64_t hpd_gpu_addr);
+ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+-			uint32_t queue_id, uint32_t __user *wptr,
+-			uint32_t wptr_shift, uint32_t wptr_mask,
+-			struct mm_struct *mm);
++			uint32_t queue_id, uint32_t __user *wptr);
+ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ 				uint32_t pipe_id, uint32_t queue_id);
+
+-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ 				unsigned int timeout, uint32_t pipe_id,
+ 				uint32_t queue_id);
+ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+@@ -484,9 +482,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+ }
+
+ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+-			uint32_t queue_id, uint32_t __user *wptr,
+-			uint32_t wptr_shift, uint32_t wptr_mask,
+-			struct mm_struct *mm)
++			uint32_t queue_id, uint32_t __user *wptr)
+ {
+ 	uint32_t wptr_shadow, is_wptr_shadow_valid;
+ 	struct cik_mqd *m;
+@@ -640,7 +636,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+ 	return false;
+ }
+
+-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
++static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ 				unsigned int timeout, uint32_t pipe_id,
+ 				uint32_t queue_id)
+ {
+@@ -789,8 +785,7 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ 					unsigned int watch_point_id,
+ 					unsigned int reg_offset)
+ {
+-	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]
+-			/ 4;
++	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+ }
+
+ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
+diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
+index f7015aa..5bb2b45 100644
+--- a/include/uapi/linux/kfd_ioctl.h
++++ b/include/uapi/linux/kfd_ioctl.h
+@@ -232,34 +232,6 @@ struct kfd_ioctl_wait_events_args {
+ 	__u32 wait_result;		/* from KFD */
+ };
+
+-struct kfd_ioctl_set_scratch_backing_va_args {
+-	__u64 va_addr;	/* to KFD */
+-	__u32 gpu_id;	/* to KFD */
+-	__u32 pad;
+-};
+-
+-struct kfd_ioctl_get_tile_config_args {
+-	/* to KFD: pointer to tile array */
+-	__u64 tile_config_ptr;
+-	/* to KFD: pointer to macro tile array */
+-	__u64 macro_tile_config_ptr;
+-	/* to KFD: array size allocated by user mode
+-	 * from KFD: array size filled by kernel
+-	 */
+-	__u32 num_tile_configs;
+-	/* to KFD: array size allocated by user mode
+-	 * from KFD: array size filled by kernel
+-	 */
+-	__u32 num_macro_tile_configs;
+-
+-	__u32 gpu_id;		/* to KFD */
+-	__u32 gb_addr_config;	/* from KFD */
+-	__u32 num_banks;	/* from KFD */
+-	__u32 num_ranks;	/* from KFD */
+-	/* struct size can be extended later if needed
+-	 * without breaking ABI compatibility
+-	 */
+-};
+
+ #define AMDKFD_IOCTL_BASE 'K'
+ #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
+@@ -315,13 +287,7 @@ struct kfd_ioctl_get_tile_config_args {
+ #define AMDKFD_IOC_DBG_WAVE_CONTROL		\
+ 		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
+-#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
+-		AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
+-
+-#define AMDKFD_IOC_GET_TILE_CONFIG	\
+-		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
+-
+ #define AMDKFD_COMMAND_START		0x01
+-#define AMDKFD_COMMAND_END		0x13
++#define AMDKFD_COMMAND_END		0x11
+
+ #endif
+-- 
+2.7.4
+