diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch new file mode 100644 index 00000000..60c5babc --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch @@ -0,0 +1,322 @@ +From 600b8f93502e13d7600caa9ae38edf398c42f566 Mon Sep 17 00:00:00 2001 +From: Philip Cox <Philip.Cox@amd.com> +Date: Thu, 5 Sep 2019 09:08:57 -0400 +Subject: [PATCH 4383/4736] drm/amdkfd: Add kfd debugger support for gfx10 + +Adding code to the gfx10 code path to support the kfd debugger +functionality. + +Change-Id: Ifc822fa877ffdabb7b8e3ad167515aaaddbc6e98 +Signed-off-by: Philip Cox <Philip.Cox@amd.com> +--- + .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 147 ++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++ + .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 72 +++++++-- + 3 files changed, 219 insertions(+), 16 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +index 5eb289e887b3..d8fc3ba71628 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +@@ -813,6 +813,147 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); + } + ++uint32_t kgd_gfx_v10_enable_debug_trap(struct kgd_dev *kgd, ++ uint32_t trap_debug_wave_launch_mode, ++ uint32_t vmid) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ uint32_t data = 0; ++ uint32_t orig_wave_cntl_value; ++ uint32_t orig_stall_vmid; ++ ++ mutex_lock(&adev->grbm_idx_mutex); ++ ++ orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC, ++ 0, ++ mmSPI_GDBG_WAVE_CNTL)); ++ orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value, ++ 
SPI_GDBG_WAVE_CNTL, ++ STALL_VMID); ++ ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); ++ ++ data = 0; ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); ++ ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid); ++ ++ mutex_unlock(&adev->grbm_idx_mutex); ++ ++ return 0; ++} ++ ++uint32_t kgd_gfx_v10_disable_debug_trap(struct kgd_dev *kgd) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ ++ mutex_lock(&adev->grbm_idx_mutex); ++ ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); ++ ++ mutex_unlock(&adev->grbm_idx_mutex); ++ ++ return 0; ++} ++ ++uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct kgd_dev *kgd, ++ uint32_t trap_override, ++ uint32_t trap_mask) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ uint32_t data = 0; ++ ++ mutex_lock(&adev->grbm_idx_mutex); ++ ++ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); ++ ++ data = 0; ++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, ++ EXCP_EN, trap_mask); ++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, ++ REPLACE, trap_override); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); ++ ++ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); ++ ++ mutex_unlock(&adev->grbm_idx_mutex); ++ ++ return 0; ++} ++ ++uint32_t kgd_gfx_v10_set_wave_launch_mode(struct kgd_dev *kgd, ++ uint8_t wave_launch_mode, ++ uint32_t vmid) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ uint32_t data = 0; ++ bool is_stall_mode; ++ bool is_mode_set; ++ ++ is_stall_mode = (wave_launch_mode == 4); ++ is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4); ++ ++ 
mutex_lock(&adev->grbm_idx_mutex); ++ ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, ++ VMID_MASK, is_mode_set ? 1 << vmid : 0); ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, ++ MODE, is_mode_set ? wave_launch_mode : 0); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); ++ ++ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, ++ STALL_VMID, is_stall_mode ? 1 << vmid : 0); ++ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, ++ STALL_RA, is_stall_mode ? 1 : 0); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); ++ ++ mutex_unlock(&adev->grbm_idx_mutex); ++ ++ return 0; ++} ++ ++/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values ++ * The values read are: ++ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. ++ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. ++ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. ++ * gws_wait_time -- Wait Count for Global Wave Syncs. ++ * que_sleep_wait_time -- Wait Count for Dequeue Retry. ++ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. ++ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. ++ * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 
++ */ ++void kgd_gfx_v10_get_iq_wait_times(struct kgd_dev *kgd, ++ uint32_t *wait_times) ++ ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ ++ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); ++} ++ ++void kgd_gfx_v10_build_grace_period_packet_info(struct kgd_dev *kgd, ++ uint32_t wait_times, ++ uint32_t grace_period, ++ uint32_t *reg_offset, ++ uint32_t *reg_data) ++{ ++ *reg_data = wait_times; ++ ++ *reg_data = REG_SET_FIELD(*reg_data, ++ CP_IQ_WAIT_TIME2, ++ SCH_WAVE, ++ grace_period); ++ ++ *reg_offset = mmCP_IQ_WAIT_TIME2; ++} ++ + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, +@@ -836,4 +977,10 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, ++ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, ++ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, ++ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, ++ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, ++ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, ++ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +index c7a6f98bf6b8..e4b4f4b09329 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -1597,6 +1597,8 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) + { + int i; + uint32_t sh_mem_bases; ++ uint32_t trap_config_vmid_mask = 0; ++ uint32_t data; + + /* + * Configure apertures: +@@ -1612,9 +1614,23 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) + /* CP and shaders */ + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 
sh_mem_bases); ++ ++ /* Calculate trap config vmid mask */ ++ trap_config_vmid_mask |= (1 << i); + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); ++ ++ data = 0; ++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, ++ VMID_SEL, trap_config_vmid_mask); ++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, ++ TRAP_EN, 1); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); ++ ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); ++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + } + + static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c +index efc6c37ec37e..7a695b9a2bcd 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c +@@ -73,6 +73,7 @@ static int pm_map_process_v10(struct packet_manager *pm, + { + struct pm4_mes_map_process *packet; + uint64_t vm_page_table_base_addr = qpd->page_table_base; ++ struct kfd_dev *kfd = pm->dqm->dev; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); +@@ -89,6 +90,11 @@ static int pm_map_process_v10(struct packet_manager *pm, + + packet->bitfields14.num_queues = (qpd->is_debug) ? 
0 : qpd->queue_count; + ++ if (kfd->dqm->trap_debug_vmid) { ++ packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid; ++ packet->bitfields2.new_debug = 1; ++ } ++ + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + if (qpd->tba_addr) { +@@ -206,6 +212,40 @@ static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + ++static int pm_set_grace_period_v10(struct packet_manager *pm, ++ uint32_t *buffer, ++ uint32_t grace_period) ++{ ++ struct pm4_mec_write_data_mmio *packet; ++ uint32_t reg_offset = 0; ++ uint32_t reg_data = 0; ++ ++ pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( ++ pm->dqm->dev->kgd, ++ pm->dqm->wait_times, ++ grace_period, ++ &reg_offset, ++ &reg_data); ++ ++ if (grace_period == USE_DEFAULT_GRACE_PERIOD) ++ reg_data = pm->dqm->wait_times; ++ ++ packet = (struct pm4_mec_write_data_mmio *)buffer; ++ memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); ++ ++ packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA, ++ sizeof(struct pm4_mec_write_data_mmio)); ++ ++ packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register; ++ packet->bitfields2.addr_incr = ++ addr_incr___write_data__do_not_increment_address; ++ ++ packet->bitfields3.dst_mmreg_addr = reg_offset; ++ ++ packet->data = reg_data; ++ ++ return 0; ++} + static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, +@@ -330,21 +370,21 @@ static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer) + } + + const struct packet_manager_funcs kfd_v10_pm_funcs = { +- .map_process = pm_map_process_v10, +- .runlist = pm_runlist_v10, +- .set_resources = pm_set_resources_vi, +- .map_queues = pm_map_queues_v10, +- .unmap_queues = pm_unmap_queues_v10, +- .set_grace_period = NULL, +- .query_status = pm_query_status_v10, +- .release_mem = pm_release_mem_v10, +- .map_process_size = sizeof(struct pm4_mes_map_process), +- 
.runlist_size = sizeof(struct pm4_mes_runlist), +- .set_resources_size = sizeof(struct pm4_mes_set_resources), +- .map_queues_size = sizeof(struct pm4_mes_map_queues), +- .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), +- .set_grace_period_size = 0, +- .query_status_size = sizeof(struct pm4_mes_query_status), +- .release_mem_size = sizeof(struct pm4_mec_release_mem) ++ .map_process = pm_map_process_v10, ++ .runlist = pm_runlist_v10, ++ .set_resources = pm_set_resources_vi, ++ .map_queues = pm_map_queues_v10, ++ .unmap_queues = pm_unmap_queues_v10, ++ .set_grace_period = pm_set_grace_period_v10, ++ .query_status = pm_query_status_v10, ++ .release_mem = pm_release_mem_v10, ++ .map_process_size = sizeof(struct pm4_mes_map_process), ++ .runlist_size = sizeof(struct pm4_mes_runlist), ++ .set_resources_size = sizeof(struct pm4_mes_set_resources), ++ .map_queues_size = sizeof(struct pm4_mes_map_queues), ++ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), ++ .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), ++ .query_status_size = sizeof(struct pm4_mes_query_status), ++ .release_mem_size = sizeof(struct pm4_mec_release_mem) + }; + +-- +2.17.1 + |