about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch 322
1 files changed, 322 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch
new file mode 100644
index 00000000..60c5babc
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4383-drm-amdkfd-Add-kfd-debugger-support-for-gfx10.patch
@@ -0,0 +1,322 @@
+From 600b8f93502e13d7600caa9ae38edf398c42f566 Mon Sep 17 00:00:00 2001
+From: Philip Cox <Philip.Cox@amd.com>
+Date: Thu, 5 Sep 2019 09:08:57 -0400
+Subject: [PATCH 4383/4736] drm/amdkfd: Add kfd debugger support for gfx10
+
+Adding code to the gfx10 code path to support the kfd debugger
+functionality.
+
+Change-Id: Ifc822fa877ffdabb7b8e3ad167515aaaddbc6e98
+Signed-off-by: Philip Cox <Philip.Cox@amd.com>
+---
+ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 147 ++++++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++
+ .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 72 +++++++--
+ 3 files changed, 219 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+index 5eb289e887b3..d8fc3ba71628 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+@@ -813,6 +813,147 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+ }
+
++/* Enable the gfx10 debug trap: set STALL_RA, clear SPI_GDBG_TRAP_MASK, then
++ * restore the STALL_VMID field read on entry with STALL_RA left cleared.
++ * trap_debug_wave_launch_mode and vmid are unused here. Always returns 0.
++ */
++uint32_t kgd_gfx_v10_enable_debug_trap(struct kgd_dev *kgd,
++				uint32_t trap_debug_wave_launch_mode,
++				uint32_t vmid)
++{
++	struct amdgpu_device *adev = get_amdgpu_device(kgd);
++	uint32_t data;
++	uint32_t stall_vmid;
++
++	mutex_lock(&adev->grbm_idx_mutex);
++	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
++	stall_vmid = REG_GET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID);
++
++	data = REG_SET_FIELD(0, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
++
++	/* REG_GET_FIELD() returned the field value, so put it back into the
++	 * STALL_VMID field rather than writing it as a raw register image.
++	 */
++	data = REG_SET_FIELD(0, SPI_GDBG_WAVE_CNTL, STALL_VMID, stall_vmid);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
++
++	mutex_unlock(&adev->grbm_idx_mutex);
++
++	return 0;
++}
++
++/* Write 0 to SPI_GDBG_TRAP_MASK under grbm_idx_mutex. Always returns 0. */
++uint32_t kgd_gfx_v10_disable_debug_trap(struct kgd_dev *kgd)
++{
++	struct amdgpu_device *adev = get_amdgpu_device(kgd);
++	const uint32_t cleared_mask = 0;
++
++	mutex_lock(&adev->grbm_idx_mutex);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), cleared_mask);
++	mutex_unlock(&adev->grbm_idx_mutex);
++
++	return 0;
++}
++
++/* Write EXCP_EN = trap_mask and REPLACE = trap_override to SPI_GDBG_TRAP_MASK,
++ * with STALL_RA set before the update and cleared after. Always returns 0.
++ */
++uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct kgd_dev *kgd,
++					uint32_t trap_override,
++					uint32_t trap_mask)
++{
++	struct amdgpu_device *adev = get_amdgpu_device(kgd);
++	uint32_t wave_cntl;
++	uint32_t mask;
++
++	mutex_lock(&adev->grbm_idx_mutex);
++	wave_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
++	wave_cntl = REG_SET_FIELD(wave_cntl, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl);
++
++	mask = REG_SET_FIELD(0, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask);
++	mask = REG_SET_FIELD(mask, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), mask);
++
++	wave_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
++	wave_cntl = REG_SET_FIELD(wave_cntl, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl);
++
++	mutex_unlock(&adev->grbm_idx_mutex);
++
++	return 0;
++}
++
++/* Program the wave launch mode for one VMID. Mode 4 sets STALL_VMID and
++ * STALL_RA in SPI_GDBG_WAVE_CNTL; other non-zero modes are written to MODE and
++ * VMID_MASK in SPI_GDBG_WAVE_CNTL2. Bit masks use 1U << vmid. Returns 0.
++ */
++uint32_t kgd_gfx_v10_set_wave_launch_mode(struct kgd_dev *kgd,
++					uint8_t wave_launch_mode,
++					uint32_t vmid)
++{
++	struct amdgpu_device *adev = get_amdgpu_device(kgd);
++	bool is_stall_mode = (wave_launch_mode == 4);
++	bool is_mode_set = (wave_launch_mode != 0 && !is_stall_mode);
++	uint32_t data = 0;
++
++	mutex_lock(&adev->grbm_idx_mutex);
++	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
++			VMID_MASK, is_mode_set ? 1U << vmid : 0);
++	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
++			MODE, is_mode_set ? wave_launch_mode : 0);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
++
++	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
++	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
++			STALL_VMID, is_stall_mode ? 1U << vmid : 0);
++	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
++			STALL_RA, is_stall_mode ? 1 : 0);
++	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
++
++	mutex_unlock(&adev->grbm_idx_mutex);
++
++	return 0;
++}
++
++/* kgd_gfx_v10_get_iq_wait_times: Stores mmCP_IQ_WAIT_TIME2 in *wait_times.
++ * Wait counts listed for the mmCP_IQ_WAIT_TIME1/2 pair (only TIME2 is read):
++ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
++ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
++ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
++ * gws_wait_time -- Wait Count for Global Wave Syncs.
++ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
++ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
++ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
++ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
++ */
++void kgd_gfx_v10_get_iq_wait_times(struct kgd_dev *kgd,
++					uint32_t *wait_times)
++{
++	struct amdgpu_device *adev = get_amdgpu_device(kgd);
++	uint32_t time2 = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
++
++	*wait_times = time2;
++}
++
++/* Fill in the register offset and value for a grace-period update: the
++ * value is wait_times with its CP_IQ_WAIT_TIME2.SCH_WAVE field replaced by
++ * grace_period, the offset is mmCP_IQ_WAIT_TIME2. kgd is not used.
++ */
++void kgd_gfx_v10_build_grace_period_packet_info(struct kgd_dev *kgd,
++						uint32_t wait_times,
++						uint32_t grace_period,
++						uint32_t *reg_offset,
++						uint32_t *reg_data)
++{
++	*reg_data = REG_SET_FIELD(wait_times, CP_IQ_WAIT_TIME2,
++				  SCH_WAVE, grace_period);
++
++	*reg_offset = mmCP_IQ_WAIT_TIME2;
++}
++
+ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+@@ -836,4 +977,10 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
++ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
++ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
++ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
++ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
++ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
++ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+index c7a6f98bf6b8..e4b4f4b09329 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+@@ -1597,6 +1597,8 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
+ {
+ int i;
+ uint32_t sh_mem_bases;
++ uint32_t trap_config_vmid_mask = 0;
++ uint32_t data;
+
+ /*
+ * Configure apertures:
+@@ -1612,9 +1614,23 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
++
++ /* Calculate trap config vmid mask */
++ trap_config_vmid_mask |= (1 << i);
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
++
++ data = 0;
++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
++ VMID_SEL, trap_config_vmid_mask);
++ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
++ TRAP_EN, 1);
++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
++
++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
++ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+ }
+
+ static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+index efc6c37ec37e..7a695b9a2bcd 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+@@ -73,6 +73,7 @@ static int pm_map_process_v10(struct packet_manager *pm,
+ {
+ struct pm4_mes_map_process *packet;
+ uint64_t vm_page_table_base_addr = qpd->page_table_base;
++ struct kfd_dev *kfd = pm->dqm->dev;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+@@ -89,6 +90,11 @@ static int pm_map_process_v10(struct packet_manager *pm,
+
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
++ if (kfd->dqm->trap_debug_vmid) {
++ packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid;
++ packet->bitfields2.new_debug = 1;
++ }
++
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ if (qpd->tba_addr) {
+@@ -206,6 +212,40 @@ static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
+ return 0;
+ }
+
++/* Build an IT_WRITE_DATA packet in buffer for the register write described
++ * by build_grace_period_packet_info(). When grace_period is
++ * USE_DEFAULT_GRACE_PERIOD, pm->dqm->wait_times is written unchanged
++ * instead of the computed value. Always returns 0.
++ */
++static int pm_set_grace_period_v10(struct packet_manager *pm,
++		uint32_t *buffer,
++		uint32_t grace_period)
++{
++	struct pm4_mec_write_data_mmio *packet =
++			(struct pm4_mec_write_data_mmio *)buffer;
++	struct kfd_dev *dev = pm->dqm->dev;
++	uint32_t reg_offset = 0;
++	uint32_t reg_data = 0;
++
++	dev->kfd2kgd->build_grace_period_packet_info(dev->kgd,
++			pm->dqm->wait_times, grace_period,
++			&reg_offset, &reg_data);
++
++	if (grace_period == USE_DEFAULT_GRACE_PERIOD)
++		reg_data = pm->dqm->wait_times;
++
++	memset(packet, 0, sizeof(*packet));
++	packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
++						    sizeof(*packet));
++	packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register;
++	packet->bitfields2.addr_incr =
++			addr_incr___write_data__do_not_increment_address;
++	packet->bitfields3.dst_mmreg_addr = reg_offset;
++	packet->data = reg_data;
++
++	return 0;
++}
++
+ static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+@@ -330,21 +370,21 @@ static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
+ }
+
+ const struct packet_manager_funcs kfd_v10_pm_funcs = {
+- .map_process = pm_map_process_v10,
+- .runlist = pm_runlist_v10,
+- .set_resources = pm_set_resources_vi,
+- .map_queues = pm_map_queues_v10,
+- .unmap_queues = pm_unmap_queues_v10,
+- .set_grace_period = NULL,
+- .query_status = pm_query_status_v10,
+- .release_mem = pm_release_mem_v10,
+- .map_process_size = sizeof(struct pm4_mes_map_process),
+- .runlist_size = sizeof(struct pm4_mes_runlist),
+- .set_resources_size = sizeof(struct pm4_mes_set_resources),
+- .map_queues_size = sizeof(struct pm4_mes_map_queues),
+- .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+- .set_grace_period_size = 0,
+- .query_status_size = sizeof(struct pm4_mes_query_status),
+- .release_mem_size = sizeof(struct pm4_mec_release_mem)
++ .map_process = pm_map_process_v10,
++ .runlist = pm_runlist_v10,
++ .set_resources = pm_set_resources_vi,
++ .map_queues = pm_map_queues_v10,
++ .unmap_queues = pm_unmap_queues_v10,
++ .set_grace_period = pm_set_grace_period_v10,
++ .query_status = pm_query_status_v10,
++ .release_mem = pm_release_mem_v10,
++ .map_process_size = sizeof(struct pm4_mes_map_process),
++ .runlist_size = sizeof(struct pm4_mes_runlist),
++ .set_resources_size = sizeof(struct pm4_mes_set_resources),
++ .map_queues_size = sizeof(struct pm4_mes_map_queues),
++ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
++ .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
++ .query_status_size = sizeof(struct pm4_mes_query_status),
++ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+ };
+
+--
+2.17.1
+