From 17dd5f8421374886d216a736f68211f68db08a02 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 6 Oct 2016 20:01:20 -0500 Subject: [PATCH 1200/4131] drm/amdkfd: Enable user trap handler on non-CWSR ASICs AMDKFD_IOC_SET_TRAP_HANDLER requires a context-save trap handler to be installed, from which a jump to the user-provided trap handler can be initiated. We would like to have this feature on ASICs which do not support context-save/restore. When a context-save handler is not installed configure the trap handler registers to point directly to the user trap handler. Also remove the legacy microcode version check since the KFD relies on functionality present only in newer versions. Signed-off-by: Jay Cornwall --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 ----- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 25 +++++++++++++--------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +++++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 17 +++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 5 files changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a828572..61729eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -519,11 +519,6 @@ static int kfd_ioctl_set_trap_handler(struct file *filep, err = -ESRCH; goto out; } - if (!dev->cwsr_enabled || !pdd->qpd.cwsr_kaddr) { - pr_err("kfd: CWSR is not enabled, can't set trap handler.\n"); - err = -EINVAL; - goto out; - } if (dev->dqm->ops.set_trap_handler(dev->dqm, &pdd->qpd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index ba48aca..6bab9db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -43,7 +43,8 @@ static const struct kfd_device_info kaveri_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = true + .is_need_iommu_device = true, + .supports_cwsr = false, }; static const struct kfd_device_info hawaii_device_info = { @@ -55,7 +56,8 @@ static const struct kfd_device_info hawaii_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = false + .is_need_iommu_device = false, + .supports_cwsr = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -67,7 +69,8 @@ static const struct kfd_device_info carrizo_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = true + .is_need_iommu_device = true, + .supports_cwsr = true, }; static const struct kfd_device_info tonga_device_info = { @@ -78,7 +81,8 @@ static const struct kfd_device_info tonga_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = false + .is_need_iommu_device = false, + .supports_cwsr = false, }; static const struct kfd_device_info fiji_device_info = { @@ -89,7 +93,8 @@ static const struct kfd_device_info fiji_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = false + .is_need_iommu_device = false, + .supports_cwsr = true, }; static const struct kfd_device_info polaris10_device_info = { @@ -100,7 +105,8 @@ static const struct kfd_device_info polaris10_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = false + .is_need_iommu_device = false, + .supports_cwsr = true, }; static const struct kfd_device_info polaris11_device_info = { @@ -111,7 +117,8 @@ static const struct kfd_device_info polaris11_device_info = { .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, - .is_need_iommu_device = false + .is_need_iommu_device = false, + .supports_cwsr = true, }; struct kfd_deviceid { @@ -321,10 +328,8 @@ static int kfd_cwsr_init(struct kfd_dev *kfd) { /* * Initialize the CWSR required memory for TBA and TMA - * only support CWSR on VI and up with FW version >=625. */ - if (cwsr_enable && - (kfd->mec_fw_version >= KFD_CWSR_CZ_FW_VER)) { + if (cwsr_enable && kfd->device_info->supports_cwsr) { void *cwsr_addr = NULL; unsigned int size = sizeof(cwsr_trap_carrizo_hex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 3bc831f..349b8c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -210,6 +210,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, q->properties.queue_percent > 0 && q->properties.queue_address != 0); + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -1333,9 +1336,16 @@ static int set_trap_handler(struct device_queue_manager *dqm, { uint64_t *tma; - tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset); - tma[0] = tba_addr; - tma[1] = tma_addr; + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ + tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 579cc68..b0ea0d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -142,10 +142,17 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_iq_rptr = 1; if (q->tba_addr) { - m->cp_hqd_persistent_state |= - (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); + m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); + m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); + m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); m->compute_pgm_rsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); m->cp_hqd_ctx_save_base_addr_lo = lower_32_bits(q->ctx_save_restore_area_address); m->cp_hqd_ctx_save_base_addr_hi = @@ -154,10 +161,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_cntl_stack_size = q->ctl_stack_size; m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; m->cp_hqd_wg_state_offset = q->ctl_stack_size; - m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); - m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); - m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); - m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); } *mqd = m; @@ -238,7 +241,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } - if (q->tba_addr) + if (mm->dev->cwsr_enabled) m->cp_hqd_ctx_save_control = atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ed7f9bc..c02bfa0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -188,6 +188,7 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool is_need_iommu_device; + bool supports_cwsr; }; struct kfd_mem_obj { -- 2.7.4