aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch221
1 files changed, 221 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch
new file mode 100644
index 00000000..389c2c9a
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch
@@ -0,0 +1,221 @@
+From 17dd5f8421374886d216a736f68211f68db08a02 Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <Jay.Cornwall@amd.com>
+Date: Thu, 6 Oct 2016 20:01:20 -0500
+Subject: [PATCH 1200/4131] drm/amdkfd: Enable user trap handler on non-CWSR
+ ASICs
+
+AMDKFD_IOC_SET_TRAP_HANDLER requires a context-save trap handler
+to be installed, from which a jump to the user-provided trap handler
+can be initiated. We would like to have this feature on ASICs which
+do not support context-save/restore.
+
+When a context-save handler is not installed, configure the trap handler
+registers to point directly to the user trap handler.
+
+Also remove the legacy microcode version check since the KFD relies on
+functionality present only in newer versions.
+
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 -----
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 25 +++++++++++++---------
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +++++++++++---
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 17 +++++++++------
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
+ 5 files changed, 39 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index a828572..61729eb 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -519,11 +519,6 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
+ err = -ESRCH;
+ goto out;
+ }
+- if (!dev->cwsr_enabled || !pdd->qpd.cwsr_kaddr) {
+- pr_err("kfd: CWSR is not enabled, can't set trap handler.\n");
+- err = -EINVAL;
+- goto out;
+- }
+
+ if (dev->dqm->ops.set_trap_handler(dev->dqm,
+ &pdd->qpd,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index ba48aca..6bab9db 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -43,7 +43,8 @@ static const struct kfd_device_info kaveri_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = true
++ .is_need_iommu_device = true,
++ .supports_cwsr = false,
+ };
+
+ static const struct kfd_device_info hawaii_device_info = {
+@@ -55,7 +56,8 @@ static const struct kfd_device_info hawaii_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = false
++ .is_need_iommu_device = false,
++ .supports_cwsr = false,
+ };
+
+ static const struct kfd_device_info carrizo_device_info = {
+@@ -67,7 +69,8 @@ static const struct kfd_device_info carrizo_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = true
++ .is_need_iommu_device = true,
++ .supports_cwsr = true,
+ };
+
+ static const struct kfd_device_info tonga_device_info = {
+@@ -78,7 +81,8 @@ static const struct kfd_device_info tonga_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = false
++ .is_need_iommu_device = false,
++ .supports_cwsr = false,
+ };
+
+ static const struct kfd_device_info fiji_device_info = {
+@@ -89,7 +93,8 @@ static const struct kfd_device_info fiji_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = false
++ .is_need_iommu_device = false,
++ .supports_cwsr = true,
+ };
+
+ static const struct kfd_device_info polaris10_device_info = {
+@@ -100,7 +105,8 @@ static const struct kfd_device_info polaris10_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = false
++ .is_need_iommu_device = false,
++ .supports_cwsr = true,
+ };
+
+ static const struct kfd_device_info polaris11_device_info = {
+@@ -111,7 +117,8 @@ static const struct kfd_device_info polaris11_device_info = {
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+- .is_need_iommu_device = false
++ .is_need_iommu_device = false,
++ .supports_cwsr = true,
+ };
+
+ struct kfd_deviceid {
+@@ -321,10 +328,8 @@ static int kfd_cwsr_init(struct kfd_dev *kfd)
+ {
+ /*
+ * Initialize the CWSR required memory for TBA and TMA
+- * only support CWSR on VI and up with FW version >=625.
+ */
+- if (cwsr_enable &&
+- (kfd->mec_fw_version >= KFD_CWSR_CZ_FW_VER)) {
++ if (cwsr_enable && kfd->device_info->supports_cwsr) {
+ void *cwsr_addr = NULL;
+ unsigned int size = sizeof(cwsr_trap_carrizo_hex);
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 3bc831f..349b8c1 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -210,6 +210,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
+ q->properties.queue_percent > 0 &&
+ q->properties.queue_address != 0);
+
++ q->properties.tba_addr = qpd->tba_addr;
++ q->properties.tma_addr = qpd->tma_addr;
++
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+ retval = create_compute_queue_nocpsch(dqm, q, qpd);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+@@ -1333,9 +1336,16 @@ static int set_trap_handler(struct device_queue_manager *dqm,
+ {
+ uint64_t *tma;
+
+- tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset);
+- tma[0] = tba_addr;
+- tma[1] = tma_addr;
++ if (dqm->dev->cwsr_enabled) {
++ /* Jump from CWSR trap handler to user trap */
++ tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset);
++ tma[0] = tba_addr;
++ tma[1] = tma_addr;
++ } else {
++ qpd->tba_addr = tba_addr;
++ qpd->tma_addr = tma_addr;
++ }
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+index 579cc68..b0ea0d2 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+@@ -142,10 +142,17 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
+ m->cp_hqd_iq_rptr = 1;
+
+ if (q->tba_addr) {
+- m->cp_hqd_persistent_state |=
+- (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
++ m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
++ m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
++ m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
++ m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
++ }
++
++ if (mm->dev->cwsr_enabled) {
++ m->cp_hqd_persistent_state |=
++ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+@@ -154,10 +161,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+- m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+- m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+- m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+- m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+ }
+
+ *mqd = m;
+@@ -238,7 +241,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
+ }
+- if (q->tba_addr)
++ if (mm->dev->cwsr_enabled)
+ m->cp_hqd_ctx_save_control =
+ atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+ mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index ed7f9bc..c02bfa0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -188,6 +188,7 @@ struct kfd_device_info {
+ uint8_t num_of_watch_points;
+ uint16_t mqd_size_aligned;
+ bool is_need_iommu_device;
++ bool supports_cwsr;
+ };
+
+ struct kfd_mem_obj {
+--
+2.7.4
+