diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch | 603 |
1 file changed, 603 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch new file mode 100644 index 00000000..ec5a9d7a --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch @@ -0,0 +1,603 @@ +From 6308e3ee33ebb207403bc2ff7df9f89eb6becb1e Mon Sep 17 00:00:00 2001 +From: Oak Zeng <Oak.Zeng@amd.com> +Date: Thu, 7 Feb 2019 14:02:27 -0600 +Subject: [PATCH 2749/2940] drm/amdkfd: Introduce XGMI SDMA queue type + +Existing QUEUE_TYPE_SDMA means PCIe optimized SDMA queues. +Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized +for non-PCIe transfer such as XGMI. + +Change-Id: I5fd53c4e243eb10e4ecc965de9f8da6a87acd2e8 +Signed-off-by: Oak Zeng <Oak.Zeng@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 + + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 +++ + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 124 +++++++++++++----- + .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 + + .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 + + .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 + + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +- + .../amd/amdkfd/kfd_process_queue_manager.c | 9 +- + include/uapi/linux/kfd_ioctl.h | 1 + + 10 files changed, 128 insertions(+), 37 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index a34c8cea1fb7..684f84f130a8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -235,6 +235,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + q_properties->type = KFD_QUEUE_TYPE_COMPUTE; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) + q_properties->type = KFD_QUEUE_TYPE_SDMA; ++ else if (args->queue_type == 
KFD_IOC_QUEUE_TYPE_SDMA_XGMI) ++ q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else + return -ENOTSUPP; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 1350de675611..6b2e019b50f5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = true, + .num_sdma_engines = 1, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + #endif +@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, 
+ }; + +@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, + }; + +@@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, ++ .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, + }; + +diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 805bb0f0e591..4aa959bf81eb 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + + static void deallocate_sdma_queue(struct device_queue_manager *dqm, +- unsigned int sdma_queue_id); ++ struct queue *q); + + static void kfd_process_hw_exception(struct work_struct *work); + + static inline + enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) + { +- if (type == KFD_QUEUE_TYPE_SDMA) ++ if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) + return KFD_MQD_TYPE_SDMA; + return KFD_MQD_TYPE_CP; + } +@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) + return dqm->dev->device_info->num_sdma_engines; + } + ++static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) ++{ ++ return dqm->dev->device_info->num_xgmi_sdma_engines; ++} ++ + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) + { + return dqm->dev->device_info->num_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; + } + ++unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) ++{ ++ return dqm->dev->device_info->num_xgmi_sdma_engines ++ * dqm->dev->device_info->num_sdma_queues_per_engine; ++} ++ + void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { +@@ -158,7 +169,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) + * preserve the user mode ABI. 
+ */ + q->doorbell_id = q->properties.queue_id; +- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + /* For SDMA queues on SOC15 with 8-byte doorbell, use static + * doorbell assignments based on the engine and queue id. + * The doobell index distance between RLC (2*i) and (2*i+1) +@@ -199,7 +211,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family) || +- q->properties.type == KFD_QUEUE_TYPE_SDMA) ++ q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + return; + + old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); +@@ -314,7 +327,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + retval = create_compute_queue_nocpsch(dqm, q, qpd); +- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) ++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + retval = create_sdma_queue_nocpsch(dqm, q, qpd); + else + retval = -EINVAL; +@@ -332,6 +346,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; ++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) ++ dqm->xgmi_sdma_queue_count++; + + /* + * Unconditionally increment this counter, regardless of the queue's +@@ -455,7 +471,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, + deallocate_hqd(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + dqm->sdma_queue_count--; +- deallocate_sdma_queue(dqm, q->sdma_id); ++ deallocate_sdma_queue(dqm, q); ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { ++ dqm->xgmi_sdma_queue_count--; ++ deallocate_sdma_queue(dqm, q); + } else 
{ + pr_debug("q->properties.type %d is invalid\n", + q->properties.type); +@@ -546,7 +565,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + } + } else if (prev_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || +- q->properties.type == KFD_QUEUE_TYPE_SDMA)) { ++ q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); +@@ -573,7 +593,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + retval = map_queues_cpsch(dqm); + else if (q->properties.is_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || +- q->properties.type == KFD_QUEUE_TYPE_SDMA)) { ++ q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; +@@ -865,6 +886,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) + INIT_LIST_HEAD(&dqm->queues); + dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->sdma_queue_count = 0; ++ dqm->xgmi_sdma_queue_count = 0; + dqm->trap_debug_vmid = 0; + + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { +@@ -878,6 +900,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) + + dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; ++ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; + + return 0; + } +@@ -912,18 +935,34 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, + { + int bit; + +- if (dqm->sdma_bitmap == 0) +- return -ENOMEM; +- +- bit = __ffs64(dqm->sdma_bitmap); +- +- dqm->sdma_bitmap &= ~(1ULL << bit); +- q->sdma_id = bit; +- +- q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); +- q->properties.sdma_queue_id = 
q->sdma_id / get_num_sdma_engines(dqm); ++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { ++ if (dqm->sdma_bitmap == 0) ++ return -ENOMEM; ++ bit = __ffs64(dqm->sdma_bitmap); ++ dqm->sdma_bitmap &= ~(1ULL << bit); ++ q->sdma_id = bit; ++ q->properties.sdma_engine_id = q->sdma_id % ++ get_num_sdma_engines(dqm); ++ q->properties.sdma_queue_id = q->sdma_id / ++ get_num_sdma_engines(dqm); ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { ++ if (dqm->xgmi_sdma_bitmap == 0) ++ return -ENOMEM; ++ bit = __ffs64(dqm->xgmi_sdma_bitmap); ++ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); ++ q->sdma_id = bit; ++ /* sdma_engine_id is sdma id including ++ * both PCIe-optimized SDMAs and XGMI- ++ * optimized SDMAs. The calculation below ++ * assumes the first N engines are always ++ * PCIe-optimized ones ++ */ ++ q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + ++ q->sdma_id % get_num_xgmi_sdma_engines(dqm); ++ q->properties.sdma_queue_id = q->sdma_id / ++ get_num_xgmi_sdma_engines(dqm); ++ } + +- pr_debug("SDMA id is: %d\n", q->sdma_id); + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); + +@@ -931,11 +970,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, + } + + static void deallocate_sdma_queue(struct device_queue_manager *dqm, +- unsigned int sdma_id) ++ struct queue *q) + { +- if (sdma_id >= get_num_sdma_queues(dqm)) +- return; +- dqm->sdma_bitmap |= (1ULL << sdma_id); ++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { ++ if (q->sdma_id >= get_num_sdma_queues(dqm)) ++ return; ++ dqm->sdma_bitmap |= (1ULL << q->sdma_id); ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { ++ if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) ++ return; ++ dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); ++ } + } + + static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, +@@ -973,7 +1018,7 @@ static int create_sdma_queue_nocpsch(struct 
device_queue_manager *dqm, + out_deallocate_doorbell: + deallocate_doorbell(qpd, q); + out_deallocate_sdma_queue: +- deallocate_sdma_queue(dqm, q->sdma_id); ++ deallocate_sdma_queue(dqm, q); + + return retval; + } +@@ -1031,8 +1076,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) + INIT_LIST_HEAD(&dqm->queues); + dqm->queue_count = dqm->processes_count = 0; + dqm->sdma_queue_count = 0; ++ dqm->xgmi_sdma_queue_count = 0; + dqm->active_runlist = false; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; ++ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; + dqm->trap_debug_vmid = 0; + + INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); +@@ -1159,7 +1206,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + goto out_unlock; + } + +- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { ++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + retval = allocate_sdma_queue(dqm, q); + if (retval) + goto out_unlock; +@@ -1199,6 +1247,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; ++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) ++ dqm->xgmi_sdma_queue_count++; + /* + * Unconditionally increment this counter, regardless of the queue's + * type or whether the queue is active. 
+@@ -1214,8 +1264,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + out_deallocate_doorbell: + deallocate_doorbell(qpd, q); + out_deallocate_sdma_queue: +- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) +- deallocate_sdma_queue(dqm, q->sdma_id); ++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA || ++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) ++ deallocate_sdma_queue(dqm, q); + out_unlock: + dqm_unlock(dqm); + +@@ -1250,7 +1301,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm) + { + int i, retval = 0; + +- for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) { ++ for (i = 0; i < dqm->dev->device_info->num_sdma_engines + ++ dqm->dev->device_info->num_xgmi_sdma_engines; i++) { + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); + if (retval) +@@ -1292,10 +1344,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, + if (!dqm->active_runlist) + return retval; + +- pr_debug("Before destroying queues, sdma queue count is : %u\n", +- dqm->sdma_queue_count); ++ pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", ++ dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); + +- if (dqm->sdma_queue_count > 0) ++ if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) + unmap_sdma_queues(dqm); + + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, +@@ -1367,7 +1419,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + dqm->sdma_queue_count--; +- deallocate_sdma_queue(dqm, q->sdma_id); ++ deallocate_sdma_queue(dqm, q); ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { ++ dqm->xgmi_sdma_queue_count--; ++ deallocate_sdma_queue(dqm, q); + } + + list_del(&q->list); +@@ -1583,7 +1638,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + list_for_each_entry(q, 
&qpd->queues_list, list) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + dqm->sdma_queue_count--; +- deallocate_sdma_queue(dqm, q->sdma_id); ++ deallocate_sdma_queue(dqm, q); ++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { ++ dqm->xgmi_sdma_queue_count--; ++ deallocate_sdma_queue(dqm, q); + } + + if (q->properties.is_active) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +index bc8af25511de..01f8249cb2ed 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +@@ -181,10 +181,12 @@ struct device_queue_manager { + unsigned int processes_count; + unsigned int queue_count; + unsigned int sdma_queue_count; ++ unsigned int xgmi_sdma_queue_count; + unsigned int total_queue_count; + unsigned int next_pipe_to_allocate; + unsigned int *allocated_queues; + uint64_t sdma_bitmap; ++ uint64_t xgmi_sdma_bitmap; + unsigned int vmid_bitmap; + uint64_t pipelines_addr; + struct kfd_mem_obj *pipeline_mem; +@@ -217,6 +219,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); + unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); + unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); ++unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); + bool check_if_queues_active(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + int reserve_debug_trap_vmid(struct device_queue_manager *dqm); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +index afb13f0633cc..989900449876 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +@@ -181,6 +181,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, + 
queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: ++ case KFD_QUEUE_TYPE_SDMA_XGMI: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ +@@ -227,6 +228,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: ++ case KFD_QUEUE_TYPE_SDMA_XGMI: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +index bf20c6d32ef3..3cdb19826927 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +@@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: ++ case KFD_QUEUE_TYPE_SDMA_XGMI: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ +@@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: ++ case KFD_QUEUE_TYPE_SDMA_XGMI: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index 045a229436a0..077c47fd4fee 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; +- compute_queue_count = 
queue_count - pm->dqm->sdma_queue_count; ++ compute_queue_count = queue_count - pm->dqm->sdma_queue_count - ++ pm->dqm->xgmi_sdma_queue_count; + + /* check if there is over subscription + * Note: the arbitration between the number of VMIDs and +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 195e58d6e059..ddee4878418c 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -233,6 +233,7 @@ struct kfd_device_info { + bool needs_iommu_device; + bool needs_pci_atomics; + unsigned int num_sdma_engines; ++ unsigned int num_xgmi_sdma_engines; + unsigned int num_sdma_queues_per_engine; + }; + +@@ -418,7 +419,8 @@ enum kfd_queue_type { + KFD_QUEUE_TYPE_COMPUTE, + KFD_QUEUE_TYPE_SDMA, + KFD_QUEUE_TYPE_HIQ, +- KFD_QUEUE_TYPE_DIQ ++ KFD_QUEUE_TYPE_DIQ, ++ KFD_QUEUE_TYPE_SDMA_XGMI + }; + + enum kfd_queue_format { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index fea67dc0569b..e652e25ede75 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -186,8 +186,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, + + switch (type) { + case KFD_QUEUE_TYPE_SDMA: +- if (dev->dqm->sdma_queue_count +- >= get_num_sdma_queues(dev->dqm)) { ++ case KFD_QUEUE_TYPE_SDMA_XGMI: ++ if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count ++ >= get_num_sdma_queues(dev->dqm)) || ++ (type == KFD_QUEUE_TYPE_SDMA_XGMI && ++ dev->dqm->xgmi_sdma_queue_count ++ >= get_num_xgmi_sdma_queues(dev->dqm))) { + pr_debug("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; +@@ -447,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) + q = pqn->q; + switch (q->properties.type) { + case KFD_QUEUE_TYPE_SDMA: ++ case KFD_QUEUE_TYPE_SDMA_XGMI: + seq_printf(m, " SDMA queue on device %x\n", + 
q->device->id); + mqd_type = KFD_MQD_TYPE_SDMA; +diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h +index d8fca2c27366..feeb887ceaac 100644 +--- a/include/uapi/linux/kfd_ioctl.h ++++ b/include/uapi/linux/kfd_ioctl.h +@@ -38,6 +38,7 @@ struct kfd_ioctl_get_version_args { + #define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 + #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 + #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 ++#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 + + #define KFD_MAX_QUEUE_PERCENTAGE 100 + #define KFD_MAX_QUEUE_PRIORITY 15 +-- +2.17.1 + |