path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch | 603
1 file changed, 603 insertions(+), 0 deletions(-)
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch
new file mode 100644
index 00000000..ec5a9d7a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2749-drm-amdkfd-Introduce-XGMI-SDMA-queue-type.patch
@@ -0,0 +1,603 @@
+From 6308e3ee33ebb207403bc2ff7df9f89eb6becb1e Mon Sep 17 00:00:00 2001
+From: Oak Zeng <Oak.Zeng@amd.com>
+Date: Thu, 7 Feb 2019 14:02:27 -0600
+Subject: [PATCH 2749/2940] drm/amdkfd: Introduce XGMI SDMA queue type
+
+Existing QUEUE_TYPE_SDMA means PCIe-optimized SDMA queues.
+Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized
+for non-PCIe transfers such as XGMI.
+
+Change-Id: I5fd53c4e243eb10e4ecc965de9f8da6a87acd2e8
+Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 +++
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 124 +++++++++++++-----
+ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +
+ .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 +
+ .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 +
+ .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +-
+ .../amd/amdkfd/kfd_process_queue_manager.c | 9 +-
+ include/uapi/linux/kfd_ioctl.h | 1 +
+ 10 files changed, 128 insertions(+), 37 deletions(-)
+
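Before the per-file hunks, the shape of the change in one place: the patch threads the new enum value through every queue-type dispatch while keeping separate accounting per SDMA flavor. A minimal standalone C sketch of that pattern (the enum mirrors kfd_priv.h below; the is_sdma_type helper is illustrative, not from the patch):

	#include <stdio.h>

	enum kfd_queue_type {
		KFD_QUEUE_TYPE_COMPUTE,
		KFD_QUEUE_TYPE_SDMA,
		KFD_QUEUE_TYPE_HIQ,
		KFD_QUEUE_TYPE_DIQ,
		KFD_QUEUE_TYPE_SDMA_XGMI
	};

	/* Illustrative helper: both SDMA flavors share the SDMA MQD and
	 * map/unmap paths, but are counted and allocated separately. */
	static int is_sdma_type(enum kfd_queue_type type)
	{
		return type == KFD_QUEUE_TYPE_SDMA ||
		       type == KFD_QUEUE_TYPE_SDMA_XGMI;
	}

	int main(void)
	{
		printf("SDMA: %d, SDMA_XGMI: %d, COMPUTE: %d\n",
		       is_sdma_type(KFD_QUEUE_TYPE_SDMA),
		       is_sdma_type(KFD_QUEUE_TYPE_SDMA_XGMI),
		       is_sdma_type(KFD_QUEUE_TYPE_COMPUTE));
		return 0;
	}
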
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index a34c8cea1fb7..684f84f130a8 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -235,6 +235,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
+ q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA;
++ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
++ q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ else
+ return -ENOTSUPP;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 1350de675611..6b2e019b50f5 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
+ .needs_iommu_device = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
+ .needs_iommu_device = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
+ .needs_iommu_device = true,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 1,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+ #endif
+@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+ };
+
+@@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = {
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
++ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+ };
+
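Each ASIC table above now carries num_xgmi_sdma_engines, still zero for every listed device; an XGMI-capable part would set it nonzero. The per-type queue budget is simply engines times queues-per-engine, matching get_num_sdma_queues()/get_num_xgmi_sdma_queues() in the next file. A quick standalone sketch with hypothetical counts:

	#include <stdio.h>

	struct kfd_device_info_sketch {		/* subset of kfd_device_info */
		unsigned int num_sdma_engines;
		unsigned int num_xgmi_sdma_engines;
		unsigned int num_sdma_queues_per_engine;
	};

	int main(void)
	{
		/* Hypothetical XGMI-capable part; every table in this
		 * patch still sets num_xgmi_sdma_engines to 0. */
		struct kfd_device_info_sketch info = { 2, 6, 8 };

		printf("PCIe SDMA queues: %u\n",
		       info.num_sdma_engines * info.num_sdma_queues_per_engine);
		printf("XGMI SDMA queues: %u\n",
		       info.num_xgmi_sdma_engines * info.num_sdma_queues_per_engine);
		return 0;
	}
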
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 805bb0f0e591..4aa959bf81eb 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+
+ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+- unsigned int sdma_queue_id);
++ struct queue *q);
+
+ static void kfd_process_hw_exception(struct work_struct *work);
+
+ static inline
+ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+ {
+- if (type == KFD_QUEUE_TYPE_SDMA)
++ if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ return KFD_MQD_TYPE_SDMA;
+ return KFD_MQD_TYPE_CP;
+ }
+@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
+ return dqm->dev->device_info->num_sdma_engines;
+ }
+
++static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
++{
++ return dqm->dev->device_info->num_xgmi_sdma_engines;
++}
++
+ unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
+ {
+ return dqm->dev->device_info->num_sdma_engines
+ * dqm->dev->device_info->num_sdma_queues_per_engine;
+ }
+
++unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
++{
++ return dqm->dev->device_info->num_xgmi_sdma_engines
++ * dqm->dev->device_info->num_sdma_queues_per_engine;
++}
++
+ void program_sh_mem_settings(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+ {
+@@ -158,7 +169,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+ * preserve the user mode ABI.
+ */
+ q->doorbell_id = q->properties.queue_id;
+- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ /* For SDMA queues on SOC15 with 8-byte doorbell, use static
+ * doorbell assignments based on the engine and queue id.
+ * The doobell index distance between RLC (2*i) and (2*i+1)
+@@ -199,7 +211,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+- q->properties.type == KFD_QUEUE_TYPE_SDMA)
++ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ return;
+
+ old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+@@ -314,7 +327,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+ retval = create_compute_queue_nocpsch(dqm, q, qpd);
+- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ retval = create_sdma_queue_nocpsch(dqm, q, qpd);
+ else
+ retval = -EINVAL;
+@@ -332,6 +346,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
++ dqm->xgmi_sdma_queue_count++;
+
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+@@ -455,7 +471,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
+ deallocate_hqd(dqm, q);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ dqm->sdma_queue_count--;
+- deallocate_sdma_queue(dqm, q->sdma_id);
++ deallocate_sdma_queue(dqm, q);
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
++ dqm->xgmi_sdma_queue_count--;
++ deallocate_sdma_queue(dqm, q);
+ } else {
+ pr_debug("q->properties.type %d is invalid\n",
+ q->properties.type);
+@@ -546,7 +565,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ }
+ } else if (prev_active &&
+ (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
++ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+ KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
+@@ -573,7 +593,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ retval = map_queues_cpsch(dqm);
+ else if (q->properties.is_active &&
+ (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
++ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ if (WARN(q->process->mm != current->mm,
+ "should only run in user thread"))
+ retval = -EFAULT;
+@@ -865,6 +886,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
+ INIT_LIST_HEAD(&dqm->queues);
+ dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->sdma_queue_count = 0;
++ dqm->xgmi_sdma_queue_count = 0;
+ dqm->trap_debug_vmid = 0;
+
+ for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+@@ -878,6 +900,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
+
+ dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+ dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
++ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
+
+ return 0;
+ }
+@@ -912,18 +935,34 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
+ {
+ int bit;
+
+- if (dqm->sdma_bitmap == 0)
+- return -ENOMEM;
+-
+- bit = __ffs64(dqm->sdma_bitmap);
+-
+- dqm->sdma_bitmap &= ~(1ULL << bit);
+- q->sdma_id = bit;
+-
+- q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
+- q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
++ if (dqm->sdma_bitmap == 0)
++ return -ENOMEM;
++ bit = __ffs64(dqm->sdma_bitmap);
++ dqm->sdma_bitmap &= ~(1ULL << bit);
++ q->sdma_id = bit;
++ q->properties.sdma_engine_id = q->sdma_id %
++ get_num_sdma_engines(dqm);
++ q->properties.sdma_queue_id = q->sdma_id /
++ get_num_sdma_engines(dqm);
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
++ if (dqm->xgmi_sdma_bitmap == 0)
++ return -ENOMEM;
++ bit = __ffs64(dqm->xgmi_sdma_bitmap);
++ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
++ q->sdma_id = bit;
++ /* sdma_engine_id is sdma id including
++ * both PCIe-optimized SDMAs and XGMI-
++ * optimized SDMAs. The calculation below
++ * assumes the first N engines are always
++ * PCIe-optimized ones
++ */
++ q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
++ q->sdma_id % get_num_xgmi_sdma_engines(dqm);
++ q->properties.sdma_queue_id = q->sdma_id /
++ get_num_xgmi_sdma_engines(dqm);
++ }
+
+- pr_debug("SDMA id is: %d\n", q->sdma_id);
+ pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
+ pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
+
+@@ -931,11 +970,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
+ }
+
+ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+- unsigned int sdma_id)
++ struct queue *q)
+ {
+- if (sdma_id >= get_num_sdma_queues(dqm))
+- return;
+- dqm->sdma_bitmap |= (1ULL << sdma_id);
++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
++ if (q->sdma_id >= get_num_sdma_queues(dqm))
++ return;
++ dqm->sdma_bitmap |= (1ULL << q->sdma_id);
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
++ if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
++ return;
++ dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
++ }
+ }
+
+ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+@@ -973,7 +1018,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
+ out_deallocate_sdma_queue:
+- deallocate_sdma_queue(dqm, q->sdma_id);
++ deallocate_sdma_queue(dqm, q);
+
+ return retval;
+ }
+@@ -1031,8 +1076,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
+ INIT_LIST_HEAD(&dqm->queues);
+ dqm->queue_count = dqm->processes_count = 0;
+ dqm->sdma_queue_count = 0;
++ dqm->xgmi_sdma_queue_count = 0;
+ dqm->active_runlist = false;
+ dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
++ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
+ dqm->trap_debug_vmid = 0;
+
+ INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
+@@ -1159,7 +1206,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ goto out_unlock;
+ }
+
+- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ retval = allocate_sdma_queue(dqm, q);
+ if (retval)
+ goto out_unlock;
+@@ -1199,6 +1247,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
++ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
++ dqm->xgmi_sdma_queue_count++;
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+ * type or whether the queue is active.
+@@ -1214,8 +1264,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
+ out_deallocate_sdma_queue:
+- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+- deallocate_sdma_queue(dqm, q->sdma_id);
++ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
++ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
++ deallocate_sdma_queue(dqm, q);
+ out_unlock:
+ dqm_unlock(dqm);
+
+@@ -1250,7 +1301,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
+ {
+ int i, retval = 0;
+
+- for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) {
++ for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
++ dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
+ retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
+ if (retval)
+@@ -1292,10 +1344,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+ if (!dqm->active_runlist)
+ return retval;
+
+- pr_debug("Before destroying queues, sdma queue count is : %u\n",
+- dqm->sdma_queue_count);
++ pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
++ dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
+
+- if (dqm->sdma_queue_count > 0)
++ if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
+ unmap_sdma_queues(dqm);
+
+ retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
+@@ -1367,7 +1419,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ dqm->sdma_queue_count--;
+- deallocate_sdma_queue(dqm, q->sdma_id);
++ deallocate_sdma_queue(dqm, q);
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
++ dqm->xgmi_sdma_queue_count--;
++ deallocate_sdma_queue(dqm, q);
+ }
+
+ list_del(&q->list);
+@@ -1583,7 +1638,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ dqm->sdma_queue_count--;
+- deallocate_sdma_queue(dqm, q->sdma_id);
++ deallocate_sdma_queue(dqm, q);
++ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
++ dqm->xgmi_sdma_queue_count--;
++ deallocate_sdma_queue(dqm, q);
+ }
+
+ if (q->properties.is_active)
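The allocator above is the core of the change: each SDMA flavor draws queue ids from its own bitmap, and XGMI engine ids are offset past the PCIe-optimized engines, per the comment's assumption that the first N engines are PCIe-optimized. A standalone userspace sketch of the same arithmetic (hypothetical engine counts; __builtin_ctzll stands in for the kernel's __ffs64):

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_SDMA_ENGINES	2	/* PCIe-optimized, hypothetical */
	#define NUM_XGMI_SDMA_ENGINES	6	/* XGMI-optimized, hypothetical */
	#define QUEUES_PER_ENGINE	2

	static uint64_t xgmi_sdma_bitmap =
		(1ULL << (NUM_XGMI_SDMA_ENGINES * QUEUES_PER_ENGINE)) - 1;

	static int allocate_xgmi_sdma_queue(unsigned int *engine_id,
					    unsigned int *queue_id)
	{
		int bit;

		if (xgmi_sdma_bitmap == 0)
			return -1;			/* all queues in use */
		bit = __builtin_ctzll(xgmi_sdma_bitmap);  /* lowest free id */
		xgmi_sdma_bitmap &= ~(1ULL << bit);
		/* XGMI engines sit after the PCIe-optimized ones */
		*engine_id = NUM_SDMA_ENGINES + bit % NUM_XGMI_SDMA_ENGINES;
		*queue_id  = bit / NUM_XGMI_SDMA_ENGINES;
		return bit;
	}

	int main(void)
	{
		unsigned int e, q;
		int id;

		while ((id = allocate_xgmi_sdma_queue(&e, &q)) >= 0)
			printf("sdma_id %2d -> engine %u queue %u\n", id, e, q);
		return 0;
	}

Deallocation, as in the reworked deallocate_sdma_queue(), just sets the bit again in whichever bitmap matches the queue's type.
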
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+index bc8af25511de..01f8249cb2ed 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+@@ -181,10 +181,12 @@ struct device_queue_manager {
+ unsigned int processes_count;
+ unsigned int queue_count;
+ unsigned int sdma_queue_count;
++ unsigned int xgmi_sdma_queue_count;
+ unsigned int total_queue_count;
+ unsigned int next_pipe_to_allocate;
+ unsigned int *allocated_queues;
+ uint64_t sdma_bitmap;
++ uint64_t xgmi_sdma_bitmap;
+ unsigned int vmid_bitmap;
+ uint64_t pipelines_addr;
+ struct kfd_mem_obj *pipeline_mem;
+@@ -217,6 +219,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
+ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
+ unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
++unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
+ bool check_if_queues_active(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+ int reserve_debug_trap_vmid(struct device_queue_manager *dqm);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+index afb13f0633cc..989900449876 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+@@ -181,6 +181,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+@@ -227,6 +228,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+index bf20c6d32ef3..3cdb19826927 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+@@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+@@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index 045a229436a0..077c47fd4fee 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+
+ process_count = pm->dqm->processes_count;
+ queue_count = pm->dqm->queue_count;
+- compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
++ compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
++ pm->dqm->xgmi_sdma_queue_count;
+
+ /* check if there is over subscription
+ * Note: the arbitration between the number of VMIDs and
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 195e58d6e059..ddee4878418c 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -233,6 +233,7 @@ struct kfd_device_info {
+ bool needs_iommu_device;
+ bool needs_pci_atomics;
+ unsigned int num_sdma_engines;
++ unsigned int num_xgmi_sdma_engines;
+ unsigned int num_sdma_queues_per_engine;
+ };
+
+@@ -418,7 +419,8 @@ enum kfd_queue_type {
+ KFD_QUEUE_TYPE_COMPUTE,
+ KFD_QUEUE_TYPE_SDMA,
+ KFD_QUEUE_TYPE_HIQ,
+- KFD_QUEUE_TYPE_DIQ
++ KFD_QUEUE_TYPE_DIQ,
++ KFD_QUEUE_TYPE_SDMA_XGMI
+ };
+
+ enum kfd_queue_format {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+index fea67dc0569b..e652e25ede75 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+@@ -186,8 +186,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
+
+ switch (type) {
+ case KFD_QUEUE_TYPE_SDMA:
+- if (dev->dqm->sdma_queue_count
+- >= get_num_sdma_queues(dev->dqm)) {
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
++ if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
++ >= get_num_sdma_queues(dev->dqm)) ||
++ (type == KFD_QUEUE_TYPE_SDMA_XGMI &&
++ dev->dqm->xgmi_sdma_queue_count
++ >= get_num_xgmi_sdma_queues(dev->dqm))) {
+ pr_debug("Over-subscription is not allowed for SDMA.\n");
+ retval = -EPERM;
+ goto err_create_queue;
+@@ -447,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
+ q = pqn->q;
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_SDMA:
++ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ seq_printf(m, " SDMA queue on device %x\n",
+ q->device->id);
+ mqd_type = KFD_MQD_TYPE_SDMA;
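The pqm_create_queue() hunk above keeps the oversubscription check per flavor, so XGMI queues no longer compete with PCIe-optimized ones for slots. A compact sketch of that admission logic (hypothetical limits):

	#include <stdbool.h>
	#include <stdio.h>

	struct dqm_counts {
		unsigned int sdma_queue_count, sdma_max;
		unsigned int xgmi_sdma_queue_count, xgmi_sdma_max;
	};

	static bool sdma_slot_available(const struct dqm_counts *c, bool xgmi)
	{
		return xgmi ? c->xgmi_sdma_queue_count < c->xgmi_sdma_max
			    : c->sdma_queue_count < c->sdma_max;
	}

	int main(void)
	{
		/* hypothetical: PCIe pool exhausted, XGMI pool still open */
		struct dqm_counts c = { 4, 4, 0, 12 };

		printf("PCIe SDMA ok: %d, XGMI SDMA ok: %d\n",
		       sdma_slot_available(&c, false),
		       sdma_slot_available(&c, true));
		return 0;
	}
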
+diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
+index d8fca2c27366..feeb887ceaac 100644
+--- a/include/uapi/linux/kfd_ioctl.h
++++ b/include/uapi/linux/kfd_ioctl.h
+@@ -38,6 +38,7 @@ struct kfd_ioctl_get_version_args {
+ #define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
+ #define KFD_IOC_QUEUE_TYPE_SDMA 0x1
+ #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
++#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
+
+ #define KFD_MAX_QUEUE_PERCENTAGE 100
+ #define KFD_MAX_QUEUE_PRIORITY 15
+--
+2.17.1
+
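For context, how a userspace client could request the new queue type once this patch and the KFD_IOC_QUEUE_TYPE_SDMA_XGMI uapi constant land. A hedged sketch only: gpu_id discovery, ring-buffer setup, and error handling are elided or illustrative, and real clients go through the ROCt Thunk rather than raw ioctls:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	int main(void)
	{
		struct kfd_ioctl_create_queue_args args;
		int kfd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

		if (kfd < 0) {
			perror("open /dev/kfd");
			return 1;
		}
		memset(&args, 0, sizeof(args));
		args.gpu_id = 0x1002;	/* hypothetical id from sysfs topology */
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_XGMI;	/* new here */
		args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
		args.queue_priority = 7;
		/* ring_base_address/ring_size must describe a real ring buffer */
		if (ioctl(kfd, AMDKFD_IOC_CREATE_QUEUE, &args) < 0)
			perror("AMDKFD_IOC_CREATE_QUEUE");
		else
			printf("queue %u, doorbell offset 0x%llx\n",
			       args.queue_id,
			       (unsigned long long)args.doorbell_offset);
		return 0;
	}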