Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/5713-drm-amdkfd-Generate-xGMI-direct-iolink.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.14.71/5713-drm-amdkfd-Generate-xGMI-direct-iolink.patch  173
1 file changed, 173 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/5713-drm-amdkfd-Generate-xGMI-direct-iolink.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/5713-drm-amdkfd-Generate-xGMI-direct-iolink.patch
new file mode 100644
index 00000000..3ea456e9
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/5713-drm-amdkfd-Generate-xGMI-direct-iolink.patch
@@ -0,0 +1,173 @@
+From 7a3e542dd28b87db46b2812cd3d9ee6fbee04077 Mon Sep 17 00:00:00 2001
+From: Shaoyun Liu <Shaoyun.Liu@amd.com>
+Date: Mon, 13 Aug 2018 14:04:11 -0400
+Subject: [PATCH 5713/5725] drm/amdkfd: Generate xGMI direct iolink
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Generate xGMI iolinks for upper-level usage
+
+Change-Id: I37bc29fee45cb10d1da849956055c59d823f6f5d
+Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 78 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 68 insertions(+), 10 deletions(-)
+
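For context, the iolinks generated below are what "upper-level usage" refers to: KFD exports them through its topology tree in sysfs, where the ROCm user-mode stack reads them to rank device-to-device paths. A minimal sketch of dumping one link's properties follows; the sysfs path and the node/link indices are illustrative assumptions, not taken from this patch.

```c
/* Minimal sketch: dump one iolink's properties from the KFD topology.
 * The path and the node/link indices are illustrative assumptions;
 * verify them against your own system. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/virtual/kfd/kfd/topology/"
			   "nodes/1/io_links/0/properties";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Each line is a "name value" pair, e.g. node_from, node_to,
	 * type and weight for the link. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
```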
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+index c540b65..1655e8b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -346,7 +346,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ struct list_head *device_list)
+ {
+ struct kfd_iolink_properties *props = NULL, *props2;
+- struct kfd_topology_device *dev, *cpu_dev;
++ struct kfd_topology_device *dev, *to_dev;
+ uint32_t id_from;
+ uint32_t id_to;
+
+@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+
+ if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+ props->weight = 20;
++ else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
++ props->weight = 15;
+ else
+ props->weight = node_distance(id_from, id_to);
+
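The hunk above encodes a simple cost model: a fixed weight of 20 for PCIe GPU<->CPU links, a lower (preferred) weight of 15 for xGMI, and the NUMA node distance for CPU<->CPU links. Here is a standalone sketch of the same policy; the enum values are stand-ins, since the kernel's real CRAT_* constants are not shown in this patch.

```c
/* Stand-in constants: the real values live in the kernel's crat.h. */
enum {
	IOLINK_TYPE_PCIEXPRESS = 1,
	IOLINK_TYPE_XGMI       = 2,
};

/* Lower weight means a cheaper, preferred path. */
static unsigned int iolink_weight(int type, unsigned int from,
				  unsigned int to,
				  unsigned int (*node_distance)(unsigned int,
								unsigned int))
{
	if (type == IOLINK_TYPE_PCIEXPRESS)
		return 20;			/* fixed PCIe cost */
	if (type == IOLINK_TYPE_XGMI)
		return 15;			/* xGMI beats PCIe */
	return node_distance(from, to);		/* CPU<->CPU: NUMA distance */
}
```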
+@@ -390,19 +392,22 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ * links are not built at that time. If a PCIe type is discovered, it
+ * means a GPU is detected and we are adding GPU->CPU to the topology.
+ * At this time, also add the corresponding CPU->GPU link.
++ * For xGMI, only one direction of the link is added in the CRAT
++ * table; add the corresponding reversed link now.
+ */
+- if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
+- cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
+- if (!cpu_dev)
++ if (props && (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
++ props->iolink_type == CRAT_IOLINK_TYPE_XGMI)) {
++ to_dev = kfd_topology_device_by_proximity_domain(id_to);
++ if (!to_dev)
+ return -ENODEV;
+ /* same everything but the other direction */
+ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ props2->node_from = id_to;
+ props2->node_to = id_from;
+ props2->kobj = NULL;
+- cpu_dev->io_link_count++;
+- cpu_dev->node_props.io_links_count++;
+- list_add_tail(&props2->list, &cpu_dev->io_link_props);
++ to_dev->io_link_count++;
++ to_dev->node_props.io_links_count++;
++ list_add_tail(&props2->list, &to_dev->io_link_props);
+ }
+
+ return 0;
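The reverse-link block above duplicates the forward link with kmemdup and swaps the endpoints; note that in this version of the patch props2 is not checked for NULL before use. A userspace sketch of the same "mirror the link" step, with a trimmed-down struct and an explicit allocation check:

```c
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Trimmed-down stand-in for kfd_iolink_properties. */
struct iolink {
	uint32_t node_from;
	uint32_t node_to;
	uint32_t weight;
};

static struct iolink *mirror_link(const struct iolink *fwd)
{
	struct iolink *rev = malloc(sizeof(*rev));

	if (!rev)			/* unlike the patch, check the copy */
		return NULL;
	memcpy(rev, fwd, sizeof(*rev));	/* same everything... */
	rev->node_from = fwd->node_to;	/* ...but the other direction */
	rev->node_to   = fwd->node_from;
	return rev;
}
```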
+@@ -1056,7 +1061,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
+ *
+ * Return 0 if successful else return -ve value
+ */
+-static int kfd_fill_gpu_direct_io_link(int *avail_size,
++static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain)
+@@ -1088,6 +1093,28 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
+ return 0;
+ }
+
++static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
++ struct kfd_dev *kdev,
++ struct crat_subtype_iolink *sub_type_hdr,
++ uint32_t proximity_domain_from,
++ uint32_t proximity_domain_to)
++{
++ *avail_size -= sizeof(struct crat_subtype_iolink);
++ if (*avail_size < 0)
++ return -ENOMEM;
++
++ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
++
++ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
++ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
++ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
++
++ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
++ sub_type_hdr->proximity_domain_from = proximity_domain_from;
++ sub_type_hdr->proximity_domain_to = proximity_domain_to;
++ return 0;
++}
++
+ /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
+ *
+ * @pcrat_image: Fill in VCRAT for GPU
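kfd_fill_gpu_xgmi_link_to_gpu above follows the usual VCRAT fill pattern: reserve space out of avail_size first, fail with -ENOMEM if the caller's buffer would overflow, then zero and populate the subtype header. A compact sketch of that pattern, with a simplified stand-in for crat_subtype_iolink:

```c
#include <errno.h>
#include <stdint.h>
#include <string.h>

/* Simplified stand-in for crat_subtype_iolink. */
struct fake_iolink {
	uint8_t  type;
	uint8_t  length;
	uint32_t flags;
	uint32_t io_interface_type;
	uint32_t proximity_domain_from;
	uint32_t proximity_domain_to;
};

static int fill_xgmi_link(int *avail_size, struct fake_iolink *hdr,
			  uint32_t from, uint32_t to)
{
	/* Reserve the subtype's space up front; going negative means the
	 * caller's buffer (VCRAT_SIZE_FOR_GPU in the kernel) is full. */
	*avail_size -= (int)sizeof(*hdr);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(hdr, 0, sizeof(*hdr));
	hdr->length = sizeof(*hdr);
	hdr->proximity_domain_from = from;
	hdr->proximity_domain_to   = to;
	return 0;
}
```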
+@@ -1100,14 +1127,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ {
+ struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct crat_subtype_generic *sub_type_hdr;
++ struct kfd_local_mem_info local_mem_info;
++ struct kfd_topology_device *peer_dev;
+ struct crat_subtype_computeunit *cu;
+ struct kfd_cu_info cu_info;
+ int avail_size = *size;
+ uint32_t total_num_of_cu;
+ int num_of_cache_entries = 0;
+ int cache_mem_filled = 0;
++ uint32_t nid = 0;
+ int ret = 0;
+- struct kfd_local_mem_info local_mem_info;
+
+ if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
+ return -EINVAL;
+@@ -1231,7 +1260,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ */
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ cache_mem_filled);
+- ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
++ ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+ if (ret < 0)
+@@ -1240,6 +1269,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+
++
++ /* Fill in Subtype: IO_LINKS
++ * Direct links from GPU to other GPUs through xGMI.
++ * We will loop over the GPUs that have already been processed
++ * (those with a lower proximity_domain value), adding a link for
++ * each GPU in the same hive (from this GPU to the other GPU).
++ * The reversed iolink (from the other GPU to this GPU) will be
++ * added in kfd_parse_subtype_iolink.
++ */
++ if (kdev->hive_id) {
++ for (nid = 0; nid < proximity_domain; ++nid) {
++ peer_dev = kfd_topology_device_by_proximity_domain(nid);
++ if (!peer_dev->gpu)
++ continue;
++ if (peer_dev->gpu->hive_id != kdev->hive_id)
++ continue;
++ sub_type_hdr = (typeof(sub_type_hdr))(
++ (char *)sub_type_hdr +
++ sizeof(struct crat_subtype_iolink));
++ ret = kfd_fill_gpu_xgmi_link_to_gpu(
++ &avail_size, kdev,
++ (struct crat_subtype_iolink *)sub_type_hdr,
++ proximity_domain, nid);
++ if (ret < 0)
++ return ret;
++ crat_table->length += sub_type_hdr->length;
++ crat_table->total_entries++;
++ }
++ }
+ *size = crat_table->length;
+ pr_info("Virtual CRAT table created for GPU\n");
+
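The hive walk above only emits links toward GPUs created earlier (nid < proximity_domain) that share the new device's hive_id; kfd_parse_subtype_iolink later mirrors each one, so an N-GPU hive ends up with N*(N-1) directed links. A standalone sketch of the walk over a hypothetical four-node topology:

```c
/* Standalone sketch of the hive walk: emit one directed link from the
 * new GPU to every earlier GPU sharing its hive_id; the parser later
 * mirrors each one. */
#include <stdio.h>
#include <stdint.h>

struct node { uint64_t hive_id; };	/* 0 = CPU-only node, no hive */

static void emit_xgmi_links(const struct node *nodes, uint32_t new_domain)
{
	const struct node *kdev = &nodes[new_domain];
	uint32_t nid;

	if (!kdev->hive_id)
		return;
	for (nid = 0; nid < new_domain; ++nid) {
		if (nodes[nid].hive_id != kdev->hive_id)
			continue;
		printf("xGMI link %u -> %u\n", new_domain, nid);
	}
}

int main(void)
{
	/* Hypothetical topology: one CPU node and three GPUs, two of
	 * which share hive 0xA. */
	struct node nodes[] = { {0}, {0xA}, {0xB}, {0xA} };

	emit_xgmi_links(nodes, 3);	/* prints: xGMI link 3 -> 1 */
	return 0;
}
```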
+--
+2.7.4
+