diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1327-drm-amdkfd-Improve-topology-error-handling.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1327-drm-amdkfd-Improve-topology-error-handling.patch | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1327-drm-amdkfd-Improve-topology-error-handling.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1327-drm-amdkfd-Improve-topology-error-handling.patch new file mode 100644 index 00000000..f8b80792 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1327-drm-amdkfd-Improve-topology-error-handling.patch @@ -0,0 +1,314 @@ +From 14720c8664f70436d794830367fcd98f5415958e Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Mon, 31 Jul 2017 17:26:06 -0400 +Subject: [PATCH 1327/4131] drm/amdkfd: Improve topology error handling + +* Properly handle and report errors both when creating and parsing + CRAT tables +* Release temporary device lists after errors +* Update sys_props.num_devices not when devices are created but when + they are added to the global topology device list +* Refactor code to avoid duplication between + kfd_release_topology_device_list and kfd_release_live_view +* Make kfd_release_live_view static and move locking out to its caller + +Change-Id: If190c88b7f41d2e12b582e7555f27713225728d2 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 26 ++++++---- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 80 +++++++++++++++---------------- + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +- + 3 files changed, 57 insertions(+), 51 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +index ac7eaac..5f597a6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -420,7 +420,7 @@ int kfd_parse_crat_table(void *crat_image, + struct kfd_topology_device *top_dev = NULL; + struct crat_subtype_generic *sub_type_hdr; + uint16_t node_id; +- int ret; ++ int ret = 0; + struct crat_header *crat_table = (struct crat_header *)crat_image; + uint16_t num_nodes; + uint32_t image_len; +@@ -429,8 +429,10 @@ int kfd_parse_crat_table(void *crat_image, + if (!crat_image) + return -EINVAL; + +- if (!list_empty(device_list)) ++ if (!list_empty(device_list)) { + pr_warn("Error device list should be empty\n"); ++ return -EINVAL; ++ } + + num_nodes = crat_table->num_domains; + image_len = crat_table->length; +@@ -444,8 +446,10 @@ int kfd_parse_crat_table(void *crat_image, + top_dev->proximity_domain = proximity_domain++; + } + +- if (!top_dev) +- return -ENOMEM; ++ if (!top_dev) { ++ ret = -ENOMEM; ++ goto err; ++ } + + memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); + memcpy(top_dev->oem_table_id, crat_table->oem_table_id, +@@ -475,7 +479,7 @@ int kfd_parse_crat_table(void *crat_image, + if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { + ret = kfd_parse_subtype(sub_type_hdr, device_list); + if (ret != 0) +- return ret; ++ break; + } + + last_header_type = sub_type_hdr->type; +@@ -484,7 +488,11 @@ int kfd_parse_crat_table(void *crat_image, + sub_type_hdr->length); + } + +- return 0; ++err: ++ if (ret) ++ kfd_release_topology_device_list(device_list); ++ ++ return ret; + } + + /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +@@ -1240,7 +1248,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + int flags, struct kfd_dev *kdev, uint32_t proximity_domain) + { +- void *pcrat_image; ++ void *pcrat_image = NULL; + int ret = 0; + + if (!crat_image) +@@ -1280,8 +1288,10 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + ret = -EINVAL; + } + +- if (ret == 0) ++ if (!ret) + *crat_image = pcrat_image; ++ else ++ kfree(pcrat_image); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +index 89876bb..b2dde30 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +@@ -159,21 +159,22 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) + #endif + + kfree(dev); +- +- sys_props.num_devices--; + } + +-void kfd_release_live_view(void) ++void kfd_release_topology_device_list(struct list_head *device_list) + { + struct kfd_topology_device *dev; + +- down_write(&topology_lock); +- while (topology_device_list.next != &topology_device_list) { +- dev = container_of(topology_device_list.next, +- struct kfd_topology_device, list); ++ while (!list_empty(device_list)) { ++ dev = list_first_entry(device_list, ++ struct kfd_topology_device, list); + kfd_release_topology_device(dev); + } +- up_write(&topology_lock); ++} ++ ++static void kfd_release_live_view(void) ++{ ++ kfd_release_topology_device_list(&topology_device_list); + memset(&sys_props, 0, sizeof(sys_props)); + } + +@@ -196,7 +197,6 @@ struct kfd_topology_device *kfd_create_topology_device( + #endif + + list_add_tail(&dev->list, device_list); +- sys_props.num_devices++; + + return dev; + } +@@ -872,16 +872,13 @@ static void kfd_topology_release_sysfs(void) + } + + /* Called with write topology_lock acquired */ +-static int kfd_topology_update_device_list(struct list_head *temp_list, ++static void kfd_topology_update_device_list(struct list_head *temp_list, + struct list_head *master_list) + { +- int num = 0; +- + while (!list_empty(temp_list)) { + list_move_tail(temp_list->next, master_list); +- num++; ++ sys_props.num_devices++; + } +- return num; + } + + static void kfd_debug_print_topology(void) +@@ -1006,14 +1003,6 @@ static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) + pr_info("Ignoring ACPI CRAT on non-APU system\n"); + return true; + } +- +-static void kfd_delete_topology_device_list(struct list_head *device_list) +-{ +- struct kfd_topology_device *dev, *tmp; +- +- list_for_each_entry_safe(dev, tmp, device_list, list) +- kfd_release_topology_device(dev); +-} + #endif + + int kfd_topology_init(void) +@@ -1025,7 +1014,6 @@ int kfd_topology_init(void) + int cpu_only_node = 0; + struct kfd_topology_device *kdev; + int proximity_domain; +- int num_nodes; + + /* topology_device_list - Master list of all topology devices + * temp_topology_device_list - temporary list created while parsing CRAT +@@ -1062,9 +1050,8 @@ int kfd_topology_init(void) + if (ret || + kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { + +- kfd_delete_topology_device_list( ++ kfd_release_topology_device_list( + &temp_topology_device_list); +- INIT_LIST_HEAD(&temp_topology_device_list); + kfd_destroy_crat_image(crat_image); + crat_image = NULL; + } +@@ -1075,13 +1062,16 @@ int kfd_topology_init(void) + COMPUTE_UNIT_CPU, NULL, + proximity_domain); + cpu_only_node = 1; ++ if (ret) { ++ pr_err("Error creating VCRAT table for CPU\n"); ++ return ret; ++ } + +- if (ret == 0) +- ret = kfd_parse_crat_table(crat_image, ++ ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); +- else { +- pr_err("Error getting/creating CRAT table\n"); ++ if (ret) { ++ pr_err("Error parsing VCRAT table for CPU\n"); + goto err; + } + } +@@ -1093,9 +1083,9 @@ int kfd_topology_init(void) + #endif + + down_write(&topology_lock); +- num_nodes = kfd_topology_update_device_list(&temp_topology_device_list, +- &topology_device_list); +- atomic_set(&topology_crat_proximity_domain, num_nodes-1); ++ kfd_topology_update_device_list(&temp_topology_device_list, ++ &topology_device_list); ++ atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + ret = kfd_topology_update_sysfs(); + up_write(&topology_lock); + +@@ -1128,8 +1118,8 @@ void kfd_topology_shutdown(void) + { + down_write(&topology_lock); + kfd_topology_release_sysfs(); +- up_write(&topology_lock); + kfd_release_live_view(); ++ up_write(&topology_lock); + } + + static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) +@@ -1269,11 +1259,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, + gpu, proximity_domain); +- if (res == 0) +- res = kfd_parse_crat_table(crat_image, ++ if (res) { ++ pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", ++ gpu_id); ++ return res; ++ } ++ res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, proximity_domain); +- else { +- pr_err("Error in VCRAT for GPU (ID: 0x%x)\n", gpu_id); ++ if (res) { ++ pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", ++ gpu_id); + goto err; + } + +@@ -1368,27 +1363,28 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + } + + kfd_debug_print_topology(); +-err: +- if (res == 0) +- kfd_notify_gpu_change(gpu_id, 1); + ++ if (!res) ++ kfd_notify_gpu_change(gpu_id, 1); ++err: + kfd_destroy_crat_image(crat_image); + return res; + } + + int kfd_topology_remove_device(struct kfd_dev *gpu) + { +- struct kfd_topology_device *dev; ++ struct kfd_topology_device *dev, *tmp; + uint32_t gpu_id; + int res = -ENODEV; + + down_write(&topology_lock); + +- list_for_each_entry(dev, &topology_device_list, list) ++ list_for_each_entry_safe(dev, tmp, &topology_device_list, list) + if (dev->gpu == gpu) { + gpu_id = dev->gpu_id; + kfd_remove_sysfs_node_entry(dev); + kfd_release_topology_device(dev); ++ sys_props.num_devices--; + res = 0; + if (kfd_topology_update_sysfs() < 0) + kfd_topology_release_sysfs(); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +index 44f0fc7..b59b32c 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +@@ -193,7 +193,7 @@ struct kfd_system_properties { + + struct kfd_topology_device *kfd_create_topology_device( + struct list_head *device_list); +-void kfd_release_live_view(void); ++void kfd_release_topology_device_list(struct list_head *device_list); + + #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + extern bool amd_iommu_pc_supported(void); +-- +2.7.4 + |