diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/1930-drm-amdgpu-add-badpages-sysfs-interafce.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/1930-drm-amdgpu-add-badpages-sysfs-interafce.patch | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/1930-drm-amdgpu-add-badpages-sysfs-interafce.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/1930-drm-amdgpu-add-badpages-sysfs-interafce.patch new file mode 100644 index 00000000..cdb96060 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/1930-drm-amdgpu-add-badpages-sysfs-interafce.patch @@ -0,0 +1,247 @@ +From 837f6474ff63321a06124f86904a5805677cc599 Mon Sep 17 00:00:00 2001 +From: xinhui pan <xinhui.pan@amd.com> +Date: Tue, 7 May 2019 11:53:31 +0800 +Subject: [PATCH 1930/2940] drm/amdgpu: add badpages sysfs interafce + +add badpages node. +it will output badpages list in format +gpu pfn : gpu page size : flags + +example +0x00000000 : 0x00001000 : R +0x00000001 : 0x00001000 : R +0x00000002 : 0x00001000 : R +0x00000003 : 0x00001000 : R +0x00000004 : 0x00001000 : R +0x00000005 : 0x00001000 : R +0x00000006 : 0x00001000 : R +0x00000007 : 0x00001000 : P +0x00000008 : 0x00001000 : P +0x00000009 : 0x00001000 : P + +flags can be one of below characters +R: reserved. +P: pending for reserve. +F: failed to reserve for some reasons. + +Signed-off-by: xinhui pan <xinhui.pan@amd.com> +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + + 2 files changed, 147 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index 37cb3de08494..49c71cfc7fc6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -90,6 +90,12 @@ struct ras_manager { + struct ras_err_data err_data; + }; + ++struct ras_badpage { ++ unsigned int bp; ++ unsigned int size; ++ unsigned int flags; ++}; ++ + const char *ras_error_string[] = { + "none", + "parity", +@@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + + /* sysfs begin */ + ++static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, ++ struct ras_badpage **bps, unsigned int *count); ++ ++static char *amdgpu_ras_badpage_flags_str(unsigned int flags) ++{ ++ switch (flags) { ++ case 0: ++ return "R"; ++ case 1: ++ return "P"; ++ case 2: ++ default: ++ return "F"; ++ }; ++} ++ ++/* ++ * DOC: ras sysfs gpu_vram_bad_pages interface ++ * ++ * It allows user to read the bad pages of vram on the gpu through ++ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages ++ * ++ * It outputs multiple lines, and each line stands for one gpu page. ++ * ++ * The format of one line is below, ++ * gpu pfn : gpu page size : flags ++ * ++ * gpu pfn and gpu page size are printed in hex format. ++ * flags can be one of below character, ++ * R: reserved, this gpu page is reserved and not able to use. ++ * P: pending for reserve, this gpu page is marked as bad, will be reserved ++ * in next window of page_reserve. ++ * F: unable to reserve. this gpu page can't be reserved due to some reasons. ++ * ++ * examples: ++ * 0x00000001 : 0x00001000 : R ++ * 0x00000002 : 0x00001000 : P ++ */ ++ ++static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, ++ struct kobject *kobj, struct bin_attribute *attr, ++ char *buf, loff_t ppos, size_t count) ++{ ++ struct amdgpu_ras *con = ++ container_of(attr, struct amdgpu_ras, badpages_attr); ++ struct amdgpu_device *adev = con->adev; ++ const unsigned int element_size = ++ sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; ++ unsigned int start = (ppos + element_size - 1) / element_size; ++ unsigned int end = (ppos + count - 1) / element_size; ++ ssize_t s = 0; ++ struct ras_badpage *bps = NULL; ++ unsigned int bps_count = 0; ++ ++ memset(buf, 0, count); ++ ++ if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) ++ return 0; ++ ++ for (; start < end && start < bps_count; start++) ++ s += scnprintf(&buf[s], element_size + 1, ++ "0x%08x : 0x%08x : %1s\n", ++ bps[start].bp, ++ bps[start].size, ++ amdgpu_ras_badpage_flags_str(bps[start].flags)); ++ ++ kfree(bps); ++ ++ return s; ++} ++ + static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, + struct device_attribute *attr, char *buf) + { +@@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) + &con->features_attr.attr, + NULL + }; ++ struct bin_attribute *bin_attrs[] = { ++ &con->badpages_attr, ++ NULL ++ }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, ++ .bin_attrs = bin_attrs, + }; + + con->features_attr = (struct device_attribute) { +@@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) + }, + .show = amdgpu_ras_sysfs_features_read, + }; ++ ++ con->badpages_attr = (struct bin_attribute) { ++ .attr = { ++ .name = "gpu_vram_bad_pages", ++ .mode = S_IRUGO, ++ }, ++ .size = 0, ++ .private = NULL, ++ .read = amdgpu_ras_sysfs_badpages_read, ++ }; ++ + sysfs_attr_init(attrs[0]); ++ sysfs_bin_attr_init(bin_attrs[0]); + + return sysfs_create_group(&adev->dev->kobj, &group); + } +@@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) + &con->features_attr.attr, + NULL + }; ++ struct bin_attribute *bin_attrs[] = { ++ &con->badpages_attr, ++ NULL ++ }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, ++ .bin_attrs = bin_attrs, + }; + + sysfs_remove_group(&adev->dev->kobj, &group); +@@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) + /* ih end */ + + /* recovery begin */ ++ ++/* return 0 on success. ++ * caller need free bps. ++ */ ++static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, ++ struct ras_badpage **bps, unsigned int *count) ++{ ++ struct amdgpu_ras *con = amdgpu_ras_get_context(adev); ++ struct ras_err_handler_data *data; ++ int i = 0; ++ int ret = 0; ++ ++ if (!con || !con->eh_data || !bps || !count) ++ return -EINVAL; ++ ++ mutex_lock(&con->recovery_lock); ++ data = con->eh_data; ++ if (!data || data->count == 0) { ++ *bps = NULL; ++ goto out; ++ } ++ ++ *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); ++ if (!*bps) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ for (; i < data->count; i++) { ++ (*bps)[i] = (struct ras_badpage){ ++ .bp = data->bps[i].bp, ++ .size = AMDGPU_GPU_PAGE_SIZE, ++ .flags = 0, ++ }; ++ ++ if (data->last_reserved <= i) ++ (*bps)[i].flags = 1; ++ else if (data->bps[i].bo == NULL) ++ (*bps)[i].flags = 2; ++ } ++ ++ *count = data->count; ++out: ++ mutex_unlock(&con->recovery_lock); ++ return ret; ++} ++ + static void amdgpu_ras_do_recovery(struct work_struct *work) + { + struct amdgpu_ras *ras = +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +index 06ef325b61b8..59994ee00855 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +@@ -93,6 +93,7 @@ struct amdgpu_ras { + struct dentry *ent; + /* sysfs */ + struct device_attribute features_attr; ++ struct bin_attribute badpages_attr; + /* block array */ + struct ras_manager *objs; + +-- +2.17.1 + |