diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch | 186 |
1 file changed, 186 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch new file mode 100644 index 00000000..6c6f64df --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch @@ -0,0 +1,186 @@ +From 769ca42dc042d1ca918ced73a89f2732b626d328 Mon Sep 17 00:00:00 2001 +From: Tao Zhou <tao.zhou1@amd.com> +Date: Tue, 13 Aug 2019 10:39:05 +0800 +Subject: [PATCH 3750/4256] drm/amdgpu: change ras bps type to eeprom table + record structure + +change bps type from retired page to eeprom table record, prepare for +saving umc error records to eeprom + +Signed-off-by: Tao Zhou <tao.zhou1@amd.com> +Reviewed-by: Guchun Chen <guchun.chen@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59 ++++++++++++++++--------- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 11 +++-- + 2 files changed, 43 insertions(+), 27 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index 5c2276bb8325..c6f4c01b98a8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -1203,14 +1203,14 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ +- .bp = data->bps[i].bp, ++ .bp = data->bps[i].retired_page, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = 0, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = 1; +- else if (data->bps[i].bo == NULL) ++ else if (data->bps_bo[i] == NULL) + (*bps)[i].flags = 2; + } + +@@ -1304,30 +1304,40 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, + { + unsigned int old_space = data->count + data->space_left; + unsigned int new_space = old_space + pages; +- unsigned int align_space = ALIGN(new_space, 
1024); +- void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); +- +- if (!tmp) ++ unsigned int align_space = ALIGN(new_space, 512); ++ void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); ++ struct amdgpu_bo **bps_bo = ++ kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); ++ ++ if (!bps || !bps_bo) { ++ kfree(bps); ++ kfree(bps_bo); + return -ENOMEM; ++ } + + if (data->bps) { +- memcpy(tmp, data->bps, ++ memcpy(bps, data->bps, + data->count * sizeof(*data->bps)); + kfree(data->bps); + } ++ if (data->bps_bo) { ++ memcpy(bps_bo, data->bps_bo, ++ data->count * sizeof(*data->bps_bo)); ++ kfree(data->bps_bo); ++ } + +- data->bps = tmp; ++ data->bps = bps; ++ data->bps_bo = bps_bo; + data->space_left += align_space - old_space; + return 0; + } + + /* it deal with vram only. */ + int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, +- unsigned long *bps, int pages) ++ struct eeprom_table_record *bps, int pages) + { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; +- int i = pages; + int ret = 0; + + if (!con || !con->eh_data || !bps || pages <= 0) +@@ -1344,10 +1354,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + goto out; + } + +- while (i--) +- data->bps[data->count++].bp = bps[i]; +- ++ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); ++ data->count += pages; + data->space_left -= pages; ++ + out: + mutex_unlock(&con->recovery_lock); + +@@ -1372,13 +1382,13 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) + goto out; + /* reserve vram at driver post stage. 
*/ + for (i = data->last_reserved; i < data->count; i++) { +- bp = data->bps[i].bp; ++ bp = data->bps[i].retired_page; + + if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT, + PAGE_SIZE, &bo)) + DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp); + +- data->bps[i].bo = bo; ++ data->bps_bo[i] = bo; + data->last_reserved = i + 1; + } + out: +@@ -1403,11 +1413,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) + goto out; + + for (i = data->last_reserved - 1; i >= 0; i--) { +- bo = data->bps[i].bo; ++ bo = data->bps_bo[i]; + + amdgpu_ras_release_vram(adev, &bo); + +- data->bps[i].bo = bo; ++ data->bps_bo[i] = bo; + data->last_reserved = i; + } + out: +@@ -1423,12 +1433,19 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) + return 0; + } + ++/* ++ * read error record array in eeprom and reserve enough space for ++ * storing new bad pages ++ */ + static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) + { +- /* TODO +- * read the array to eeprom when SMU disabled. 
+- */ +- return 0; ++ struct eeprom_table_record *bps = NULL; ++ int ret; ++ ++ ret = amdgpu_ras_add_bad_pages(adev, bps, ++ adev->umc.max_ras_err_cnt_per_query); ++ ++ return ret; + } + + static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +index f487038ba331..bc1d45971607 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +@@ -351,11 +351,10 @@ struct ras_err_data { + }; + + struct ras_err_handler_data { +- /* point to bad pages array */ +- struct { +- unsigned long bp; +- struct amdgpu_bo *bo; +- } *bps; ++ /* point to bad page records array */ ++ struct eeprom_table_record *bps; ++ /* point to reserved bo array */ ++ struct amdgpu_bo **bps_bo; + /* the count of entries */ + int count; + /* the space can place new entries */ +@@ -492,7 +491,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, + + /* error handling functions */ + int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, +- unsigned long *bps, int pages); ++ struct eeprom_table_record *bps, int pages); + + int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); + +-- +2.17.1 + |