about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch | 186
1 files changed, 186 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch
new file mode 100644
index 00000000..6c6f64df
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3750-drm-amdgpu-change-ras-bps-type-to-eeprom-table-recor.patch
@@ -0,0 +1,186 @@
+From 769ca42dc042d1ca918ced73a89f2732b626d328 Mon Sep 17 00:00:00 2001
+From: Tao Zhou <tao.zhou1@amd.com>
+Date: Tue, 13 Aug 2019 10:39:05 +0800
+Subject: [PATCH 3750/4256] drm/amdgpu: change ras bps type to eeprom table
+ record structure
+
+Change the bps type from retired page to eeprom table record, in
+preparation for saving umc error records to eeprom.
+
+Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
+Reviewed-by: Guchun Chen <guchun.chen@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59 ++++++++++++++++---------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 11 +++--
+ 2 files changed, 43 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 5c2276bb8325..c6f4c01b98a8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -1203,14 +1203,14 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+
+ for (; i < data->count; i++) {
+ (*bps)[i] = (struct ras_badpage){
+- .bp = data->bps[i].bp,
++ .bp = data->bps[i].retired_page,
+ .size = AMDGPU_GPU_PAGE_SIZE,
+ .flags = 0,
+ };
+
+ if (data->last_reserved <= i)
+ (*bps)[i].flags = 1;
+- else if (data->bps[i].bo == NULL)
++ else if (data->bps_bo[i] == NULL)
+ (*bps)[i].flags = 2;
+ }
+
+@@ -1304,30 +1304,40 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
+ {
+ unsigned int old_space = data->count + data->space_left;
+ unsigned int new_space = old_space + pages;
+- unsigned int align_space = ALIGN(new_space, 1024);
+- void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+-
+- if (!tmp)
++ unsigned int align_space = ALIGN(new_space, 512);
++ void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
++ struct amdgpu_bo **bps_bo =
++ kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
++
++ if (!bps || !bps_bo) {
++ kfree(bps);
++ kfree(bps_bo);
+ return -ENOMEM;
++ }
+
+ if (data->bps) {
+- memcpy(tmp, data->bps,
++ memcpy(bps, data->bps,
+ data->count * sizeof(*data->bps));
+ kfree(data->bps);
+ }
++ if (data->bps_bo) {
++ memcpy(bps_bo, data->bps_bo,
++ data->count * sizeof(*data->bps_bo));
++ kfree(data->bps_bo);
++ }
+
+- data->bps = tmp;
++ data->bps = bps;
++ data->bps_bo = bps_bo;
+ data->space_left += align_space - old_space;
+ return 0;
+ }
+
+ /* it deal with vram only. */
+ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+- unsigned long *bps, int pages)
++ struct eeprom_table_record *bps, int pages)
+ {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+- int i = pages;
+ int ret = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+@@ -1344,10 +1354,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ goto out;
+ }
+
+- while (i--)
+- data->bps[data->count++].bp = bps[i];
+-
++ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
++ data->count += pages;
+ data->space_left -= pages;
++
+ out:
+ mutex_unlock(&con->recovery_lock);
+
+@@ -1372,13 +1382,13 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
+ goto out;
+ /* reserve vram at driver post stage. */
+ for (i = data->last_reserved; i < data->count; i++) {
+- bp = data->bps[i].bp;
++ bp = data->bps[i].retired_page;
+
+ if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT,
+ PAGE_SIZE, &bo))
+ DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp);
+
+- data->bps[i].bo = bo;
++ data->bps_bo[i] = bo;
+ data->last_reserved = i + 1;
+ }
+ out:
+@@ -1403,11 +1413,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
+ goto out;
+
+ for (i = data->last_reserved - 1; i >= 0; i--) {
+- bo = data->bps[i].bo;
++ bo = data->bps_bo[i];
+
+ amdgpu_ras_release_vram(adev, &bo);
+
+- data->bps[i].bo = bo;
++ data->bps_bo[i] = bo;
+ data->last_reserved = i;
+ }
+ out:
+@@ -1423,12 +1433,19 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+ return 0;
+ }
+
++/*
++ * read error record array in eeprom and reserve enough space for
++ * storing new bad pages
++ */
+ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+ {
+- /* TODO
+- * read the array to eeprom when SMU disabled.
+- */
+- return 0;
++ struct eeprom_table_record *bps = NULL;
++ int ret;
++
++ ret = amdgpu_ras_add_bad_pages(adev, bps,
++ adev->umc.max_ras_err_cnt_per_query);
++
++ return ret;
+ }
+
+ static int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+index f487038ba331..bc1d45971607 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+@@ -351,11 +351,10 @@ struct ras_err_data {
+ };
+
+ struct ras_err_handler_data {
+- /* point to bad pages array */
+- struct {
+- unsigned long bp;
+- struct amdgpu_bo *bo;
+- } *bps;
++ /* point to bad page records array */
++ struct eeprom_table_record *bps;
++ /* point to reserved bo array */
++ struct amdgpu_bo **bps_bo;
+ /* the count of entries */
+ int count;
+ /* the space can place new entries */
+@@ -492,7 +491,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+
+ /* error handling functions */
+ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+- unsigned long *bps, int pages);
++ struct eeprom_table_record *bps, int pages);
+
+ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
+
+--
+2.17.1
+