 common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch | 476 ++++++++++++++++++
 1 file changed, 476 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch b/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch
new file mode 100644
index 00000000..d2751c5f
--- /dev/null
+++ b/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch
@@ -0,0 +1,476 @@
+From 757050ee4e2166ab406163e584e61a85417231a0 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 12 Feb 2016 00:39:13 -0500
+Subject: [PATCH 0310/1110] drm/amdgpu/gfx: clean up harvest configuration (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Read back harvest configuration from registers and simplify
+calculations. No need to program the raster config registers.
+These are programmed as golden registers and the user mode
+drivers program them as well.
+
+v2: rebase on Tom's patches
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +-
+ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 127 ++++++++++---------------------
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 131 ++++++++++----------------------
+ 4 files changed, 82 insertions(+), 180 deletions(-)
+
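The core of the change: instead of deriving RB (render backend) masks from per-ASIC maximums threaded through several function parameters, the driver now reads the harvest configuration straight back from the fuse registers, and it stops reprogramming PA_SC_RASTER_CONFIG entirely, since golden-register init and the user mode drivers already set it. The readback itself reduces to OR-ing the factory and user disable registers and inverting the result under a width mask. A minimal standalone sketch of that arithmetic, using a hypothetical field offset and RB count rather than values from a real ASIC:

#include <stdio.h>

/* Sketch of the harvest readback this patch introduces (hypothetical
 * values, not read from hardware): CC_RB_BACKEND_DISABLE carries the
 * factory-fused harvesting, GC_USER_RB_BACKEND_DISABLE the user/driver
 * harvesting; OR-ing them yields every RB disabled for any reason. */
int main(void)
{
    unsigned cc_disable   = 0x1u << 16; /* assume RB 0 fused off */
    unsigned user_disable = 0;          /* assume no user harvesting */
    unsigned field_shift  = 16;         /* assumed BACKEND_DISABLE offset */
    unsigned rbs_per_sh   = 4;          /* max_backends_per_se / max_sh_per_se */

    unsigned data   = (cc_disable | user_disable) >> field_shift;
    unsigned mask   = (unsigned)((1ULL << rbs_per_sh) - 1);
    unsigned active = ~data & mask;

    printf("active RB bitmap: 0x%x\n", active); /* 0xe: RBs 1-3 enabled */
    return 0;
}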
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index e33c5a6..d56dc9c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1097,6 +1097,7 @@ struct amdgpu_gca_config {
+ unsigned multi_gpu_tile_size;
+ unsigned mc_arb_ramcfg;
+ unsigned gb_addr_config;
++ unsigned num_rbs;
+
+ uint32_t tile_mode_array[32];
+ uint32_t macrotile_mode_array[16];
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index ab58187..c825880 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ dev_info.max_memory_clock = adev->pm.default_mclk * 10;
+ }
+ dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+- dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+- adev->gfx.config.max_shader_engines;
++ dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+ dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+ dev_info._pad = 0;
+ dev_info.ids_flags = 0;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index 409c6af..55c38fb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -1598,39 +1598,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+ */
+ static u32 gfx_v7_0_create_bitmask(u32 bit_width)
+ {
+- return (u32)((1ULL<<bit_width)-1);
++ return (u32)((1ULL << bit_width) - 1);
+ }
+
+ /**
+- * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs
++ * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
+ *
+ * @adev: amdgpu_device pointer
+- * @max_rb_num: max RBs (render backends) for the asic
+- * @se_num: number of SEs (shader engines) for the asic
+- * @sh_per_se: number of SH blocks per SE for the asic
+ *
+- * Calculates the bitmask of disabled RBs (CIK).
+- * Returns the disabled RB bitmask.
++ * Calculates the bitmask of enabled RBs (CIK).
++ * Returns the enabled RB bitmask.
+ */
+-static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
+- u32 max_rb_num_per_se,
+- u32 sh_per_se)
++static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+ {
+ u32 data, mask;
+
+ data = RREG32(mmCC_RB_BACKEND_DISABLE);
+- if (data & 1)
+- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+- else
+- data = 0;
+-
+ data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+
++ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+- mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se);
++ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
++ adev->gfx.config.max_sh_per_se);
+
+- return data & mask;
++ return (~data) & mask;
+ }
+
+ /**
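A side note on gfx_v7_0_create_bitmask() above, which the hunk keeps apart from a spacing cleanup: the shift goes through 1ULL because shifting a 32-bit 1 by 32 would be undefined behavior when bit_width equals the word size. A quick sketch (not driver code) showing the edge case:

#include <stdio.h>

/* Why the helper shifts 1ULL rather than 1U: going through 64 bits
 * keeps the all-ones case (bit_width == 32) well defined. */
static unsigned create_bitmask(unsigned bit_width)
{
    return (unsigned)((1ULL << bit_width) - 1);
}

int main(void)
{
    printf("0x%x 0x%x 0x%x\n",
           create_bitmask(1),   /* 0x1        */
           create_bitmask(4),   /* 0xf        */
           create_bitmask(32)); /* 0xffffffff */
    return 0;
}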
+@@ -1639,68 +1631,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
+ * @adev: amdgpu_device pointer
+ * @se_num: number of SEs (shader engines) for the asic
+ * @sh_per_se: number of SH blocks per SE for the asic
+- * @max_rb_num: max RBs (render backends) for the asic
+ *
+ * Configures per-SE/SH RB registers (CIK).
+ */
+-static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
+- u32 se_num, u32 sh_per_se,
+- u32 max_rb_num_per_se)
++static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
+ {
+ int i, j;
+- u32 data;
+- u32 disabled_rbs = 0;
+- u32 enabled_rbs = 0;
++ u32 data, tmp, num_rbs = 0;
++ u32 active_rbs = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+- for (i = 0; i < se_num; i++) {
+- for (j = 0; j < sh_per_se; j++) {
++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v7_0_select_se_sh(adev, i, j);
+- data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
++ data = gfx_v7_0_get_rb_active_bitmap(adev);
+ if (adev->asic_type == CHIP_HAWAII)
+- disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
++ HAWAII_RB_BITMAP_WIDTH_PER_SH);
+ else
+- disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
++ CIK_RB_BITMAP_WIDTH_PER_SH);
+ }
+ }
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+- enabled_rbs = (~disabled_rbs) & ((1UL<<(max_rb_num_per_se*se_num))-1);
+-
+- adev->gfx.config.backend_enable_mask = enabled_rbs;
+-
+- mutex_lock(&adev->grbm_idx_mutex);
+- for (i = 0; i < se_num; i++) {
+- gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
+- data = 0;
+- for (j = 0; j < sh_per_se; j++) {
+- switch (enabled_rbs & 3) {
+- case 0:
+- if (j == 0)
+- data |= (RASTER_CONFIG_RB_MAP_3 <<
+- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
+- else
+- data |= (RASTER_CONFIG_RB_MAP_0 <<
+- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
+- break;
+- case 1:
+- data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
+- break;
+- case 2:
+- data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
+- break;
+- case 3:
+- default:
+- data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
+- break;
+- }
+- enabled_rbs >>= 2;
+- }
+- WREG32(mmPA_SC_RASTER_CONFIG, data);
+- }
+- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+- mutex_unlock(&adev->grbm_idx_mutex);
++ adev->gfx.config.backend_enable_mask = active_rbs;
++ tmp = active_rbs;
++ while (tmp >>= 1)
++ num_rbs++;
++ adev->gfx.config.num_rbs = num_rbs;
+ }
+
+ /**
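To see how the per-SE/SH readbacks combine into the single active_rbs word in gfx_v7_0_setup_rb() above, and what the trailing shift loop computes, here is an isolated sketch assuming a geometry of two shader engines with one SH each and a 4-bit per-SH bitmap width (hypothetical numbers; the real widths are the CIK_RB_BITMAP_WIDTH_PER_SH and HAWAII_RB_BITMAP_WIDTH_PER_SH constants):

#include <stdio.h>

int main(void)
{
    unsigned se_num = 2, sh_per_se = 1, width = 4;  /* assumed geometry */
    unsigned per_sh[2][1] = { { 0xF }, { 0x7 } };   /* hypothetical readbacks */
    unsigned active_rbs = 0, num_rbs = 0, tmp;

    /* Same packing as the loop above: SE0/SH0 lands in bits 0-3,
     * SE1/SH0 in bits 4-7. */
    for (unsigned i = 0; i < se_num; i++)
        for (unsigned j = 0; j < sh_per_se; j++)
            active_rbs |= per_sh[i][j] << ((i * sh_per_se + j) * width);

    /* Same shift loop as the patch: it counts how many right shifts
     * empty the word, i.e. the bit index of the highest set RB. */
    tmp = active_rbs;
    while (tmp >>= 1)
        num_rbs++;

    printf("active_rbs=0x%x num_rbs=%u\n", active_rbs, num_rbs); /* 0x7f, 6 */
    return 0;
}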
+@@ -1931,9 +1891,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
+
+ gfx_v7_0_tiling_mode_table_init(adev);
+
+- gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
+- adev->gfx.config.max_sh_per_se,
+- adev->gfx.config.max_backends_per_se);
++ gfx_v7_0_setup_rb(adev);
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+@@ -4039,28 +3997,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
+ }
+ }
+
+-static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev,
+- u32 se, u32 sh)
++static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+ {
+- u32 mask = 0, tmp, tmp1;
+- int i;
+-
+- gfx_v7_0_select_se_sh(adev, se, sh);
+- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++ u32 data, mask;
+
+- tmp &= 0xffff0000;
++ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
++ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+- tmp |= tmp1;
+- tmp >>= 16;
++ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
++ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
+- mask <<= 1;
+- mask |= 1;
+- }
++ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
++ adev->gfx.config.max_sh_per_se);
+
+- return (~tmp) & mask;
++ return (~data) & mask;
+ }
+
+ static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
+@@ -5317,10 +5267,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+- bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j);
++ gfx_v7_0_select_se_sh(adev, i, j);
++ bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
+ cu_info->bitmap[i][j] = bitmap;
+
+- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
++ for (k = 0; k < 16; k ++) {
+ if (bitmap & mask) {
+ if (counter < 2)
+ ao_bitmap |= mask;
+@@ -5332,9 +5283,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ }
+ }
++ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+- mutex_unlock(&adev->grbm_idx_mutex);
++
+ return 0;
+ }
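One more isolated sketch, for the gfx_v7_0_get_cu_info() hunk just above (the gfx_v8 version that follows is structurally identical): within each SH, the walk marks the first two active CUs as always-on (AO) and counts the remainder as merely active. Assuming a single hypothetical CU bitmap:

#include <stdio.h>

/* Isolated sketch of the AO-CU selection in get_cu_info() above:
 * the first two active CUs of a SH become always-on. */
int main(void)
{
    unsigned bitmap = 0x2D; /* hypothetical: CUs 0, 2, 3, 5 active */
    unsigned mask = 1, ao_bitmap = 0, counter = 0;

    for (unsigned k = 0; k < 16; k++) {  /* the patch iterates 16 CU slots */
        if (bitmap & mask) {
            if (counter < 2)             /* only the first two go AO */
                ao_bitmap |= mask;
            counter++;
        }
        mask <<= 1;
    }
    printf("active=%u ao_bitmap=0x%x\n", counter, ao_bitmap); /* 4, 0x5 */
    return 0;
}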
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 1b5abdb..5e04140 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -2572,11 +2572,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
+ }
+ }
+
+-static u32 gfx_v8_0_create_bitmask(u32 bit_width)
+-{
+- return (u32)((1ULL << bit_width) - 1);
+-}
+-
+ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+ {
+ u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+@@ -2597,89 +2592,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+ WREG32(mmGRBM_GFX_INDEX, data);
+ }
+
+-static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
+- u32 max_rb_num_per_se,
+- u32 sh_per_se)
++static u32 gfx_v8_0_create_bitmask(u32 bit_width)
++{
++ return (u32)((1ULL << bit_width) - 1);
++}
++
++static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+ {
+ u32 data, mask;
+
+ data = RREG32(mmCC_RB_BACKEND_DISABLE);
+- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+-
+ data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+
++ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+- mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
++ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
++ adev->gfx.config.max_sh_per_se);
+
+- return data & mask;
++ return (~data) & mask;
+ }
+
+-static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
+- u32 se_num, u32 sh_per_se,
+- u32 max_rb_num_per_se)
++static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+ {
+ int i, j;
+- u32 data, mask;
+- u32 disabled_rbs = 0;
+- u32 enabled_rbs = 0;
++ u32 data, tmp, num_rbs = 0;
++ u32 active_rbs = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+- for (i = 0; i < se_num; i++) {
+- for (j = 0; j < sh_per_se; j++) {
++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v8_0_select_se_sh(adev, i, j);
+- data = gfx_v8_0_get_rb_disabled(adev,
+- max_rb_num_per_se, sh_per_se);
+- disabled_rbs |= data << ((i * sh_per_se + j) *
+- RB_BITMAP_WIDTH_PER_SH);
++ data = gfx_v8_0_get_rb_active_bitmap(adev);
++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
++ RB_BITMAP_WIDTH_PER_SH);
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+- mask = 1;
+- for (i = 0; i < max_rb_num_per_se * se_num; i++) {
+- if (!(disabled_rbs & mask))
+- enabled_rbs |= mask;
+- mask <<= 1;
+- }
+-
+- adev->gfx.config.backend_enable_mask = enabled_rbs;
+-
+- mutex_lock(&adev->grbm_idx_mutex);
+- for (i = 0; i < se_num; i++) {
+- gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
+- data = RREG32(mmPA_SC_RASTER_CONFIG);
+- for (j = 0; j < sh_per_se; j++) {
+- switch (enabled_rbs & 3) {
+- case 0:
+- if (j == 0)
+- data |= (RASTER_CONFIG_RB_MAP_3 <<
+- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
+- else
+- data |= (RASTER_CONFIG_RB_MAP_0 <<
+- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
+- break;
+- case 1:
+- data |= (RASTER_CONFIG_RB_MAP_0 <<
+- (i * sh_per_se + j) * 2);
+- break;
+- case 2:
+- data |= (RASTER_CONFIG_RB_MAP_3 <<
+- (i * sh_per_se + j) * 2);
+- break;
+- case 3:
+- default:
+- data |= (RASTER_CONFIG_RB_MAP_2 <<
+- (i * sh_per_se + j) * 2);
+- break;
+- }
+- enabled_rbs >>= 2;
+- }
+- WREG32(mmPA_SC_RASTER_CONFIG, data);
+- }
+- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+- mutex_unlock(&adev->grbm_idx_mutex);
++ adev->gfx.config.backend_enable_mask = active_rbs;
++ tmp = active_rbs;
++ while (tmp >>= 1)
++ num_rbs++;
++ adev->gfx.config.num_rbs = num_rbs;
+ }
+
+ /**
+@@ -2749,9 +2705,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
+
+ gfx_v8_0_tiling_mode_table_init(adev);
+
+- gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
+- adev->gfx.config.max_sh_per_se,
+- adev->gfx.config.max_backends_per_se);
++ gfx_v8_0_setup_rb(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+@@ -5188,32 +5142,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
+ }
+ }
+
+-static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
+- u32 se, u32 sh)
++static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+ {
+- u32 mask = 0, tmp, tmp1;
+- int i;
++ u32 data, mask;
+
+- gfx_v8_0_select_se_sh(adev, se, sh);
+- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
++ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+- tmp &= 0xffff0000;
++ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
++ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+- tmp |= tmp1;
+- tmp >>= 16;
++ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
++ adev->gfx.config.max_sh_per_se);
+
+- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
+- mask <<= 1;
+- mask |= 1;
+- }
+-
+- return (~tmp) & mask;
++ return (~data) & mask;
+ }
+
+ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
+- struct amdgpu_cu_info *cu_info)
++ struct amdgpu_cu_info *cu_info)
+ {
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+@@ -5227,10 +5173,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+- bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
++ gfx_v8_0_select_se_sh(adev, i, j);
++ bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
+ cu_info->bitmap[i][j] = bitmap;
+
+- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
++ for (k = 0; k < 16; k ++) {
+ if (bitmap & mask) {
+ if (counter < 2)
+ ao_bitmap |= mask;
+@@ -5242,9 +5189,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ }
+ }
++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+- mutex_unlock(&adev->grbm_idx_mutex);
++
+ return 0;
+ }
+--
+2.7.4
+