diff options
Diffstat (limited to 'common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch')
-rw-r--r-- | common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch b/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch new file mode 100644 index 00000000..d2751c5f --- /dev/null +++ b/common/recipes-kernel/linux/files/0310-drm-amdgpu-gfx-clean-up-harvest-configuration-v2.patch @@ -0,0 +1,476 @@ +From 757050ee4e2166ab406163e584e61a85417231a0 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Fri, 12 Feb 2016 00:39:13 -0500 +Subject: [PATCH 0310/1110] drm/amdgpu/gfx: clean up harvest configuration (v2) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Read back harvest configuration from registers and simplify +calculations. No need to program the raster config registers. +These are programmed as golden registers and the user mode +drivers program them as well. + +v2: rebase on Tom's patches + +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +- + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 127 ++++++++++--------------------- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 131 ++++++++++---------------------- + 4 files changed, 82 insertions(+), 180 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index e33c5a6..d56dc9c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1097,6 +1097,7 @@ struct amdgpu_gca_config { + unsigned multi_gpu_tile_size; + unsigned mc_arb_ramcfg; + unsigned gb_addr_config; ++ unsigned num_rbs; + + uint32_t tile_mode_array[32]; + uint32_t macrotile_mode_array[16]; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index ab58187..c825880 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + dev_info.max_memory_clock = adev->pm.default_mclk * 10; + } + dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; +- dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * +- adev->gfx.config.max_shader_engines; ++ dev_info.num_rb_pipes = adev->gfx.config.num_rbs; + dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; + dev_info._pad = 0; + dev_info.ids_flags = 0; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 409c6af..55c38fb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -1598,39 +1598,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) + */ + static u32 gfx_v7_0_create_bitmask(u32 bit_width) + { +- return (u32)((1ULL<<bit_width)-1); ++ return (u32)((1ULL << bit_width) - 1); + } + + /** +- * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs ++ * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs + * + * @adev: amdgpu_device pointer +- * @max_rb_num: max RBs (render backends) for the asic +- * @se_num: number of SEs (shader engines) for the asic +- * @sh_per_se: number of SH blocks per SE for the asic + * +- * Calculates the bitmask of disabled RBs (CIK). +- * Returns the disabled RB bitmask. ++ * Calculates the bitmask of enabled RBs (CIK). ++ * Returns the enabled RB bitmask. + */ +-static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, +- u32 max_rb_num_per_se, +- u32 sh_per_se) ++static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) + { + u32 data, mask; + + data = RREG32(mmCC_RB_BACKEND_DISABLE); +- if (data & 1) +- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; +- else +- data = 0; +- + data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); + ++ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + +- mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se); ++ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / ++ adev->gfx.config.max_sh_per_se); + +- return data & mask; ++ return (~data) & mask; + } + + /** +@@ -1639,68 +1631,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, + * @adev: amdgpu_device pointer + * @se_num: number of SEs (shader engines) for the asic + * @sh_per_se: number of SH blocks per SE for the asic +- * @max_rb_num: max RBs (render backends) for the asic + * + * Configures per-SE/SH RB registers (CIK). + */ +-static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, +- u32 se_num, u32 sh_per_se, +- u32 max_rb_num_per_se) ++static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) + { + int i, j; +- u32 data; +- u32 disabled_rbs = 0; +- u32 enabled_rbs = 0; ++ u32 data, tmp, num_rbs = 0; ++ u32 active_rbs = 0; + + mutex_lock(&adev->grbm_idx_mutex); +- for (i = 0; i < se_num; i++) { +- for (j = 0; j < sh_per_se; j++) { ++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { ++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v7_0_select_se_sh(adev, i, j); +- data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); ++ data = gfx_v7_0_get_rb_active_bitmap(adev); + if (adev->asic_type == CHIP_HAWAII) +- disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); ++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * ++ HAWAII_RB_BITMAP_WIDTH_PER_SH); + else +- disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); ++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * ++ CIK_RB_BITMAP_WIDTH_PER_SH); + } + } + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + +- enabled_rbs = (~disabled_rbs) & ((1UL<<(max_rb_num_per_se*se_num))-1); +- +- adev->gfx.config.backend_enable_mask = enabled_rbs; +- +- mutex_lock(&adev->grbm_idx_mutex); +- for (i = 0; i < se_num; i++) { +- gfx_v7_0_select_se_sh(adev, i, 0xffffffff); +- data = 0; +- for (j = 0; j < sh_per_se; j++) { +- switch (enabled_rbs & 3) { +- case 0: +- if (j == 0) +- data |= (RASTER_CONFIG_RB_MAP_3 << +- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); +- else +- data |= (RASTER_CONFIG_RB_MAP_0 << +- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); +- break; +- case 1: +- data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); +- break; +- case 2: +- data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); +- break; +- case 3: +- default: +- data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); +- break; +- } +- enabled_rbs >>= 2; +- } +- WREG32(mmPA_SC_RASTER_CONFIG, data); +- } +- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); ++ adev->gfx.config.backend_enable_mask = active_rbs; ++ tmp = active_rbs; ++ while (tmp >>= 1) ++ num_rbs++; ++ adev->gfx.config.num_rbs = num_rbs; + } + + /** +@@ -1931,9 +1891,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) + + gfx_v7_0_tiling_mode_table_init(adev); + +- gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines, +- adev->gfx.config.max_sh_per_se, +- adev->gfx.config.max_backends_per_se); ++ gfx_v7_0_setup_rb(adev); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, +@@ -4039,28 +3997,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, + } + } + +-static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev, +- u32 se, u32 sh) ++static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) + { +- u32 mask = 0, tmp, tmp1; +- int i; +- +- gfx_v7_0_select_se_sh(adev, se, sh); +- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); +- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); +- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ u32 data, mask; + +- tmp &= 0xffff0000; ++ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); ++ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); + +- tmp |= tmp1; +- tmp >>= 16; ++ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; ++ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + +- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { +- mask <<= 1; +- mask |= 1; +- } ++ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / ++ adev->gfx.config.max_sh_per_se); + +- return (~tmp) & mask; ++ return (~data) & mask; + } + + static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) +@@ -5317,10 +5267,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, + mask = 1; + ao_bitmap = 0; + counter = 0; +- bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j); ++ gfx_v7_0_select_se_sh(adev, i, j); ++ bitmap = gfx_v7_0_get_cu_active_bitmap(adev); + cu_info->bitmap[i][j] = bitmap; + +- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { ++ for (k = 0; k < 16; k ++) { + if (bitmap & mask) { + if (counter < 2) + ao_bitmap |= mask; +@@ -5332,9 +5283,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + } + } ++ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; +- mutex_unlock(&adev->grbm_idx_mutex); ++ + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 1b5abdb..5e04140 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -2572,11 +2572,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) + } + } + +-static u32 gfx_v8_0_create_bitmask(u32 bit_width) +-{ +- return (u32)((1ULL << bit_width) - 1); +-} +- + void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) + { + u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); +@@ -2597,89 +2592,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) + WREG32(mmGRBM_GFX_INDEX, data); + } + +-static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, +- u32 max_rb_num_per_se, +- u32 sh_per_se) ++static u32 gfx_v8_0_create_bitmask(u32 bit_width) ++{ ++ return (u32)((1ULL << bit_width) - 1); ++} ++ ++static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) + { + u32 data, mask; + + data = RREG32(mmCC_RB_BACKEND_DISABLE); +- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; +- + data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); + ++ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + +- mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se); ++ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / ++ adev->gfx.config.max_sh_per_se); + +- return data & mask; ++ return (~data) & mask; + } + +-static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, +- u32 se_num, u32 sh_per_se, +- u32 max_rb_num_per_se) ++static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + { + int i, j; +- u32 data, mask; +- u32 disabled_rbs = 0; +- u32 enabled_rbs = 0; ++ u32 data, tmp, num_rbs = 0; ++ u32 active_rbs = 0; + + mutex_lock(&adev->grbm_idx_mutex); +- for (i = 0; i < se_num; i++) { +- for (j = 0; j < sh_per_se; j++) { ++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { ++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v8_0_select_se_sh(adev, i, j); +- data = gfx_v8_0_get_rb_disabled(adev, +- max_rb_num_per_se, sh_per_se); +- disabled_rbs |= data << ((i * sh_per_se + j) * +- RB_BITMAP_WIDTH_PER_SH); ++ data = gfx_v8_0_get_rb_active_bitmap(adev); ++ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * ++ RB_BITMAP_WIDTH_PER_SH); + } + } + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + +- mask = 1; +- for (i = 0; i < max_rb_num_per_se * se_num; i++) { +- if (!(disabled_rbs & mask)) +- enabled_rbs |= mask; +- mask <<= 1; +- } +- +- adev->gfx.config.backend_enable_mask = enabled_rbs; +- +- mutex_lock(&adev->grbm_idx_mutex); +- for (i = 0; i < se_num; i++) { +- gfx_v8_0_select_se_sh(adev, i, 0xffffffff); +- data = RREG32(mmPA_SC_RASTER_CONFIG); +- for (j = 0; j < sh_per_se; j++) { +- switch (enabled_rbs & 3) { +- case 0: +- if (j == 0) +- data |= (RASTER_CONFIG_RB_MAP_3 << +- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); +- else +- data |= (RASTER_CONFIG_RB_MAP_0 << +- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); +- break; +- case 1: +- data |= (RASTER_CONFIG_RB_MAP_0 << +- (i * sh_per_se + j) * 2); +- break; +- case 2: +- data |= (RASTER_CONFIG_RB_MAP_3 << +- (i * sh_per_se + j) * 2); +- break; +- case 3: +- default: +- data |= (RASTER_CONFIG_RB_MAP_2 << +- (i * sh_per_se + j) * 2); +- break; +- } +- enabled_rbs >>= 2; +- } +- WREG32(mmPA_SC_RASTER_CONFIG, data); +- } +- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); ++ adev->gfx.config.backend_enable_mask = active_rbs; ++ tmp = active_rbs; ++ while (tmp >>= 1) ++ num_rbs++; ++ adev->gfx.config.num_rbs = num_rbs; + } + + /** +@@ -2749,9 +2705,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) + + gfx_v8_0_tiling_mode_table_init(adev); + +- gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines, +- adev->gfx.config.max_sh_per_se, +- adev->gfx.config.max_backends_per_se); ++ gfx_v8_0_setup_rb(adev); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ +@@ -5188,32 +5142,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) + } + } + +-static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, +- u32 se, u32 sh) ++static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) + { +- u32 mask = 0, tmp, tmp1; +- int i; ++ u32 data, mask; + +- gfx_v8_0_select_se_sh(adev, se, sh); +- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); +- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); +- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); ++ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); + +- tmp &= 0xffff0000; ++ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; ++ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + +- tmp |= tmp1; +- tmp >>= 16; ++ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / ++ adev->gfx.config.max_sh_per_se); + +- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { +- mask <<= 1; +- mask |= 1; +- } +- +- return (~tmp) & mask; ++ return (~data) & mask; + } + + int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, +- struct amdgpu_cu_info *cu_info) ++ struct amdgpu_cu_info *cu_info) + { + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; +@@ -5227,10 +5173,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, + mask = 1; + ao_bitmap = 0; + counter = 0; +- bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); ++ gfx_v8_0_select_se_sh(adev, i, j); ++ bitmap = gfx_v8_0_get_cu_active_bitmap(adev); + cu_info->bitmap[i][j] = bitmap; + +- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { ++ for (k = 0; k < 16; k ++) { + if (bitmap & mask) { + if (counter < 2) + ao_bitmap |= mask; +@@ -5242,9 +5189,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + } + } ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; +- mutex_unlock(&adev->grbm_idx_mutex); ++ + return 0; + } +-- +2.7.4 + |