diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch | 276 |
1 files changed, 276 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch new file mode 100644 index 00000000..61d10836 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch @@ -0,0 +1,276 @@ +From c4737669669d41e583029b94acfc2a26631b0813 Mon Sep 17 00:00:00 2001 +From: Krunoslav Kovac <Krunoslav.Kovac@amd.com> +Date: Fri, 19 Jan 2018 17:55:26 -0500 +Subject: [PATCH 3510/4131] drm/amd/display: Optimize regamma calculations + +There are several optimizations: +1) Use predefined SRGB, don't calculate. This is the most common case. +2) Precompute HW X points at boot since they're fixed in ColModule. +3) Precompute PQ - it never changes and is very CPU intensive in fixed point. +4) Reduce number of points in ColModule to 512 (32x16) from 1024. This also +requires reducing some regions for legacy DCEs to 16 pts at most. + +Performance: +1) is super-fast, build_output_tf is 1-2us, down from 25000-30000us. +Programming is also fast since there is only one register write. +2) + 3) give build_output_tf for PQ in the ~100us range, down from ~80000-110000us. +2) + 4) result in slightly over 50% improvement. It gives an idea of the +savings when we can't use SRGB or PQ table (e.g. sdr white level > 80). + +There's also a bit of refactoring: renaming some stuff that was misleading +and removing a lot of magic numbers that novices might not be able to +understand where they come from and what they mean. 
+ +Signed-off-by: Krunoslav Kovac <Krunoslav.Kovac@amd.com> +Reviewed-by: Tony Cheng <Tony.Cheng@amd.com> +Acked-by: Harry Wentland <harry.wentland@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + .../amd/display/dc/dce110/dce110_hw_sequencer.c | 56 ++++++++++------------ + .../gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c | 39 ++++++++------- + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 2 +- + drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 2 +- + 4 files changed, 47 insertions(+), 52 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +index b87974e..54c933b 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c ++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +@@ -407,6 +407,10 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted, + return true; + } + ++#define MAX_LOW_POINT 11 ++#define NUMBER_REGIONS 16 ++#define NUMBER_SW_SEGMENTS 16 ++ + static bool + dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + struct pwl_params *regamma_params) +@@ -421,8 +425,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + struct fixed31_32 y1_min; + struct fixed31_32 y3_max; + +- int32_t segment_start, segment_end; +- uint32_t i, j, k, seg_distr[16], increment, start_index, hw_points; ++ int32_t region_start, region_end; ++ uint32_t i, j, k, seg_distr[NUMBER_REGIONS], increment, start_index, hw_points; + + if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS) + return false; +@@ -437,34 +441,20 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + /* 16 segments + * segments are from 2^-11 to 2^5 + */ +- segment_start = -11; +- segment_end = 5; +- +- seg_distr[0] = 2; +- seg_distr[1] = 2; +- seg_distr[2] = 2; +- seg_distr[3] = 2; +- seg_distr[4] = 2; +- 
seg_distr[5] = 2; +- seg_distr[6] = 3; +- seg_distr[7] = 4; +- seg_distr[8] = 4; +- seg_distr[9] = 4; +- seg_distr[10] = 4; +- seg_distr[11] = 5; +- seg_distr[12] = 5; +- seg_distr[13] = 5; +- seg_distr[14] = 5; +- seg_distr[15] = 5; ++ region_start = -MAX_LOW_POINT; ++ region_end = NUMBER_REGIONS - MAX_LOW_POINT; ++ ++ for (i = 0; i < NUMBER_REGIONS; i++) ++ seg_distr[i] = 4; + + } else { + /* 10 segments + * segment is from 2^-10 to 2^0 + */ +- segment_start = -10; +- segment_end = 0; ++ region_start = -10; ++ region_end = 0; + +- seg_distr[0] = 3; ++ seg_distr[0] = 4; + seg_distr[1] = 4; + seg_distr[2] = 4; + seg_distr[3] = 4; +@@ -472,8 +462,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + seg_distr[5] = 4; + seg_distr[6] = 4; + seg_distr[7] = 4; +- seg_distr[8] = 5; +- seg_distr[9] = 5; ++ seg_distr[8] = 4; ++ seg_distr[9] = 4; + seg_distr[10] = -1; + seg_distr[11] = -1; + seg_distr[12] = -1; +@@ -488,10 +478,12 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + } + + j = 0; +- for (k = 0; k < (segment_end - segment_start); k++) { ++ for (k = 0; k < (region_end - region_start); k++) { + increment = 32 / (1 << seg_distr[k]); +- start_index = (segment_start + k + 25) * 32; +- for (i = start_index; i < start_index + 32; i += increment) { ++ start_index = (region_start + k + MAX_LOW_POINT) * ++ NUMBER_SW_SEGMENTS; ++ for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; ++ i += increment) { + if (j == hw_points - 1) + break; + rgb_resulted[j].red = output_tf->tf_pts.red[i]; +@@ -502,15 +494,15 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, + } + + /* last point */ +- start_index = (segment_end + 25) * 32; ++ start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; + rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; + rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; + rgb_resulted[hw_points - 1].blue 
= output_tf->tf_pts.blue[start_index]; + + arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), +- dal_fixed31_32_from_int(segment_start)); ++ dal_fixed31_32_from_int(region_start)); + arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), +- dal_fixed31_32_from_int(segment_end)); ++ dal_fixed31_32_from_int(region_end)); + + y_r = rgb_resulted[0].red; + y_g = rgb_resulted[0].green; +diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +index 53ba360..b3db639 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +@@ -232,10 +232,11 @@ bool cm_helper_convert_to_custom_float( + return true; + } + +- ++/* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */ + #define MAX_REGIONS_NUMBER 34 + #define MAX_LOW_POINT 25 +-#define NUMBER_SEGMENTS 32 ++#define NUMBER_REGIONS 32 ++#define NUMBER_SW_SEGMENTS 16 + + bool cm_helper_translate_curve_to_hw_format( + const struct dc_transfer_func *output_tf, +@@ -251,7 +252,7 @@ bool cm_helper_translate_curve_to_hw_format( + struct fixed31_32 y1_min; + struct fixed31_32 y3_max; + +- int32_t segment_start, segment_end; ++ int32_t region_start, region_end; + int32_t i; + uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; + +@@ -271,11 +272,11 @@ bool cm_helper_translate_curve_to_hw_format( + /* 32 segments + * segments are from 2^-25 to 2^7 + */ +- for (i = 0; i < 32 ; i++) ++ for (i = 0; i < NUMBER_REGIONS ; i++) + seg_distr[i] = 3; + +- segment_start = -25; +- segment_end = 7; ++ region_start = -MAX_LOW_POINT; ++ region_end = NUMBER_REGIONS - MAX_LOW_POINT; + } else { + /* 10 segments + * segment is from 2^-10 to 2^0 +@@ -289,14 +290,14 @@ bool cm_helper_translate_curve_to_hw_format( + seg_distr[5] = 4; + seg_distr[6] = 4; + seg_distr[7] = 4; +- seg_distr[8] = 5; +- seg_distr[9] = 5; ++ seg_distr[8] = 4; ++ 
seg_distr[9] = 4; + +- segment_start = -10; +- segment_end = 0; ++ region_start = -10; ++ region_end = 0; + } + +- for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++) ++ for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) + seg_distr[i] = -1; + + for (k = 0; k < MAX_REGIONS_NUMBER; k++) { +@@ -305,10 +306,12 @@ bool cm_helper_translate_curve_to_hw_format( + } + + j = 0; +- for (k = 0; k < (segment_end - segment_start); k++) { +- increment = NUMBER_SEGMENTS / (1 << seg_distr[k]); +- start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS; +- for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) { ++ for (k = 0; k < (region_end - region_start); k++) { ++ increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); ++ start_index = (region_start + k + MAX_LOW_POINT) * ++ NUMBER_SW_SEGMENTS; ++ for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; ++ i += increment) { + if (j == hw_points - 1) + break; + rgb_resulted[j].red = output_tf->tf_pts.red[i]; +@@ -319,15 +322,15 @@ bool cm_helper_translate_curve_to_hw_format( + } + + /* last point */ +- start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS; ++ start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; + rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; + rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; + rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + + arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), +- dal_fixed31_32_from_int(segment_start)); ++ dal_fixed31_32_from_int(region_start)); + arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), +- dal_fixed31_32_from_int(segment_end)); ++ dal_fixed31_32_from_int(region_end)); + + y_r = rgb_resulted[0].red; + y_g = rgb_resulted[0].green; +diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +index 080c253..8725cab 100644 +--- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +@@ -196,7 +196,7 @@ static void dpp1_cm_set_regamma_pwl( + case OPP_REGAMMA_SRGB: + re_mode = 1; + break; +- case OPP_REGAMMA_3_6: ++ case OPP_REGAMMA_XVYCC: + re_mode = 2; + break; + case OPP_REGAMMA_USER: +diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +index e3f0b40..b221581 100644 +--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h ++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +@@ -136,7 +136,7 @@ struct out_csc_color_matrix { + enum opp_regamma { + OPP_REGAMMA_BYPASS = 0, + OPP_REGAMMA_SRGB, +- OPP_REGAMMA_3_6, ++ OPP_REGAMMA_XVYCC, + OPP_REGAMMA_USER + }; + +-- +2.7.4 + |