about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch 276
1 files changed, 276 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch
new file mode 100644
index 00000000..61d10836
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3510-drm-amd-display-Optimize-regamma-calculations.patch
@@ -0,0 +1,276 @@
+From c4737669669d41e583029b94acfc2a26631b0813 Mon Sep 17 00:00:00 2001
+From: Krunoslav Kovac <Krunoslav.Kovac@amd.com>
+Date: Fri, 19 Jan 2018 17:55:26 -0500
+Subject: [PATCH 3510/4131] drm/amd/display: Optimize regamma calculations
+
+There are several optimizations:
+1) Use predefined SRGB, don't calculate. This is the most common case.
+2) Precompute HW X points at boot since they're fixed in ColModule
+3) Precompute PQ - it never changes and is very CPU intensive in fixed pt.
+4) Reduce number of points in ColModule to 512 (32x16) from 1024. This also
+requires reducing some regions for legacy DCEs to 16 pts at most.
+
+Performance
+1) is super-fast: build_output_tf takes 1-2us, down from 25000-30000us.
+Programming is also fast since it needs only one reg write.
+2)+3) gives build_output_tf for PQ in the ~100us range, down from ~80000-110000us.
+2) + 4) results in slightly over 50% improvement. It gives an idea of the
+savings when we can't use SRGB or PQ table (e.g. sdr white level > 80).
+
+There's also a bit of refactoring: renaming some stuff that was misleading
+and replacing a lot of magic numbers, whose origin and meaning novices
+might not be able to work out, with named constants.
+
+Signed-off-by: Krunoslav Kovac <Krunoslav.Kovac@amd.com>
+Reviewed-by: Tony Cheng <Tony.Cheng@amd.com>
+Acked-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ .../amd/display/dc/dce110/dce110_hw_sequencer.c | 56 ++++++++++------------
+ .../gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c | 39 ++++++++-------
+ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 2 +-
+ 4 files changed, 47 insertions(+), 52 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+index b87974e..54c933b 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+@@ -407,6 +407,10 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted,
+ return true;
+ }
+
++#define MAX_LOW_POINT 11
++#define NUMBER_REGIONS 16
++#define NUMBER_SW_SEGMENTS 16
++
+ static bool
+ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ struct pwl_params *regamma_params)
+@@ -421,8 +425,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ struct fixed31_32 y1_min;
+ struct fixed31_32 y3_max;
+
+- int32_t segment_start, segment_end;
+- uint32_t i, j, k, seg_distr[16], increment, start_index, hw_points;
++ int32_t region_start, region_end;
++ uint32_t i, j, k, seg_distr[NUMBER_REGIONS], increment, start_index, hw_points;
+
+ if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS)
+ return false;
+@@ -437,34 +441,20 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ /* 16 segments
+ * segments are from 2^-11 to 2^5
+ */
+- segment_start = -11;
+- segment_end = 5;
+-
+- seg_distr[0] = 2;
+- seg_distr[1] = 2;
+- seg_distr[2] = 2;
+- seg_distr[3] = 2;
+- seg_distr[4] = 2;
+- seg_distr[5] = 2;
+- seg_distr[6] = 3;
+- seg_distr[7] = 4;
+- seg_distr[8] = 4;
+- seg_distr[9] = 4;
+- seg_distr[10] = 4;
+- seg_distr[11] = 5;
+- seg_distr[12] = 5;
+- seg_distr[13] = 5;
+- seg_distr[14] = 5;
+- seg_distr[15] = 5;
++ region_start = -MAX_LOW_POINT;
++ region_end = NUMBER_REGIONS - MAX_LOW_POINT;
++
++ for (i = 0; i < NUMBER_REGIONS; i++)
++ seg_distr[i] = 4;
+
+ } else {
+ /* 10 segments
+ * segment is from 2^-10 to 2^0
+ */
+- segment_start = -10;
+- segment_end = 0;
++ region_start = -10;
++ region_end = 0;
+
+- seg_distr[0] = 3;
++ seg_distr[0] = 4;
+ seg_distr[1] = 4;
+ seg_distr[2] = 4;
+ seg_distr[3] = 4;
+@@ -472,8 +462,8 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ seg_distr[5] = 4;
+ seg_distr[6] = 4;
+ seg_distr[7] = 4;
+- seg_distr[8] = 5;
+- seg_distr[9] = 5;
++ seg_distr[8] = 4;
++ seg_distr[9] = 4;
+ seg_distr[10] = -1;
+ seg_distr[11] = -1;
+ seg_distr[12] = -1;
+@@ -488,10 +478,12 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ }
+
+ j = 0;
+- for (k = 0; k < (segment_end - segment_start); k++) {
++ for (k = 0; k < (region_end - region_start); k++) {
+ increment = 32 / (1 << seg_distr[k]);
+- start_index = (segment_start + k + 25) * 32;
+- for (i = start_index; i < start_index + 32; i += increment) {
++ start_index = (region_start + k + MAX_LOW_POINT) *
++ NUMBER_SW_SEGMENTS;
++ for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
++ i += increment) {
+ if (j == hw_points - 1)
+ break;
+ rgb_resulted[j].red = output_tf->tf_pts.red[i];
+@@ -502,15 +494,15 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
+ }
+
+ /* last point */
+- start_index = (segment_end + 25) * 32;
++ start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
+ rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
+ rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
+ rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+
+ arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+- dal_fixed31_32_from_int(segment_start));
++ dal_fixed31_32_from_int(region_start));
+ arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+- dal_fixed31_32_from_int(segment_end));
++ dal_fixed31_32_from_int(region_end));
+
+ y_r = rgb_resulted[0].red;
+ y_g = rgb_resulted[0].green;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+index 53ba360..b3db639 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+@@ -232,10 +232,11 @@ bool cm_helper_convert_to_custom_float(
+ return true;
+ }
+
+-
++/* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */
+ #define MAX_REGIONS_NUMBER 34
+ #define MAX_LOW_POINT 25
+-#define NUMBER_SEGMENTS 32
++#define NUMBER_REGIONS 32
++#define NUMBER_SW_SEGMENTS 16
+
+ bool cm_helper_translate_curve_to_hw_format(
+ const struct dc_transfer_func *output_tf,
+@@ -251,7 +252,7 @@ bool cm_helper_translate_curve_to_hw_format(
+ struct fixed31_32 y1_min;
+ struct fixed31_32 y3_max;
+
+- int32_t segment_start, segment_end;
++ int32_t region_start, region_end;
+ int32_t i;
+ uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
+
+@@ -271,11 +272,11 @@ bool cm_helper_translate_curve_to_hw_format(
+ /* 32 segments
+ * segments are from 2^-25 to 2^7
+ */
+- for (i = 0; i < 32 ; i++)
++ for (i = 0; i < NUMBER_REGIONS ; i++)
+ seg_distr[i] = 3;
+
+- segment_start = -25;
+- segment_end = 7;
++ region_start = -MAX_LOW_POINT;
++ region_end = NUMBER_REGIONS - MAX_LOW_POINT;
+ } else {
+ /* 10 segments
+ * segment is from 2^-10 to 2^0
+@@ -289,14 +290,14 @@ bool cm_helper_translate_curve_to_hw_format(
+ seg_distr[5] = 4;
+ seg_distr[6] = 4;
+ seg_distr[7] = 4;
+- seg_distr[8] = 5;
+- seg_distr[9] = 5;
++ seg_distr[8] = 4;
++ seg_distr[9] = 4;
+
+- segment_start = -10;
+- segment_end = 0;
++ region_start = -10;
++ region_end = 0;
+ }
+
+- for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++)
++ for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
+ seg_distr[i] = -1;
+
+ for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
+@@ -305,10 +306,12 @@ bool cm_helper_translate_curve_to_hw_format(
+ }
+
+ j = 0;
+- for (k = 0; k < (segment_end - segment_start); k++) {
+- increment = NUMBER_SEGMENTS / (1 << seg_distr[k]);
+- start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS;
+- for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) {
++ for (k = 0; k < (region_end - region_start); k++) {
++ increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
++ start_index = (region_start + k + MAX_LOW_POINT) *
++ NUMBER_SW_SEGMENTS;
++ for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
++ i += increment) {
+ if (j == hw_points - 1)
+ break;
+ rgb_resulted[j].red = output_tf->tf_pts.red[i];
+@@ -319,15 +322,15 @@ bool cm_helper_translate_curve_to_hw_format(
+ }
+
+ /* last point */
+- start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS;
++ start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
+ rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
+ rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
+ rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+
+ arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+- dal_fixed31_32_from_int(segment_start));
++ dal_fixed31_32_from_int(region_start));
+ arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+- dal_fixed31_32_from_int(segment_end));
++ dal_fixed31_32_from_int(region_end));
+
+ y_r = rgb_resulted[0].red;
+ y_g = rgb_resulted[0].green;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+index 080c253..8725cab 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+@@ -196,7 +196,7 @@ static void dpp1_cm_set_regamma_pwl(
+ case OPP_REGAMMA_SRGB:
+ re_mode = 1;
+ break;
+- case OPP_REGAMMA_3_6:
++ case OPP_REGAMMA_XVYCC:
+ re_mode = 2;
+ break;
+ case OPP_REGAMMA_USER:
+diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+index e3f0b40..b221581 100644
+--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+@@ -136,7 +136,7 @@ struct out_csc_color_matrix {
+ enum opp_regamma {
+ OPP_REGAMMA_BYPASS = 0,
+ OPP_REGAMMA_SRGB,
+- OPP_REGAMMA_3_6,
++ OPP_REGAMMA_XVYCC,
+ OPP_REGAMMA_USER
+ };
+
+--
+2.7.4
+