about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch 7283
1 files changed, 7283 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch
new file mode 100644
index 00000000..992d3920
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3090-drm-amd-display-support-dummy-pstate.patch
@@ -0,0 +1,7283 @@
+From e1e58ece64ca912cda59f58f4b0a4037d5cdbe08 Mon Sep 17 00:00:00 2001
+From: Jun Lei <Jun.Lei@amd.com>
+Date: Mon, 8 Jul 2019 15:15:42 -0400
+Subject: [PATCH 3090/4256] drm/amd/display: support "dummy pstate"
+
+[why]
+Existing support in DC for pstate only accounts for a single latency. This is sufficient when the
+variance of latency is small, or when pstate support isn't necessary for correct ASIC functionality.
+
+Newer ASICs violate both existing assumptions. PState support is mandatory for correct ASIC
+functionality, but not all latencies have to be supported. Existing code supports a "full p state" which
+allows memory clock to change, but is hard for DCN to support (as it requires very large buffers).
+New code will now fall back to "dummy p state" support when "full p state" cannot be supported.
+This easy p state support should always be allowed.
+
+[how]
+Define a new latency in socBB. Add fallback logic to support it. Note DML is also updated to ensure
+that fallback will always work.
+
+Change-Id: I068e34cae8fa2f7cd31b530a68822c479525048c
+Signed-off-by: Jun Lei <Jun.Lei@amd.com>
+Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
+Acked-by: Leo Li <sunpeng.li@amd.com>
+---
+ .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 +
+ drivers/gpu/drm/amd/display/dc/dc.h | 7 +
+ .../drm/amd/display/dc/dcn20/dcn20_hubbub.c | 11 +
+ .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 10 +-
+ .../drm/amd/display/dc/dcn20/dcn20_resource.c | 59 +-
+ drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 +
+ .../dc/dml/dcn20/display_mode_vba_20v2.c | 5109 +++++++++++++++++
+ .../dc/dml/dcn20/display_mode_vba_20v2.h | 32 +
+ .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 1701 ++++++
+ .../dc/dml/dcn20/display_rq_dlg_calc_20v2.h | 74 +
+ .../drm/amd/display/dc/dml/display_mode_lib.c | 12 +
+ .../drm/amd/display/dc/dml/display_mode_lib.h | 1 +
+ .../amd/display/dc/dml/display_mode_structs.h | 1 +
+ .../drm/amd/display/dc/dml/display_mode_vba.c | 8 +-
+ 14 files changed, 7022 insertions(+), 8 deletions(-)
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+index 3cff4f0518d3..7ff0396956b3 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+@@ -201,6 +201,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
+ }
+
+ if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) {
++ clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
+ clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support;
+ if (pp_smu && pp_smu->set_pstate_handshake_support)
+ pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support);
+@@ -308,6 +309,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr)
+ memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ // Assumption is that boot state always supports pstate
+ clk_mgr->clks.p_state_change_support = true;
++ clk_mgr->clks.prev_p_state_change_support = true;
+ }
+
+ void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 24320d04f5d1..8d890468908f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -124,6 +124,7 @@ struct dc_caps {
+ struct dc_bug_wa {
+ bool no_connect_phy_config;
+ bool dedcn20_305_wa;
++ struct display_mode_lib alternate_dml;
+ };
+ #endif
+
+@@ -266,6 +267,12 @@ struct dc_clocks {
+ int phyclk_khz;
+ int dramclk_khz;
+ bool p_state_change_support;
++
++ /*
++ * Elements below are not compared for the purposes of
++ * optimization required
++ */
++ bool prev_p_state_change_support;
+ };
+
+ struct dc_bw_validation_profile {
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+index 6e2dbd03f9bf..31d6e79ba2b8 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+@@ -26,6 +26,7 @@
+
+ #include "dcn20_hubbub.h"
+ #include "reg_helper.h"
++#include "clk_mgr.h"
+
+ #define REG(reg)\
+ hubbub1->regs->reg
+@@ -553,6 +554,16 @@ static void hubbub2_program_watermarks(
+ */
+ hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+ hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
++
++ /*
++ * There's a special case when going from p-state supported to p-state unsupported:
++ * here we are going to LOWER watermarks to go to dummy p-state only, but this has
++ * to be done in prepare_bandwidth, not optimize
++ */
++ if (hubbub1->base.ctx->dc->clk_mgr->clks.prev_p_state_change_support == true &&
++ hubbub1->base.ctx->dc->clk_mgr->clks.p_state_change_support == false)
++ safe_to_lower = true;
++
+ hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+
+ REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+index e60be115691b..08a96faef775 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+@@ -1445,16 +1445,16 @@ void dcn20_prepare_bandwidth(
+ {
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
++ dc->clk_mgr->funcs->update_clocks(
++ dc->clk_mgr,
++ context,
++ false);
++
+ /* program dchubbub watermarks */
+ hubbub->funcs->program_watermarks(hubbub,
+ &context->bw_ctx.bw.dcn.watermarks,
+ dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
+ false);
+-
+- dc->clk_mgr->funcs->update_clocks(
+- dc->clk_mgr,
+- context,
+- false);
+ }
+
+ void dcn20_optimize_bandwidth(
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+index 193270ba60e6..2cf788a3704e 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+@@ -2425,7 +2425,7 @@ void dcn20_calculate_dlg_params(
+ }
+ }
+
+-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
++static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context,
+ bool fast_validate)
+ {
+ bool out = false;
+@@ -2477,6 +2477,62 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ return out;
+ }
+
++
++bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
++ bool fast_validate)
++{
++ bool voltage_supported = false;
++ bool full_pstate_supported = false;
++ bool dummy_pstate_supported = false;
++ double p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
++
++ if (fast_validate)
++ return dcn20_validate_bandwidth_internal(dc, context, true);
++
++
++ // Best case, we support full UCLK switch latency
++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
++ full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
++
++ if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
++ (voltage_supported && full_pstate_supported)) {
++ context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
++ goto restore_dml_state;
++ }
++
++ // Fallback #1: Try to only support G6 temperature read latency
++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
++
++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
++ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
++
++ if (voltage_supported && dummy_pstate_supported) {
++ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
++ goto restore_dml_state;
++ }
++
++ // Fallback #2: Retry with "new" DCN20 to support G6 temperature read latency
++ memcpy (&context->bw_ctx.dml, &dc->work_arounds.alternate_dml, sizeof (struct display_mode_lib));
++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
++
++ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
++ dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
++
++ if (voltage_supported && dummy_pstate_supported) {
++ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
++ goto restore_dml_state;
++ }
++
++ // ERROR: fallback #2 is supposed to always work.
++ ASSERT(false);
++
++restore_dml_state:
++ memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
++ context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
++
++ return voltage_supported;
++}
++
+ struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer(
+ struct dc_state *state,
+ const struct resource_pool *pool,
+@@ -3073,6 +3129,7 @@ static bool construct(
+ }
+
+ dml_init_instance(&dc->dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10);
++ dml_init_instance(&dc->work_arounds.alternate_dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10v2);
+
+ if (!dc->debug.disable_pplib_wm_range) {
+ struct pp_smu_wm_range_sets ranges = {0};
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+index 0bb7a20675c4..1735fc1e2eb1 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+@@ -38,6 +38,8 @@ ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ CFLAGS_display_mode_vba.o := $(dml_ccflags)
+ CFLAGS_display_mode_vba_20.o := $(dml_ccflags)
+ CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags)
++CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags)
++CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags)
+ endif
+ ifdef CONFIG_DRM_AMD_DCN3AG
+ CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags)
+@@ -51,6 +53,7 @@ DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \
+
+ ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
++DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
+ endif
+
+ AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML))
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+new file mode 100644
+index 000000000000..22455db54980
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+@@ -0,0 +1,5109 @@
++/*
++ * Copyright 2018 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#include "../display_mode_lib.h"
++#include "display_mode_vba_20v2.h"
++#include "../dml_inline_defs.h"
++
++/*
++ * NOTE:
++ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
++ *
++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
++ * ways. Unless there is something clearly wrong with it the code should
++ * remain as-is as it provides us with a guarantee from HW that it is correct.
++ */
++
++#define BPP_INVALID 0
++#define BPP_BLENDED_PIPE 0xffffffff
++
++static double adjust_ReturnBW(
++ struct display_mode_lib *mode_lib,
++ double ReturnBW,
++ bool DCCEnabledAnyPlane,
++ double ReturnBandwidthToDCN);
++static unsigned int dscceComputeDelay(
++ unsigned int bpc,
++ double bpp,
++ unsigned int sliceWidth,
++ unsigned int numSlices,
++ enum output_format_class pixelFormat);
++static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
++static bool CalculateDelayAfterScaler(
++ struct display_mode_lib *mode_lib,
++ double ReturnBW,
++ double ReadBandwidthPlaneLuma,
++ double ReadBandwidthPlaneChroma,
++ double TotalDataReadBandwidth,
++ double DisplayPipeLineDeliveryTimeLuma,
++ double DisplayPipeLineDeliveryTimeChroma,
++ double DPPCLK,
++ double DISPCLK,
++ double PixelClock,
++ unsigned int DSCDelay,
++ unsigned int DPPPerPlane,
++ bool ScalerEnabled,
++ unsigned int NumberOfCursors,
++ double DPPCLKDelaySubtotal,
++ double DPPCLKDelaySCL,
++ double DPPCLKDelaySCLLBOnly,
++ double DPPCLKDelayCNVCFormater,
++ double DPPCLKDelayCNVCCursor,
++ double DISPCLKDelaySubtotal,
++ unsigned int ScalerRecoutWidth,
++ enum output_format_class OutputFormat,
++ unsigned int HTotal,
++ unsigned int SwathWidthSingleDPPY,
++ double BytePerPixelDETY,
++ double BytePerPixelDETC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ bool Interlace,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double *DSTXAfterScaler,
++ double *DSTYAfterScaler
++ );
++// Super monster function with some 45 arguments
++static bool CalculatePrefetchSchedule(
++ struct display_mode_lib *mode_lib,
++ double DPPCLK,
++ double DISPCLK,
++ double PixelClock,
++ double DCFCLKDeepSleep,
++ unsigned int DPPPerPlane,
++ unsigned int NumberOfCursors,
++ unsigned int VBlank,
++ unsigned int HTotal,
++ unsigned int MaxInterDCNTileRepeaters,
++ unsigned int VStartup,
++ unsigned int PageTableLevels,
++ bool GPUVMEnable,
++ bool DynamicMetadataEnable,
++ unsigned int DynamicMetadataLinesBeforeActiveRequired,
++ unsigned int DynamicMetadataTransmittedBytes,
++ bool DCCEnable,
++ double UrgentLatencyPixelDataOnly,
++ double UrgentExtraLatency,
++ double TCalc,
++ unsigned int PDEAndMetaPTEBytesFrame,
++ unsigned int MetaRowByte,
++ unsigned int PixelPTEBytesPerRow,
++ double PrefetchSourceLinesY,
++ unsigned int SwathWidthY,
++ double BytePerPixelDETY,
++ double VInitPreFillY,
++ unsigned int MaxNumSwathY,
++ double PrefetchSourceLinesC,
++ double BytePerPixelDETC,
++ double VInitPreFillC,
++ unsigned int MaxNumSwathC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ double TWait,
++ bool XFCEnabled,
++ double XFCRemoteSurfaceFlipDelay,
++ bool InterlaceEnable,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double DSTXAfterScaler,
++ double DSTYAfterScaler,
++ double *DestinationLinesForPrefetch,
++ double *PrefetchBandwidth,
++ double *DestinationLinesToRequestVMInVBlank,
++ double *DestinationLinesToRequestRowInVBlank,
++ double *VRatioPrefetchY,
++ double *VRatioPrefetchC,
++ double *RequiredPrefetchPixDataBW,
++ double *Tno_bw,
++ unsigned int *VUpdateOffsetPix,
++ double *VUpdateWidthPix,
++ double *VReadyOffsetPix);
++static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
++static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
++static double CalculatePrefetchSourceLines(
++ struct display_mode_lib *mode_lib,
++ double VRatio,
++ double vtaps,
++ bool Interlace,
++ bool ProgressiveToInterlaceUnitInOPP,
++ unsigned int SwathHeight,
++ unsigned int ViewportYStart,
++ double *VInitPreFill,
++ unsigned int *MaxNumSwath);
++static unsigned int CalculateVMAndRowBytes(
++ struct display_mode_lib *mode_lib,
++ bool DCCEnable,
++ unsigned int BlockHeight256Bytes,
++ unsigned int BlockWidth256Bytes,
++ enum source_format_class SourcePixelFormat,
++ unsigned int SurfaceTiling,
++ unsigned int BytePerPixel,
++ enum scan_direction_class ScanDirection,
++ unsigned int ViewportWidth,
++ unsigned int ViewportHeight,
++ unsigned int SwathWidthY,
++ bool GPUVMEnable,
++ unsigned int VMMPageSize,
++ unsigned int PTEBufferSizeInRequestsLuma,
++ unsigned int PDEProcessingBufIn64KBReqs,
++ unsigned int Pitch,
++ unsigned int DCCMetaPitch,
++ unsigned int *MacroTileWidth,
++ unsigned int *MetaRowByte,
++ unsigned int *PixelPTEBytesPerRow,
++ bool *PTEBufferSizeNotExceeded,
++ unsigned int *dpte_row_height,
++ unsigned int *meta_row_height);
++static double CalculateTWait(
++ unsigned int PrefetchMode,
++ double DRAMClockChangeLatency,
++ double UrgentLatencyPixelDataOnly,
++ double SREnterPlusExitTime);
++static double CalculateRemoteSurfaceFlipDelay(
++ struct display_mode_lib *mode_lib,
++ double VRatio,
++ double SwathWidth,
++ double Bpp,
++ double LineTime,
++ double XFCTSlvVupdateOffset,
++ double XFCTSlvVupdateWidth,
++ double XFCTSlvVreadyOffset,
++ double XFCXBUFLatencyTolerance,
++ double XFCFillBWOverhead,
++ double XFCSlvChunkSize,
++ double XFCBusTransportTime,
++ double TCalc,
++ double TWait,
++ double *SrcActiveDrainRate,
++ double *TInitXFill,
++ double *TslvChk);
++static void CalculateActiveRowBandwidth(
++ bool GPUVMEnable,
++ enum source_format_class SourcePixelFormat,
++ double VRatio,
++ bool DCCEnable,
++ double LineTime,
++ unsigned int MetaRowByteLuma,
++ unsigned int MetaRowByteChroma,
++ unsigned int meta_row_height_luma,
++ unsigned int meta_row_height_chroma,
++ unsigned int PixelPTEBytesPerRowLuma,
++ unsigned int PixelPTEBytesPerRowChroma,
++ unsigned int dpte_row_height_luma,
++ unsigned int dpte_row_height_chroma,
++ double *meta_row_bw,
++ double *dpte_row_bw,
++ double *qual_row_bw);
++static void CalculateFlipSchedule(
++ struct display_mode_lib *mode_lib,
++ double UrgentExtraLatency,
++ double UrgentLatencyPixelDataOnly,
++ unsigned int GPUVMMaxPageTableLevels,
++ bool GPUVMEnable,
++ double BandwidthAvailableForImmediateFlip,
++ unsigned int TotImmediateFlipBytes,
++ enum source_format_class SourcePixelFormat,
++ unsigned int ImmediateFlipBytes,
++ double LineTime,
++ double VRatio,
++ double Tno_bw,
++ double PDEAndMetaPTEBytesFrame,
++ unsigned int MetaRowByte,
++ unsigned int PixelPTEBytesPerRow,
++ bool DCCEnable,
++ unsigned int dpte_row_height,
++ unsigned int meta_row_height,
++ double qual_row_bw,
++ double *DestinationLinesToRequestVMInImmediateFlip,
++ double *DestinationLinesToRequestRowInImmediateFlip,
++ double *final_flip_bw,
++ bool *ImmediateFlipSupportedForPipe);
++static double CalculateWriteBackDelay(
++ enum source_format_class WritebackPixelFormat,
++ double WritebackHRatio,
++ double WritebackVRatio,
++ unsigned int WritebackLumaHTaps,
++ unsigned int WritebackLumaVTaps,
++ unsigned int WritebackChromaHTaps,
++ unsigned int WritebackChromaVTaps,
++ unsigned int WritebackDestinationWidth);
++
++static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
++static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
++ struct display_mode_lib *mode_lib);
++
++void dml20v2_recalculate(struct display_mode_lib *mode_lib)
++{
++ ModeSupportAndSystemConfiguration(mode_lib);
++ mode_lib->vba.FabricAndDRAMBandwidth = dml_min(
++ mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth,
++ mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0;
++ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
++ dml20v2_DisplayPipeConfiguration(mode_lib);
++ dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
++}
++
++static double adjust_ReturnBW(
++ struct display_mode_lib *mode_lib,
++ double ReturnBW,
++ bool DCCEnabledAnyPlane,
++ double ReturnBandwidthToDCN)
++{
++ double CriticalCompression;
++
++ if (DCCEnabledAnyPlane
++ && ReturnBandwidthToDCN
++ > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0)
++ ReturnBW =
++ dml_min(
++ ReturnBW,
++ ReturnBandwidthToDCN * 4
++ * (1.0
++ - mode_lib->vba.UrgentLatencyPixelDataOnly
++ / ((mode_lib->vba.ROBBufferSizeInKByte
++ - mode_lib->vba.PixelChunkSizeInKByte)
++ * 1024
++ / ReturnBandwidthToDCN
++ - mode_lib->vba.DCFCLK
++ * mode_lib->vba.ReturnBusWidth
++ / 4)
++ + mode_lib->vba.UrgentLatencyPixelDataOnly));
++
++ CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK
++ * mode_lib->vba.UrgentLatencyPixelDataOnly
++ / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly
++ + (mode_lib->vba.ROBBufferSizeInKByte
++ - mode_lib->vba.PixelChunkSizeInKByte)
++ * 1024);
++
++ if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0)
++ ReturnBW =
++ dml_min(
++ ReturnBW,
++ 4.0 * ReturnBandwidthToDCN
++ * (mode_lib->vba.ROBBufferSizeInKByte
++ - mode_lib->vba.PixelChunkSizeInKByte)
++ * 1024
++ * mode_lib->vba.ReturnBusWidth
++ * mode_lib->vba.DCFCLK
++ * mode_lib->vba.UrgentLatencyPixelDataOnly
++ / dml_pow(
++ (ReturnBandwidthToDCN
++ * mode_lib->vba.UrgentLatencyPixelDataOnly
++ + (mode_lib->vba.ROBBufferSizeInKByte
++ - mode_lib->vba.PixelChunkSizeInKByte)
++ * 1024),
++ 2));
++
++ return ReturnBW;
++}
++
++static unsigned int dscceComputeDelay(
++ unsigned int bpc,
++ double bpp,
++ unsigned int sliceWidth,
++ unsigned int numSlices,
++ enum output_format_class pixelFormat)
++{
++ // valid bpc = source bits per component in the set of {8, 10, 12}
++ // valid bpp = increments of 1/16 of a bit
++ // min = 6/7/8 in N420/N422/444, respectively
++ // max = such that compression is 1:1
++ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
++ //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
++ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
++
++ // fixed value
++ unsigned int rcModelSize = 8192;
++
++ // N422/N420 operate at 2 pixels per clock
++ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l,
++ Delay, pixels;
++
++ if (pixelFormat == dm_n422 || pixelFormat == dm_420)
++ pixelsPerClock = 2;
++ // #all other modes operate at 1 pixel per clock
++ else
++ pixelsPerClock = 1;
++
++ //initial transmit delay as per PPS
++ initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
++
++ //compute ssm delay
++ if (bpc == 8)
++ D = 81;
++ else if (bpc == 10)
++ D = 89;
++ else
++ D = 113;
++
++ //divide by pixel per cycle to compute slice width as seen by DSC
++ w = sliceWidth / pixelsPerClock;
++
++ //422 mode has an additional cycle of delay
++ if (pixelFormat == dm_s422)
++ s = 1;
++ else
++ s = 0;
++
++ //main calculation for the dscce
++ ix = initalXmitDelay + 45;
++ wx = (w + 2) / 3;
++ p = 3 * wx - w;
++ l0 = ix / w;
++ a = ix + p * l0;
++ ax = (a + 2) / 3 + D + 6 + 1;
++ l = (ax + wx - 1) / wx;
++ if ((ix % w) == 0 && p != 0)
++ lstall = 1;
++ else
++ lstall = 0;
++ Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22;
++
++ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
++ pixels = Delay * 3 * pixelsPerClock;
++ return pixels;
++}
++
++static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
++{
++ unsigned int Delay = 0;
++
++ if (pixelFormat == dm_420) {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 0;
++ // dscc - input deserializer
++ Delay = Delay + 3;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 2;
++ // dscc - input cdc fifo
++ Delay = Delay + 12;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 13;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 7;
++ // dscc gets pixels every other cycle
++ Delay = Delay + 3;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // sft
++ Delay = Delay + 1;
++ } else if (pixelFormat == dm_n422) {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 1;
++ // dscc - input deserializer
++ Delay = Delay + 5;
++ // dscc - input cdc fifo
++ Delay = Delay + 25;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 10;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // sft
++ Delay = Delay + 1;
++ } else {
++ // sfr
++ Delay = Delay + 2;
++ // dsccif
++ Delay = Delay + 0;
++ // dscc - input deserializer
++ Delay = Delay + 3;
++ // dscc - input cdc fifo
++ Delay = Delay + 12;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // dscc - output cdc fifo
++ Delay = Delay + 7;
++ // dscc - output serializer
++ Delay = Delay + 1;
++ // dscc - cdc uncertainty
++ Delay = Delay + 2;
++ // sft
++ Delay = Delay + 1;
++ }
++
++ return Delay;
++}
++
++static bool CalculateDelayAfterScaler(
++ struct display_mode_lib *mode_lib,
++ double ReturnBW,
++ double ReadBandwidthPlaneLuma,
++ double ReadBandwidthPlaneChroma,
++ double TotalDataReadBandwidth,
++ double DisplayPipeLineDeliveryTimeLuma,
++ double DisplayPipeLineDeliveryTimeChroma,
++ double DPPCLK,
++ double DISPCLK,
++ double PixelClock,
++ unsigned int DSCDelay,
++ unsigned int DPPPerPlane,
++ bool ScalerEnabled,
++ unsigned int NumberOfCursors,
++ double DPPCLKDelaySubtotal,
++ double DPPCLKDelaySCL,
++ double DPPCLKDelaySCLLBOnly,
++ double DPPCLKDelayCNVCFormater,
++ double DPPCLKDelayCNVCCursor,
++ double DISPCLKDelaySubtotal,
++ unsigned int ScalerRecoutWidth,
++ enum output_format_class OutputFormat,
++ unsigned int HTotal,
++ unsigned int SwathWidthSingleDPPY,
++ double BytePerPixelDETY,
++ double BytePerPixelDETC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ bool Interlace,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double *DSTXAfterScaler,
++ double *DSTYAfterScaler
++ )
++{
++ unsigned int DPPCycles, DISPCLKCycles;
++ double DataFabricLineDeliveryTimeLuma;
++ double DataFabricLineDeliveryTimeChroma;
++ double DSTTotalPixelsAfterScaler;
++
++ DataFabricLineDeliveryTimeLuma = SwathWidthSingleDPPY * SwathHeightY * dml_ceil(BytePerPixelDETY, 1) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneLuma / TotalDataReadBandwidth);
++ mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeLuma - DisplayPipeLineDeliveryTimeLuma);
++
++ if (BytePerPixelDETC != 0) {
++ DataFabricLineDeliveryTimeChroma = SwathWidthSingleDPPY / 2 * SwathHeightC * dml_ceil(BytePerPixelDETC, 2) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneChroma / TotalDataReadBandwidth);
++ mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeChroma - DisplayPipeLineDeliveryTimeChroma);
++ }
++
++ if (ScalerEnabled)
++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
++ else
++ DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
++
++ DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor;
++
++ DISPCLKCycles = DISPCLKDelaySubtotal;
++
++ if (DPPCLK == 0.0 || DISPCLK == 0.0)
++ return true;
++
++ *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK
++ + DSCDelay;
++
++ if (DPPPerPlane > 1)
++ *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
++
++ if (OutputFormat == dm_420 || (Interlace && ProgressiveToInterlaceUnitInOPP))
++ *DSTYAfterScaler = 1;
++ else
++ *DSTYAfterScaler = 0;
++
++ DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler;
++ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1);
++ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal));
++
++ return true;
++}
++
++static bool CalculatePrefetchSchedule(
++ struct display_mode_lib *mode_lib,
++ double DPPCLK,
++ double DISPCLK,
++ double PixelClock,
++ double DCFCLKDeepSleep,
++ unsigned int DPPPerPlane,
++ unsigned int NumberOfCursors,
++ unsigned int VBlank,
++ unsigned int HTotal,
++ unsigned int MaxInterDCNTileRepeaters,
++ unsigned int VStartup,
++ unsigned int PageTableLevels,
++ bool GPUVMEnable,
++ bool DynamicMetadataEnable,
++ unsigned int DynamicMetadataLinesBeforeActiveRequired,
++ unsigned int DynamicMetadataTransmittedBytes,
++ bool DCCEnable,
++ double UrgentLatencyPixelDataOnly,
++ double UrgentExtraLatency,
++ double TCalc,
++ unsigned int PDEAndMetaPTEBytesFrame,
++ unsigned int MetaRowByte,
++ unsigned int PixelPTEBytesPerRow,
++ double PrefetchSourceLinesY,
++ unsigned int SwathWidthY,
++ double BytePerPixelDETY,
++ double VInitPreFillY,
++ unsigned int MaxNumSwathY,
++ double PrefetchSourceLinesC,
++ double BytePerPixelDETC,
++ double VInitPreFillC,
++ unsigned int MaxNumSwathC,
++ unsigned int SwathHeightY,
++ unsigned int SwathHeightC,
++ double TWait,
++ bool XFCEnabled,
++ double XFCRemoteSurfaceFlipDelay,
++ bool InterlaceEnable,
++ bool ProgressiveToInterlaceUnitInOPP,
++ double DSTXAfterScaler,
++ double DSTYAfterScaler,
++ double *DestinationLinesForPrefetch,
++ double *PrefetchBandwidth,
++ double *DestinationLinesToRequestVMInVBlank,
++ double *DestinationLinesToRequestRowInVBlank,
++ double *VRatioPrefetchY,
++ double *VRatioPrefetchC,
++ double *RequiredPrefetchPixDataBW,
++ double *Tno_bw,
++ unsigned int *VUpdateOffsetPix,
++ double *VUpdateWidthPix,
++ double *VReadyOffsetPix)
++{
++ bool MyError = false;
++ double TotalRepeaterDelayTime;
++ double Tdm, LineTime, Tsetup;
++ double dst_y_prefetch_equ;
++ double Tsw_oto;
++ double prefetch_bw_oto;
++ double Tvm_oto;
++ double Tr0_oto;
++ double Tpre_oto;
++ double dst_y_prefetch_oto;
++ double TimeForFetchingMetaPTE = 0;
++ double TimeForFetchingRowInVBlank = 0;
++ double LinesToRequestPrefetchPixelData = 0;
++
++ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
++ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK);
++ *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime)
++ * PixelClock;
++
++ *VReadyOffsetPix = dml_max(
++ 150.0 / DPPCLK,
++ TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK)
++ * PixelClock;
++
++ Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
++
++ LineTime = (double) HTotal / PixelClock;
++
++ if (DynamicMetadataEnable) {
++ double Tdmbf, Tdmec, Tdmsks;
++
++ Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
++ Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
++ Tdmec = LineTime;
++ if (DynamicMetadataLinesBeforeActiveRequired == 0)
++ Tdmsks = VBlank * LineTime / 2.0;
++ else
++ Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
++ if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
++ Tdmsks = Tdmsks / 2;
++ if (VStartup * LineTime
++ < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
++ MyError = true;
++ }
++ } else
++ Tdm = 0;
++
++ if (GPUVMEnable) {
++ if (PageTableLevels == 4)
++ *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly;
++ else if (PageTableLevels == 3)
++ *Tno_bw = UrgentExtraLatency;
++ else
++ *Tno_bw = 0;
++ } else if (DCCEnable)
++ *Tno_bw = LineTime;
++ else
++ *Tno_bw = LineTime / 4;
++
++ dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
++ - (Tsetup + Tdm) / LineTime
++ - (DSTYAfterScaler + DSTXAfterScaler / HTotal);
++
++ Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
++
++ prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow
++ + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1)
++ + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2))
++ / Tsw_oto;
++
++ if (GPUVMEnable == true) {
++ Tvm_oto =
++ dml_max(
++ *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto,
++ dml_max(
++ UrgentExtraLatency
++ + UrgentLatencyPixelDataOnly
++ * (PageTableLevels
++ - 1),
++ LineTime / 4.0));
++ } else
++ Tvm_oto = LineTime / 4.0;
++
++ if ((GPUVMEnable == true || DCCEnable == true)) {
++ Tr0_oto = dml_max(
++ (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto,
++ dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4)));
++ } else
++ Tr0_oto = LineTime - Tvm_oto;
++
++ Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto;
++
++ dst_y_prefetch_oto = Tpre_oto / LineTime;
++
++ if (dst_y_prefetch_oto < dst_y_prefetch_equ)
++ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
++ else
++ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
++
++ *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1)
++ / 4;
++
++ dml_print("DML: VStartup: %d\n", VStartup);
++ dml_print("DML: TCalc: %f\n", TCalc);
++ dml_print("DML: TWait: %f\n", TWait);
++ dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
++ dml_print("DML: LineTime: %f\n", LineTime);
++ dml_print("DML: Tsetup: %f\n", Tsetup);
++ dml_print("DML: Tdm: %f\n", Tdm);
++ dml_print("DML: DSTYAfterScaler: %f\n", DSTYAfterScaler);
++ dml_print("DML: DSTXAfterScaler: %f\n", DSTXAfterScaler);
++ dml_print("DML: HTotal: %d\n", HTotal);
++
++ *PrefetchBandwidth = 0;
++ *DestinationLinesToRequestVMInVBlank = 0;
++ *DestinationLinesToRequestRowInVBlank = 0;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBW = 0;
++ if (*DestinationLinesForPrefetch > 1) {
++ *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte
++ + 2 * PixelPTEBytesPerRow
++ + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1)
++ + PrefetchSourceLinesC * SwathWidthY / 2
++ * dml_ceil(BytePerPixelDETC, 2))
++ / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
++ if (GPUVMEnable) {
++ TimeForFetchingMetaPTE =
++ dml_max(
++ *Tno_bw
++ + (double) PDEAndMetaPTEBytesFrame
++ / *PrefetchBandwidth,
++ dml_max(
++ UrgentExtraLatency
++ + UrgentLatencyPixelDataOnly
++ * (PageTableLevels
++ - 1),
++ LineTime / 4));
++ } else {
++ if (NumberOfCursors > 0 || XFCEnabled)
++ TimeForFetchingMetaPTE = LineTime / 4;
++ else
++ TimeForFetchingMetaPTE = 0.0;
++ }
++
++ if ((GPUVMEnable == true || DCCEnable == true)) {
++ TimeForFetchingRowInVBlank =
++ dml_max(
++ (MetaRowByte + PixelPTEBytesPerRow)
++ / *PrefetchBandwidth,
++ dml_max(
++ UrgentLatencyPixelDataOnly,
++ dml_max(
++ LineTime
++ - TimeForFetchingMetaPTE,
++ LineTime
++ / 4.0)));
++ } else {
++ if (NumberOfCursors > 0 || XFCEnabled)
++ TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE;
++ else
++ TimeForFetchingRowInVBlank = 0.0;
++ }
++
++ *DestinationLinesToRequestVMInVBlank = dml_floor(
++ 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125),
++ 1) / 4.0;
++
++ *DestinationLinesToRequestRowInVBlank = dml_floor(
++ 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125),
++ 1) / 4.0;
++
++ LinesToRequestPrefetchPixelData =
++ *DestinationLinesForPrefetch
++ - ((NumberOfCursors > 0 || GPUVMEnable
++ || DCCEnable) ?
++ (*DestinationLinesToRequestVMInVBlank
++ + *DestinationLinesToRequestRowInVBlank) :
++ 0.0);
++
++ if (LinesToRequestPrefetchPixelData > 0) {
++
++ *VRatioPrefetchY = (double) PrefetchSourceLinesY
++ / LinesToRequestPrefetchPixelData;
++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
++ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
++ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
++ *VRatioPrefetchY =
++ dml_max(
++ (double) PrefetchSourceLinesY
++ / LinesToRequestPrefetchPixelData,
++ (double) MaxNumSwathY
++ * SwathHeightY
++ / (LinesToRequestPrefetchPixelData
++ - (VInitPreFillY
++ - 3.0)
++ / 2.0));
++ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
++ } else {
++ MyError = true;
++ *VRatioPrefetchY = 0;
++ }
++ }
++
++ *VRatioPrefetchC = (double) PrefetchSourceLinesC
++ / LinesToRequestPrefetchPixelData;
++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
++
++ if ((SwathHeightC > 4)) {
++ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
++ *VRatioPrefetchC =
++ dml_max(
++ *VRatioPrefetchC,
++ (double) MaxNumSwathC
++ * SwathHeightC
++ / (LinesToRequestPrefetchPixelData
++ - (VInitPreFillC
++ - 3.0)
++ / 2.0));
++ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
++ } else {
++ MyError = true;
++ *VRatioPrefetchC = 0;
++ }
++ }
++
++ *RequiredPrefetchPixDataBW =
++ DPPPerPlane
++ * ((double) PrefetchSourceLinesY
++ / LinesToRequestPrefetchPixelData
++ * dml_ceil(
++ BytePerPixelDETY,
++ 1)
++ + (double) PrefetchSourceLinesC
++ / LinesToRequestPrefetchPixelData
++ * dml_ceil(
++ BytePerPixelDETC,
++ 2)
++ / 2)
++ * SwathWidthY / LineTime;
++ } else {
++ MyError = true;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBW = 0;
++ }
++
++ } else {
++ MyError = true;
++ }
++
++ if (MyError) {
++ *PrefetchBandwidth = 0;
++ TimeForFetchingMetaPTE = 0;
++ TimeForFetchingRowInVBlank = 0;
++ *DestinationLinesToRequestVMInVBlank = 0;
++ *DestinationLinesToRequestRowInVBlank = 0;
++ *DestinationLinesForPrefetch = 0;
++ LinesToRequestPrefetchPixelData = 0;
++ *VRatioPrefetchY = 0;
++ *VRatioPrefetchC = 0;
++ *RequiredPrefetchPixDataBW = 0;
++ }
++
++ return MyError;
++}
++
++ /*
++  * Snap Clock onto the grid of values (4 * VCOSpeed) / n, where the divider n
++  * is obtained by passing the exact ratio through dml_floor().
++  * NOTE(review): assumes dml_floor(x, 1) rounds x down to a whole number, in
++  * which case the result is never below Clock - confirm against the dml helpers.
++  */
++ static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
++ {
++ 	const double dfs_reference = VCOSpeed * 4;
++ 	const double divider = dml_floor(dfs_reference / Clock, 1);
++
++ 	return dfs_reference / divider;
++ }
++
++ /*
++  * Snap Clock onto the grid of values (4 * VCOSpeed) / n, where the divider n
++  * is obtained by passing the exact ratio through dml_ceil().
++  * NOTE(review): assumes dml_ceil(x, 1) rounds x up to a whole number, in
++  * which case the result is never above Clock - confirm against the dml helpers.
++  */
++ static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
++ {
++ 	const double dfs_reference = VCOSpeed * 4;
++ 	const double divider = dml_ceil(dfs_reference / Clock, 1);
++
++ 	return dfs_reference / divider;
++ }
++
++ /*
++  * Compute the number of source lines to prefetch for one plane.
++  *
++  * Writes *VInitPreFill (floored half of VRatio + vtaps + 1, with an extra
++  * interlace term when the progressive-to-interlace unit is not in OPP) and
++  * *MaxNumSwath, then returns *MaxNumSwath * SwathHeight plus a partial-swath
++  * line count.
++  *
++  * When mode_lib->vba.IgnoreViewportPositioning is set, the swath counts are
++  * derived as if the viewport started at y = 0 and a warning is printed if
++  * ViewportYStart is actually non-zero.
++  */
++ static double CalculatePrefetchSourceLines(
++ 		struct display_mode_lib *mode_lib,
++ 		double VRatio,
++ 		double vtaps,
++ 		bool Interlace,
++ 		bool ProgressiveToInterlaceUnitInOPP,
++ 		unsigned int SwathHeight,
++ 		unsigned int ViewportYStart,
++ 		double *VInitPreFill,
++ 		unsigned int *MaxNumSwath)
++ {
++ 	unsigned int partial_swath_lines;
++ 	double prefill_sum = VRatio + vtaps + 1;
++
++ 	/* The interlace contribution only applies when OPP does not do the conversion. */
++ 	if (!ProgressiveToInterlaceUnitInOPP)
++ 		prefill_sum += Interlace * 0.5 * VRatio;
++
++ 	*VInitPreFill = dml_floor(prefill_sum / 2.0, 1);
++
++ 	if (mode_lib->vba.IgnoreViewportPositioning) {
++ 		if (ViewportYStart != 0)
++ 			dml_print(
++ 					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
++
++ 		*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
++
++ 		partial_swath_lines = (*VInitPreFill > 1.0) ?
++ 				(unsigned int) (*VInitPreFill - 1) % SwathHeight :
++ 				(unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
++ 	} else {
++ 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
++
++ 		partial_swath_lines = (*VInitPreFill > 1.0) ?
++ 				(unsigned int) (*VInitPreFill - 2) % SwathHeight :
++ 				(unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
++
++ 		/* Only this path clamps the partial swath to at least one line. */
++ 		partial_swath_lines = dml_max(1U, partial_swath_lines);
++ 	}
++
++ 	return *MaxNumSwath * SwathHeight + partial_swath_lines;
++ }
++
++ /*
++  * Size the page-table and DCC-metadata fetches for one surface.
++  *
++  * Outputs:
++  *   *MacroTileWidth           - always written; MacroTileSizeBytes /
++  *                               BytePerPixel / MacroTileHeight for the tiling mode.
++  *   *MetaRowByte              - computed when DCCEnable is set, otherwise 0.
++  *   *meta_row_height          - written only when DCCEnable is set.
++  *   *PixelPTEBytesPerRow      - computed when GPUVMEnable is set, otherwise 0.
++  *   *dpte_row_height          - written only when GPUVMEnable is set.
++  *   *PTEBufferSizeNotExceeded - true when GPUVMEnable is clear, or when the PTE
++  *                               bytes per row (after the return-drop fraction)
++  *                               fit in 64 * PTEBufferSizeInRequestsLuma.
++  *
++  * Returns the per-frame sum of meta-PTE, MPDE, DPDE0 and extra DPDE bytes.
++  *
++  * Callers must not read *meta_row_height / *dpte_row_height when the
++  * corresponding feature is disabled, since they are left untouched.
++  */
++ static unsigned int CalculateVMAndRowBytes(
++ 		struct display_mode_lib *mode_lib,
++ 		bool DCCEnable,
++ 		unsigned int BlockHeight256Bytes,
++ 		unsigned int BlockWidth256Bytes,
++ 		enum source_format_class SourcePixelFormat,
++ 		unsigned int SurfaceTiling,
++ 		unsigned int BytePerPixel,
++ 		enum scan_direction_class ScanDirection,
++ 		unsigned int ViewportWidth,
++ 		unsigned int ViewportHeight,
++ 		unsigned int SwathWidth,
++ 		bool GPUVMEnable,
++ 		unsigned int VMMPageSize,
++ 		unsigned int PTEBufferSizeInRequestsLuma,
++ 		unsigned int PDEProcessingBufIn64KBReqs,
++ 		unsigned int Pitch,
++ 		unsigned int DCCMetaPitch,
++ 		unsigned int *MacroTileWidth,
++ 		unsigned int *MetaRowByte,
++ 		unsigned int *PixelPTEBytesPerRow,
++ 		bool *PTEBufferSizeNotExceeded,
++ 		unsigned int *dpte_row_height,
++ 		unsigned int *meta_row_height)
++ {
++ 	unsigned int MetaRequestHeight;
++ 	unsigned int MetaRequestWidth;
++ 	unsigned int MetaSurfWidth;
++ 	unsigned int MetaSurfHeight;
++ 	unsigned int MPDEBytesFrame;
++ 	unsigned int MetaPTEBytesFrame;
++ 	unsigned int DCCMetaSurfaceBytes;
++
++ 	unsigned int MacroTileSizeBytes;
++ 	unsigned int MacroTileHeight;
++ 	unsigned int DPDE0BytesFrame;
++ 	unsigned int ExtraDPDEBytesFrame;
++ 	unsigned int PDEAndMetaPTEBytesFrame;
++
++ 	/* DCC metadata: row size, surface size and the PTE/PDE bytes needed to map it. */
++ 	if (DCCEnable == true) {
++ 		MetaRequestHeight = 8 * BlockHeight256Bytes;
++ 		MetaRequestWidth = 8 * BlockWidth256Bytes;
++ 		if (ScanDirection == dm_horz) {
++ 			*meta_row_height = MetaRequestHeight;
++ 			MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth)
++ 					+ MetaRequestWidth;
++ 			*MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0;
++ 		} else {
++ 			*meta_row_height = MetaRequestWidth;
++ 			MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight)
++ 					+ MetaRequestHeight;
++ 			*MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0;
++ 		}
++ 		/*
++ 		 * NOTE(review): both scan directions size the meta surface from
++ 		 * ViewportHeight; they differ only in the (double) cast on the
++ 		 * "ViewportHeight - 1" operand. Confirm this is intended.
++ 		 */
++ 		if (ScanDirection == dm_horz) {
++ 			DCCMetaSurfaceBytes = DCCMetaPitch
++ 					* (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
++ 							+ 64 * BlockHeight256Bytes) * BytePerPixel
++ 					/ 256;
++ 		} else {
++ 			DCCMetaSurfaceBytes = DCCMetaPitch
++ 					* (dml_ceil(
++ 							(double) ViewportHeight - 1,
++ 							64 * BlockHeight256Bytes)
++ 							+ 64 * BlockHeight256Bytes) * BytePerPixel
++ 					/ 256;
++ 		}
++ 		if (GPUVMEnable == true) {
++ 			/*
++ 			 * NOTE(review): the subtraction is done in unsigned int before
++ 			 * the cast, so it wraps if DCCMetaSurfaceBytes < VMMPageSize.
++ 			 */
++ 			MetaPTEBytesFrame = (dml_ceil(
++ 					(double) (DCCMetaSurfaceBytes - VMMPageSize)
++ 							/ (8 * VMMPageSize),
++ 					1) + 1) * 64;
++ 			MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
++ 		} else {
++ 			MetaPTEBytesFrame = 0;
++ 			MPDEBytesFrame = 0;
++ 		}
++ 	} else {
++ 		MetaPTEBytesFrame = 0;
++ 		MPDEBytesFrame = 0;
++ 		*MetaRowByte = 0;
++ 	}
++
++ 	/* Macro tile geometry by tiling mode; anything unlisted is treated as 256 KB. */
++ 	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
++ 		MacroTileSizeBytes = 256;
++ 		MacroTileHeight = BlockHeight256Bytes;
++ 	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
++ 			|| SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
++ 		MacroTileSizeBytes = 4096;
++ 		MacroTileHeight = 4 * BlockHeight256Bytes;
++ 	} else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
++ 			|| SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
++ 			|| SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
++ 			|| SurfaceTiling == dm_sw_64kb_r_x) {
++ 		MacroTileSizeBytes = 65536;
++ 		MacroTileHeight = 16 * BlockHeight256Bytes;
++ 	} else {
++ 		MacroTileSizeBytes = 262144;
++ 		MacroTileHeight = 32 * BlockHeight256Bytes;
++ 	}
++ 	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
++
++ 	/* Data PDE bytes are only needed with more than one page-table level. */
++ 	if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
++ 		if (ScanDirection == dm_horz) {
++ 			DPDE0BytesFrame =
++ 					64
++ 							* (dml_ceil(
++ 									((Pitch
++ 											* (dml_ceil(
++ 													ViewportHeight
++ 															- 1,
++ 													MacroTileHeight)
++ 													+ MacroTileHeight)
++ 											* BytePerPixel)
++ 											- MacroTileSizeBytes)
++ 											/ (8
++ 													* 2097152),
++ 									1) + 1);
++ 		} else {
++ 			DPDE0BytesFrame =
++ 					64
++ 							* (dml_ceil(
++ 									((Pitch
++ 											* (dml_ceil(
++ 													(double) SwathWidth
++ 															- 1,
++ 													MacroTileHeight)
++ 													+ MacroTileHeight)
++ 											* BytePerPixel)
++ 											- MacroTileSizeBytes)
++ 											/ (8
++ 													* 2097152),
++ 									1) + 1);
++ 		}
++ 		ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
++ 	} else {
++ 		DPDE0BytesFrame = 0;
++ 		ExtraDPDEBytesFrame = 0;
++ 	}
++
++ 	PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame
++ 			+ ExtraDPDEBytesFrame;
++
++ 	if (GPUVMEnable == true) {
++ 		unsigned int PTERequestSize;
++ 		unsigned int PixelPTEReqHeight;
++ 		unsigned int PixelPTEReqWidth;
++ 		double FractionOfPTEReturnDrop;
++ 		unsigned int EffectivePDEProcessingBufIn64KBReqs;
++
++ 		/* Shape of a single pixel-PTE request for this tiling / page size. */
++ 		if (SurfaceTiling == dm_sw_linear) {
++ 			PixelPTEReqHeight = 1;
++ 			PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
++ 			PTERequestSize = 64;
++ 			FractionOfPTEReturnDrop = 0;
++ 		} else if (MacroTileSizeBytes == 4096) {
++ 			PixelPTEReqHeight = MacroTileHeight;
++ 			PixelPTEReqWidth = 8 * *MacroTileWidth;
++ 			PTERequestSize = 64;
++ 			if (ScanDirection == dm_horz)
++ 				FractionOfPTEReturnDrop = 0;
++ 			else
++ 				/*
++ 				 * Must be a floating-point division: the integer
++ 				 * expression 7 / 8 evaluates to 0, which silently
++ 				 * disabled the drop factor in the check below.
++ 				 */
++ 				FractionOfPTEReturnDrop = 7.0 / 8;
++ 		} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
++ 			PixelPTEReqHeight = 16 * BlockHeight256Bytes;
++ 			PixelPTEReqWidth = 16 * BlockWidth256Bytes;
++ 			PTERequestSize = 128;
++ 			FractionOfPTEReturnDrop = 0;
++ 		} else {
++ 			PixelPTEReqHeight = MacroTileHeight;
++ 			PixelPTEReqWidth = 8 * *MacroTileWidth;
++ 			PTERequestSize = 64;
++ 			FractionOfPTEReturnDrop = 0;
++ 		}
++
++ 		/* 4:2:0 formats get half of the PDE processing buffer. */
++ 		if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)
++ 			EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2;
++ 		else
++ 			EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs;
++
++ 		if (SurfaceTiling == dm_sw_linear) {
++ 			/* Power-of-two row height, capped at 128 lines. */
++ 			*dpte_row_height =
++ 					dml_min(
++ 							128,
++ 							1
++ 									<< (unsigned int) dml_floor(
++ 											dml_log2(
++ 													dml_min(
++ 															(double) PTEBufferSizeInRequestsLuma
++ 																	* PixelPTEReqWidth,
++ 															EffectivePDEProcessingBufIn64KBReqs
++ 																	* 65536.0
++ 																	/ BytePerPixel)
++ 															/ Pitch),
++ 											1));
++ 			*PixelPTEBytesPerRow = PTERequestSize
++ 					* (dml_ceil(
++ 							(double) (Pitch * *dpte_row_height - 1)
++ 									/ PixelPTEReqWidth,
++ 							1) + 1);
++ 		} else if (ScanDirection == dm_horz) {
++ 			*dpte_row_height = PixelPTEReqHeight;
++ 			*PixelPTEBytesPerRow = PTERequestSize
++ 					* (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1)
++ 							+ 1);
++ 		} else {
++ 			*dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth);
++ 			*PixelPTEBytesPerRow = PTERequestSize
++ 					* (dml_ceil(
++ 							((double) SwathWidth - 1)
++ 									/ PixelPTEReqHeight,
++ 							1) + 1);
++ 		}
++ 		/* Only the PTE bytes that are not dropped on return count against the buffer. */
++ 		if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
++ 				<= 64 * PTEBufferSizeInRequestsLuma) {
++ 			*PTEBufferSizeNotExceeded = true;
++ 		} else {
++ 			*PTEBufferSizeNotExceeded = false;
++ 		}
++ 	} else {
++ 		*PixelPTEBytesPerRow = 0;
++ 		*PTEBufferSizeNotExceeded = true;
++ 	}
++
++ 	return PDEAndMetaPTEBytesFrame;
++ }
++
++static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
++ struct display_mode_lib *mode_lib)
++{
++ unsigned int j, k;
++
++ mode_lib->vba.WritebackDISPCLK = 0.0;
++ mode_lib->vba.DISPCLKWithRamping = 0;
++ mode_lib->vba.DISPCLKWithoutRamping = 0;
++ mode_lib->vba.GlobalDPPCLK = 0.0;
++
++ // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation
++ //
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.WritebackEnable[k]) {
++ mode_lib->vba.WritebackDISPCLK =
++ dml_max(
++ mode_lib->vba.WritebackDISPCLK,
++ CalculateWriteBackDISPCLK(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.WritebackChromaLineBufferWidth));
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.HRatio[k] > 1) {
++ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / dml_ceil(
++ mode_lib->vba.htaps[k]
++ / 6.0,
++ 1));
++ } else {
++ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma =
++ mode_lib->vba.PixelClock[k]
++ * dml_max(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ dml_max(
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k],
++ 1.0));
++
++ if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
++ && mode_lib->vba.DPPCLKUsingSingleDPPLuma
++ < 2 * mode_lib->vba.PixelClock[k]) {
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
++ }
++
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0;
++ mode_lib->vba.DPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma;
++ } else {
++ if (mode_lib->vba.HRatio[k] > 1) {
++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] =
++ dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / 2
++ / dml_ceil(
++ mode_lib->vba.HTAPsChroma[k]
++ / 6.0,
++ 1.0));
++ } else {
++ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma =
++ mode_lib->vba.PixelClock[k]
++ * dml_max(
++ mode_lib->vba.VTAPsChroma[k]
++ / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]
++ / 2),
++ dml_max(
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / 4
++ / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k],
++ 1.0));
++
++ if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
++ && mode_lib->vba.DPPCLKUsingSingleDPPChroma
++ < 2 * mode_lib->vba.PixelClock[k]) {
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
++ * mode_lib->vba.PixelClock[k];
++ }
++
++ mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max(
++ mode_lib->vba.DPPCLKUsingSingleDPPLuma,
++ mode_lib->vba.DPPCLKUsingSingleDPPChroma);
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.BlendingAndTiming[k] != k)
++ continue;
++ if (mode_lib->vba.ODMCombineEnabled[k]) {
++ mode_lib->vba.DISPCLKWithRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.PixelClock[k] / 2
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100)
++ * (1
++ + mode_lib->vba.DISPCLKRampingMargin
++ / 100));
++ mode_lib->vba.DISPCLKWithoutRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.PixelClock[k] / 2
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100));
++ } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
++ mode_lib->vba.DISPCLKWithRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.PixelClock[k]
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100)
++ * (1
++ + mode_lib->vba.DISPCLKRampingMargin
++ / 100));
++ mode_lib->vba.DISPCLKWithoutRamping =
++ dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.PixelClock[k]
++ * (1
++ + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100));
++ }
++ }
++
++ mode_lib->vba.DISPCLKWithRamping = dml_max(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.WritebackDISPCLK);
++ mode_lib->vba.DISPCLKWithoutRamping = dml_max(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.WritebackDISPCLK);
++
++ ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
++ mode_lib->vba.DISPCLKWithRamping,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
++ mode_lib->vba.DISPCLKWithoutRamping,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
++ mode_lib->vba.DISPCLK_calculated =
++ mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
++ } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
++ > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
++ mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
++ } else {
++ mode_lib->vba.DISPCLK_calculated =
++ mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
++ }
++ DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.DPPPerPlane[k] == 0) {
++ mode_lib->vba.DPPCLK_calculated[k] = 0;
++ } else {
++ mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k]
++ / mode_lib->vba.DPPPerPlane[k]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
++ }
++ mode_lib->vba.GlobalDPPCLK = dml_max(
++ mode_lib->vba.GlobalDPPCLK,
++ mode_lib->vba.DPPCLK_calculated[k]);
++ }
++ mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
++ mode_lib->vba.GlobalDPPCLK,
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
++ * dml_ceil(
++ mode_lib->vba.DPPCLK_calculated[k] * 255
++ / mode_lib->vba.GlobalDPPCLK,
++ 1);
++ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
++ }
++
++ // Urgent Watermark
++ mode_lib->vba.DCCEnabledAnyPlane = false;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ if (mode_lib->vba.DCCEnable[k])
++ mode_lib->vba.DCCEnabledAnyPlane = true;
++
++ mode_lib->vba.ReturnBandwidthToDCN = dml_min(
++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK,
++ mode_lib->vba.FabricAndDRAMBandwidth * 1000)
++ * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
++
++ mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN;
++ mode_lib->vba.ReturnBW = adjust_ReturnBW(
++ mode_lib,
++ mode_lib->vba.ReturnBW,
++ mode_lib->vba.DCCEnabledAnyPlane,
++ mode_lib->vba.ReturnBandwidthToDCN);
++
++ // Let's do this calculation again??
++ mode_lib->vba.ReturnBandwidthToDCN = dml_min(
++ mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK,
++ mode_lib->vba.FabricAndDRAMBandwidth * 1000);
++ mode_lib->vba.ReturnBW = adjust_ReturnBW(
++ mode_lib,
++ mode_lib->vba.ReturnBW,
++ mode_lib->vba.DCCEnabledAnyPlane,
++ mode_lib->vba.ReturnBandwidthToDCN);
++
++ DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
++ DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
++ DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ bool MainPlaneDoesODMCombine = false;
++
++ if (mode_lib->vba.SourceScan[k] == dm_horz)
++ mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
++ else
++ mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
++
++ if (mode_lib->vba.ODMCombineEnabled[k] == true)
++ MainPlaneDoesODMCombine = true;
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
++ if (mode_lib->vba.BlendingAndTiming[k] == j
++ && mode_lib->vba.ODMCombineEnabled[j] == true)
++ MainPlaneDoesODMCombine = true;
++
++ if (MainPlaneDoesODMCombine == true)
++ mode_lib->vba.SwathWidthY[k] = dml_min(
++ (double) mode_lib->vba.SwathWidthSingleDPPY[k],
++ dml_round(
++ mode_lib->vba.HActive[k] / 2.0
++ * mode_lib->vba.HRatio[k]));
++ else {
++ if (mode_lib->vba.DPPPerPlane[k] == 0) {
++ mode_lib->vba.SwathWidthY[k] = 0;
++ } else {
++ mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
++ / mode_lib->vba.DPPPerPlane[k];
++ }
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ mode_lib->vba.BytePerPixelDETY[k] = 8;
++ mode_lib->vba.BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
++ mode_lib->vba.BytePerPixelDETY[k] = 4;
++ mode_lib->vba.BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
++ mode_lib->vba.BytePerPixelDETY[k] = 2;
++ mode_lib->vba.BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
++ mode_lib->vba.BytePerPixelDETY[k] = 1;
++ mode_lib->vba.BytePerPixelDETC[k] = 0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ mode_lib->vba.BytePerPixelDETY[k] = 1;
++ mode_lib->vba.BytePerPixelDETC[k] = 2;
++ } else { // dm_420_10
++ mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0;
++ mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0;
++ }
++ }
++
++ mode_lib->vba.TotalDataReadBandwidth = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
++ * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * mode_lib->vba.VRatio[k];
++ mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
++ / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * mode_lib->vba.VRatio[k] / 2;
++ DTRACE(
++ " read_bw[%i] = %fBps",
++ k,
++ mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]);
++ mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k];
++ }
++
++ mode_lib->vba.TotalDCCActiveDPP = 0;
++ mode_lib->vba.TotalActiveDPP = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
++ + mode_lib->vba.DPPPerPlane[k];
++ if (mode_lib->vba.DCCEnable[k])
++ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
++ + mode_lib->vba.DPPPerPlane[k];
++ }
++
++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
++ (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
++ + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly
++ * mode_lib->vba.NumberOfChannels
++ / mode_lib->vba.ReturnBW;
++
++ mode_lib->vba.LastPixelOfLineExtraWatermark = 0;
++
++ mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency
++ + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte
++ + mode_lib->vba.TotalDCCActiveDPP
++ * mode_lib->vba.MetaChunkSize) * 1024.0
++ / mode_lib->vba.ReturnBW;
++
++ if (mode_lib->vba.GPUVMEnable)
++ mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP
++ * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW;
++
++ mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly
++ + mode_lib->vba.LastPixelOfLineExtraWatermark
++ + mode_lib->vba.UrgentExtraLatency;
++
++ DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency);
++ DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark);
++
++ mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly;
++
++ mode_lib->vba.TotalActiveWriteback = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.WritebackEnable[k])
++ mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k];
++ }
++
++ if (mode_lib->vba.TotalActiveWriteback <= 1)
++ mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency;
++ else
++ mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency
++ + mode_lib->vba.WritebackChunkSize * 1024.0 / 32
++ / mode_lib->vba.SOCCLK;
++
++ DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark);
++
++ // NB P-State/DRAM Clock Change Watermark
++ mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency
++ + mode_lib->vba.UrgentWatermark;
++
++ DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark);
++
++ DTRACE(" calculating wb pstate watermark");
++ DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback);
++ DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK);
++
++ if (mode_lib->vba.TotalActiveWriteback <= 1)
++ mode_lib->vba.WritebackDRAMClockChangeWatermark =
++ mode_lib->vba.DRAMClockChangeLatency
++ + mode_lib->vba.WritebackLatency;
++ else
++ mode_lib->vba.WritebackDRAMClockChangeWatermark =
++ mode_lib->vba.DRAMClockChangeLatency
++ + mode_lib->vba.WritebackLatency
++ + mode_lib->vba.WritebackChunkSize * 1024.0 / 32
++ / mode_lib->vba.SOCCLK;
++
++ DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark);
++
++ // Stutter Efficiency
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k]
++ / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k];
++ mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor(
++ mode_lib->vba.LinesInDETY[k],
++ mode_lib->vba.SwathHeightY[k]);
++ mode_lib->vba.FullDETBufferingTimeY[k] =
++ mode_lib->vba.LinesInDETYRoundedDownToSwath[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ / mode_lib->vba.VRatio[k];
++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
++ mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k]
++ / mode_lib->vba.BytePerPixelDETC[k]
++ / (mode_lib->vba.SwathWidthY[k] / 2);
++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor(
++ mode_lib->vba.LinesInDETC[k],
++ mode_lib->vba.SwathHeightC[k]);
++ mode_lib->vba.FullDETBufferingTimeC[k] =
++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ / (mode_lib->vba.VRatio[k] / 2);
++ } else {
++ mode_lib->vba.LinesInDETC[k] = 0;
++ mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0;
++ mode_lib->vba.FullDETBufferingTimeC[k] = 999999;
++ }
++ }
++
++ mode_lib->vba.MinFullDETBufferingTime = 999999.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.FullDETBufferingTimeY[k]
++ < mode_lib->vba.MinFullDETBufferingTime) {
++ mode_lib->vba.MinFullDETBufferingTime =
++ mode_lib->vba.FullDETBufferingTimeY[k];
++ mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ }
++ if (mode_lib->vba.FullDETBufferingTimeC[k]
++ < mode_lib->vba.MinFullDETBufferingTime) {
++ mode_lib->vba.MinFullDETBufferingTime =
++ mode_lib->vba.FullDETBufferingTimeC[k];
++ mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
++ (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ }
++ }
++
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.DCCEnable[k]) {
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond =
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond
++ + mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ / mode_lib->vba.DCCRate[k]
++ / 1000
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]
++ / mode_lib->vba.DCCRate[k]
++ / 1000;
++ } else {
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond =
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond
++ + mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ / 1000
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]
++ / 1000;
++ }
++ if (mode_lib->vba.DCCEnable[k]) {
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond =
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond
++ + mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ / 1000 / 256
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]
++ / 1000 / 256;
++ }
++ if (mode_lib->vba.GPUVMEnable) {
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond =
++ mode_lib->vba.AverageReadBandwidthGBytePerSecond
++ + mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ / 1000 / 512
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]
++ / 1000 / 512;
++ }
++ }
++
++ mode_lib->vba.PartOfBurstThatFitsInROB =
++ dml_min(
++ mode_lib->vba.MinFullDETBufferingTime
++ * mode_lib->vba.TotalDataReadBandwidth,
++ mode_lib->vba.ROBBufferSizeInKByte * 1024
++ * mode_lib->vba.TotalDataReadBandwidth
++ / (mode_lib->vba.AverageReadBandwidthGBytePerSecond
++ * 1000));
++ mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
++ * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000)
++ / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW
++ + (mode_lib->vba.MinFullDETBufferingTime
++ * mode_lib->vba.TotalDataReadBandwidth
++ - mode_lib->vba.PartOfBurstThatFitsInROB)
++ / (mode_lib->vba.DCFCLK * 64);
++ if (mode_lib->vba.TotalActiveWriteback == 0) {
++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
++ - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
++ / mode_lib->vba.MinFullDETBufferingTime) * 100;
++ } else {
++ mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
++ }
++
++ mode_lib->vba.SmallestVBlank = 999999;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
++ mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
++ - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ mode_lib->vba.VBlankTime = 0;
++ }
++ mode_lib->vba.SmallestVBlank = dml_min(
++ mode_lib->vba.SmallestVBlank,
++ mode_lib->vba.VBlankTime);
++ }
++
++ mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
++ * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
++ - mode_lib->vba.SmallestVBlank)
++ + mode_lib->vba.SmallestVBlank)
++ / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
++
++ // DCFCLK Deep Sleep
++ mode_lib->vba.DCFCLKDeepSleep = 8.0;
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) {
++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] =
++ dml_max(
++ 1.1 * mode_lib->vba.SwathWidthY[k]
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelDETY[k],
++ 1) / 32
++ / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k],
++ 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelDETC[k],
++ 2) / 32
++ / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]);
++ } else
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k]
++ * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0
++ / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k];
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
++ mode_lib->vba.PixelClock[k] / 16.0);
++ mode_lib->vba.DCFCLKDeepSleep = dml_max(
++ mode_lib->vba.DCFCLKDeepSleep,
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
++
++ DTRACE(
++ " dcfclk_deepsleep_per_plane[%i] = %fMHz",
++ k,
++ mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
++ }
++
++ DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep);
++
++ // Stutter Watermark
++ mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime
++ + mode_lib->vba.LastPixelOfLineExtraWatermark
++ + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep;
++ mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime
++ + mode_lib->vba.LastPixelOfLineExtraWatermark
++ + mode_lib->vba.UrgentExtraLatency;
++
++ DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark);
++ DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark);
++
++ // Urgent Latency Supported
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.EffectiveDETPlusLBLinesLuma =
++ dml_floor(
++ mode_lib->vba.LinesInDETY[k]
++ + dml_min(
++ mode_lib->vba.LinesInDETY[k]
++ * mode_lib->vba.DPPCLK[k]
++ * mode_lib->vba.BytePerPixelDETY[k]
++ * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k]
++ / (mode_lib->vba.ReturnBW
++ / mode_lib->vba.DPPPerPlane[k]),
++ (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma),
++ mode_lib->vba.SwathHeightY[k]);
++
++ mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma
++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ / mode_lib->vba.VRatio[k]
++ - mode_lib->vba.EffectiveDETPlusLBLinesLuma
++ * mode_lib->vba.SwathWidthY[k]
++ * mode_lib->vba.BytePerPixelDETY[k]
++ / (mode_lib->vba.ReturnBW
++ / mode_lib->vba.DPPPerPlane[k]);
++
++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
++ mode_lib->vba.EffectiveDETPlusLBLinesChroma =
++ dml_floor(
++ mode_lib->vba.LinesInDETC[k]
++ + dml_min(
++ mode_lib->vba.LinesInDETC[k]
++ * mode_lib->vba.DPPCLK[k]
++ * mode_lib->vba.BytePerPixelDETC[k]
++ * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k]
++ / (mode_lib->vba.ReturnBW
++ / mode_lib->vba.DPPPerPlane[k]),
++ (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma),
++ mode_lib->vba.SwathHeightC[k]);
++ mode_lib->vba.UrgentLatencySupportUsChroma =
++ mode_lib->vba.EffectiveDETPlusLBLinesChroma
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ / (mode_lib->vba.VRatio[k] / 2)
++ - mode_lib->vba.EffectiveDETPlusLBLinesChroma
++ * (mode_lib->vba.SwathWidthY[k]
++ / 2)
++ * mode_lib->vba.BytePerPixelDETC[k]
++ / (mode_lib->vba.ReturnBW
++ / mode_lib->vba.DPPPerPlane[k]);
++ mode_lib->vba.UrgentLatencySupportUs[k] = dml_min(
++ mode_lib->vba.UrgentLatencySupportUsLuma,
++ mode_lib->vba.UrgentLatencySupportUsChroma);
++ } else {
++ mode_lib->vba.UrgentLatencySupportUs[k] =
++ mode_lib->vba.UrgentLatencySupportUsLuma;
++ }
++ }
++
++ mode_lib->vba.MinUrgentLatencySupportUs = 999999;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.MinUrgentLatencySupportUs = dml_min(
++ mode_lib->vba.MinUrgentLatencySupportUs,
++ mode_lib->vba.UrgentLatencySupportUs[k]);
++ }
++
++ // Non-Urgent Latency Tolerance
++ mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs
++ - mode_lib->vba.UrgentWatermark;
++
++ // DSCCLK
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
++ mode_lib->vba.DSCCLK_calculated[k] = 0.0;
++ } else {
++ if (mode_lib->vba.OutputFormat[k] == dm_420
++ || mode_lib->vba.OutputFormat[k] == dm_n422)
++ mode_lib->vba.DSCFormatFactor = 2;
++ else
++ mode_lib->vba.DSCFormatFactor = 1;
++ if (mode_lib->vba.ODMCombineEnabled[k])
++ mode_lib->vba.DSCCLK_calculated[k] =
++ mode_lib->vba.PixelClockBackEnd[k] / 6
++ / mode_lib->vba.DSCFormatFactor
++ / (1
++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100);
++ else
++ mode_lib->vba.DSCCLK_calculated[k] =
++ mode_lib->vba.PixelClockBackEnd[k] / 3
++ / mode_lib->vba.DSCFormatFactor
++ / (1
++ - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
++ / 100);
++ }
++ }
++
++ // DSC Delay
++ // TODO
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ double bpp = mode_lib->vba.OutputBpp[k];
++ unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
++
++ if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
++ if (!mode_lib->vba.ODMCombineEnabled[k]) {
++ mode_lib->vba.DSCDelay[k] =
++ dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ bpp,
++ dml_ceil(
++ (double) mode_lib->vba.HActive[k]
++ / mode_lib->vba.NumberOfDSCSlices[k],
++ 1),
++ slices,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]);
++ } else {
++ mode_lib->vba.DSCDelay[k] =
++ 2
++ * (dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ bpp,
++ dml_ceil(
++ (double) mode_lib->vba.HActive[k]
++ / mode_lib->vba.NumberOfDSCSlices[k],
++ 1),
++ slices / 2.0,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]));
++ }
++ mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k]
++ * mode_lib->vba.PixelClock[k]
++ / mode_lib->vba.PixelClockBackEnd[k];
++ } else {
++ mode_lib->vba.DSCDelay[k] = 0;
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
++ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
++ && mode_lib->vba.DSCEnabled[j])
++ mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j];
++
++ // Prefetch
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ unsigned int PDEAndMetaPTEBytesFrameY;
++ unsigned int PixelPTEBytesPerRowY;
++ unsigned int MetaRowByteY;
++ unsigned int MetaRowByteC;
++ unsigned int PDEAndMetaPTEBytesFrameC;
++ unsigned int PixelPTEBytesPerRowC;
++
++ Calculate256BBlockSizes(
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1),
++ dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2),
++ &mode_lib->vba.BlockHeight256BytesY[k],
++ &mode_lib->vba.BlockHeight256BytesC[k],
++ &mode_lib->vba.BlockWidth256BytesY[k],
++ &mode_lib->vba.BlockWidth256BytesC[k]);
++ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.BlockHeight256BytesY[k],
++ mode_lib->vba.BlockWidth256BytesY[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k],
++ mode_lib->vba.ViewportHeight[k],
++ mode_lib->vba.SwathWidthY[k],
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsLuma,
++ mode_lib->vba.PDEProcessingBufIn64KBReqs,
++ mode_lib->vba.PitchY[k],
++ mode_lib->vba.DCCMetaPitchY[k],
++ &mode_lib->vba.MacroTileWidthY[k],
++ &MetaRowByteY,
++ &PixelPTEBytesPerRowY,
++ &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0],
++ &mode_lib->vba.dpte_row_height[k],
++ &mode_lib->vba.meta_row_height[k]);
++ mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.vtaps[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.ViewportYStartY[k],
++ &mode_lib->vba.VInitPreFillY[k],
++ &mode_lib->vba.MaxNumSwathY[k]);
++
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
++ PDEAndMetaPTEBytesFrameC =
++ CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.BlockHeight256BytesC[k],
++ mode_lib->vba.BlockWidth256BytesC[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(
++ mode_lib->vba.BytePerPixelDETC[k],
++ 2),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k] / 2,
++ mode_lib->vba.ViewportHeight[k] / 2,
++ mode_lib->vba.SwathWidthY[k] / 2,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsLuma,
++ mode_lib->vba.PDEProcessingBufIn64KBReqs,
++ mode_lib->vba.PitchC[k],
++ 0,
++ &mode_lib->vba.MacroTileWidthC[k],
++ &MetaRowByteC,
++ &PixelPTEBytesPerRowC,
++ &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0],
++ &mode_lib->vba.dpte_row_height_chroma[k],
++ &mode_lib->vba.meta_row_height_chroma[k]);
++ mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k] / 2,
++ mode_lib->vba.VTAPsChroma[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightC[k],
++ mode_lib->vba.ViewportYStartC[k],
++ &mode_lib->vba.VInitPreFillC[k],
++ &mode_lib->vba.MaxNumSwathC[k]);
++ } else {
++ PixelPTEBytesPerRowC = 0;
++ PDEAndMetaPTEBytesFrameC = 0;
++ MetaRowByteC = 0;
++ mode_lib->vba.MaxNumSwathC[k] = 0;
++ mode_lib->vba.PrefetchSourceLinesC[k] = 0;
++ }
++
++ mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
++ + PDEAndMetaPTEBytesFrameC;
++ mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
++
++ CalculateActiveRowBandwidth(
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ MetaRowByteY,
++ MetaRowByteC,
++ mode_lib->vba.meta_row_height[k],
++ mode_lib->vba.meta_row_height_chroma[k],
++ PixelPTEBytesPerRowY,
++ PixelPTEBytesPerRowC,
++ mode_lib->vba.dpte_row_height[k],
++ mode_lib->vba.dpte_row_height_chroma[k],
++ &mode_lib->vba.meta_row_bw[k],
++ &mode_lib->vba.dpte_row_bw[k],
++ &mode_lib->vba.qual_row_bw[k]);
++ }
++
++ mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k])
++ / mode_lib->vba.DISPCLK;
++ } else
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
++ if (mode_lib->vba.BlendingAndTiming[j] == k
++ && mode_lib->vba.WritebackEnable[j] == true) {
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ dml_max(
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k],
++ mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[j],
++ mode_lib->vba.WritebackHRatio[j],
++ mode_lib->vba.WritebackVRatio[j],
++ mode_lib->vba.WritebackLumaHTaps[j],
++ mode_lib->vba.WritebackLumaVTaps[j],
++ mode_lib->vba.WritebackChromaHTaps[j],
++ mode_lib->vba.WritebackChromaVTaps[j],
++ mode_lib->vba.WritebackDestinationWidth[j])
++ / mode_lib->vba.DISPCLK);
++ }
++ }
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
++ if (mode_lib->vba.BlendingAndTiming[k] == j)
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j];
++
++ mode_lib->vba.VStartupLines = 13;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.MaxVStartupLines[k] =
++ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
++ - dml_max(
++ 1.0,
++ dml_ceil(
++ mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k]
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1));
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
++ mode_lib->vba.MaximumMaxVStartupLines = dml_max(
++ mode_lib->vba.MaximumMaxVStartupLines,
++ mode_lib->vba.MaxVStartupLines[k]);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.cursor_bw[k] = 0.0;
++ for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j)
++ mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j]
++ * mode_lib->vba.CursorBPP[k][j] / 8.0
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * mode_lib->vba.VRatio[k];
++ }
++
++ do {
++ double MaxTotalRDBandwidth = 0;
++ bool DestinationLineTimesForPrefetchLessThan2 = false;
++ bool VRatioPrefetchMoreThan4 = false;
++ bool prefetch_vm_bw_valid = true;
++ bool prefetch_row_bw_valid = true;
++ double TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.SREnterPlusExitTime);
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay =
++ CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.SwathWidthY[k],
++ dml_ceil(
++ mode_lib->vba.BytePerPixelDETY[k],
++ 1),
++ mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TCalc,
++ TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ } else {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
++ }
++
++ CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBW, mode_lib->vba.ReadBandwidthPlaneLuma[k], mode_lib->vba.ReadBandwidthPlaneChroma[k], mode_lib->vba.TotalDataReadBandwidth,
++ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k],
++ mode_lib->vba.DPPCLK[k], mode_lib->vba.DISPCLK, mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelay[k], mode_lib->vba.DPPPerPlane[k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k],
++ mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal,
++ mode_lib->vba.SwathWidthY[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k],
++ mode_lib->vba.SwathWidthSingleDPPY[k], mode_lib->vba.BytePerPixelDETY[k], mode_lib->vba.BytePerPixelDETC[k], mode_lib->vba.SwathHeightY[k], mode_lib->vba.SwathHeightC[k], mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP, &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]);
++
++ mode_lib->vba.ErrorResult[k] =
++ CalculatePrefetchSchedule(
++ mode_lib,
++ mode_lib->vba.DPPCLK[k],
++ mode_lib->vba.DISPCLK,
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.DCFCLKDeepSleep,
++ mode_lib->vba.DPPPerPlane[k],
++ mode_lib->vba.NumberOfCursors[k],
++ mode_lib->vba.VTotal[k]
++ - mode_lib->vba.VActive[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.MaxInterDCNTileRepeaters,
++ dml_min(
++ mode_lib->vba.VStartupLines,
++ mode_lib->vba.MaxVStartupLines[k]),
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.DynamicMetadataEnable[k],
++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
++ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.UrgentExtraLatency,
++ mode_lib->vba.TCalc,
++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k],
++ mode_lib->vba.MetaRowByte[k],
++ mode_lib->vba.PixelPTEBytesPerRow[k],
++ mode_lib->vba.PrefetchSourceLinesY[k],
++ mode_lib->vba.SwathWidthY[k],
++ mode_lib->vba.BytePerPixelDETY[k],
++ mode_lib->vba.VInitPreFillY[k],
++ mode_lib->vba.MaxNumSwathY[k],
++ mode_lib->vba.PrefetchSourceLinesC[k],
++ mode_lib->vba.BytePerPixelDETC[k],
++ mode_lib->vba.VInitPreFillC[k],
++ mode_lib->vba.MaxNumSwathC[k],
++ mode_lib->vba.SwathHeightY[k],
++ mode_lib->vba.SwathHeightC[k],
++ TWait,
++ mode_lib->vba.XFCEnabled[k],
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay,
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.DSTXAfterScaler[k],
++ mode_lib->vba.DSTYAfterScaler[k],
++ &mode_lib->vba.DestinationLinesForPrefetch[k],
++ &mode_lib->vba.PrefetchBandwidth[k],
++ &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k],
++ &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k],
++ &mode_lib->vba.VRatioPrefetchY[k],
++ &mode_lib->vba.VRatioPrefetchC[k],
++ &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k],
++ &mode_lib->vba.Tno_bw[k],
++ &mode_lib->vba.VUpdateOffsetPix[k],
++ &mode_lib->vba.VUpdateWidthPix[k],
++ &mode_lib->vba.VReadyOffsetPix[k]);
++
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ mode_lib->vba.VStartup[k] = dml_min(
++ mode_lib->vba.VStartupLines,
++ mode_lib->vba.MaxVStartupLines[k]);
++ if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
++ != 0) {
++ mode_lib->vba.VStartup[k] =
++ mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
++ }
++ } else {
++ mode_lib->vba.VStartup[k] =
++ dml_min(
++ mode_lib->vba.VStartupLines,
++ mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
++ }
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++
++ if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0)
++ mode_lib->vba.prefetch_vm_bw[k] = 0;
++ else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) {
++ mode_lib->vba.prefetch_vm_bw[k] =
++ (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k]
++ / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++ } else {
++ mode_lib->vba.prefetch_vm_bw[k] = 0;
++ prefetch_vm_bw_valid = false;
++ }
++ if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k]
++ == 0)
++ mode_lib->vba.prefetch_row_bw[k] = 0;
++ else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) {
++ mode_lib->vba.prefetch_row_bw[k] =
++ (double) (mode_lib->vba.MetaRowByte[k]
++ + mode_lib->vba.PixelPTEBytesPerRow[k])
++ / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++ } else {
++ mode_lib->vba.prefetch_row_bw[k] = 0;
++ prefetch_row_bw_valid = false;
++ }
++
++ MaxTotalRDBandwidth =
++ MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k]
++ + dml_max(
++ mode_lib->vba.prefetch_vm_bw[k],
++ dml_max(
++ mode_lib->vba.prefetch_row_bw[k],
++ dml_max(
++ mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k],
++ mode_lib->vba.RequiredPrefetchPixDataBWLuma[k])
++ + mode_lib->vba.meta_row_bw[k]
++ + mode_lib->vba.dpte_row_bw[k]));
++
++ if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2)
++ DestinationLineTimesForPrefetchLessThan2 = true;
++ if (mode_lib->vba.VRatioPrefetchY[k] > 4
++ || mode_lib->vba.VRatioPrefetchC[k] > 4)
++ VRatioPrefetchMoreThan4 = true;
++ }
++
++ if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid
++ && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4
++ && !DestinationLineTimesForPrefetchLessThan2)
++ mode_lib->vba.PrefetchModeSupported = true;
++ else {
++ mode_lib->vba.PrefetchModeSupported = false;
++ dml_print(
++ "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
++ }
++
++ if (mode_lib->vba.PrefetchModeSupported == true) {
++ double final_flip_bw[DC__NUM_DPP__MAX];
++ unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX];
++ double total_dcn_read_bw_with_flip = 0;
++
++ mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip =
++ mode_lib->vba.BandwidthAvailableForImmediateFlip
++ - mode_lib->vba.cursor_bw[k]
++ - dml_max(
++ mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k]
++ + mode_lib->vba.qual_row_bw[k],
++ mode_lib->vba.PrefetchBandwidth[k]);
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ ImmediateFlipBytes[k] = 0;
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ ImmediateFlipBytes[k] =
++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k]
++ + mode_lib->vba.MetaRowByte[k]
++ + mode_lib->vba.PixelPTEBytesPerRow[k];
++ }
++ }
++ mode_lib->vba.TotImmediateFlipBytes = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ mode_lib->vba.TotImmediateFlipBytes =
++ mode_lib->vba.TotImmediateFlipBytes
++ + ImmediateFlipBytes[k];
++ }
++ }
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ CalculateFlipSchedule(
++ mode_lib,
++ mode_lib->vba.UrgentExtraLatency,
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.BandwidthAvailableForImmediateFlip,
++ mode_lib->vba.TotImmediateFlipBytes,
++ mode_lib->vba.SourcePixelFormat[k],
++ ImmediateFlipBytes[k],
++ mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.Tno_bw[k],
++ mode_lib->vba.PDEAndMetaPTEBytesFrame[k],
++ mode_lib->vba.MetaRowByte[k],
++ mode_lib->vba.PixelPTEBytesPerRow[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.dpte_row_height[k],
++ mode_lib->vba.meta_row_height[k],
++ mode_lib->vba.qual_row_bw[k],
++ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
++ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
++ &final_flip_bw[k],
++ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
++ }
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ total_dcn_read_bw_with_flip =
++ total_dcn_read_bw_with_flip
++ + mode_lib->vba.cursor_bw[k]
++ + dml_max(
++ mode_lib->vba.prefetch_vm_bw[k],
++ dml_max(
++ mode_lib->vba.prefetch_row_bw[k],
++ final_flip_bw[k]
++ + dml_max(
++ mode_lib->vba.ReadBandwidthPlaneLuma[k]
++ + mode_lib->vba.ReadBandwidthPlaneChroma[k],
++ mode_lib->vba.RequiredPrefetchPixDataBWLuma[k])));
++ }
++ mode_lib->vba.ImmediateFlipSupported = true;
++ if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++ }
++ } else {
++ mode_lib->vba.ImmediateFlipSupported = false;
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.ErrorResult[k]) {
++ mode_lib->vba.PrefetchModeSupported = false;
++ dml_print(
++ "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
++ }
++ }
++
++ mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
++ } while (!((mode_lib->vba.PrefetchModeSupported
++ && (!mode_lib->vba.ImmediateFlipSupport
++ || mode_lib->vba.ImmediateFlipSupported))
++ || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
++
++ //Display Pipeline Delivery Time in Prefetch
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.VRatioPrefetchY[k] <= 1) {
++ mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
++ mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k]
++ / mode_lib->vba.HRatio[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
++ mode_lib->vba.SwathWidthY[k]
++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k]
++ / mode_lib->vba.DPPCLK[k];
++ }
++ if (mode_lib->vba.BytePerPixelDETC[k] == 0) {
++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
++ } else {
++ if (mode_lib->vba.VRatioPrefetchC[k] <= 1) {
++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
++ mode_lib->vba.SwathWidthY[k]
++ * mode_lib->vba.DPPPerPlane[k]
++ / mode_lib->vba.HRatio[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
++ mode_lib->vba.SwathWidthY[k]
++ / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k]
++ / mode_lib->vba.DPPCLK[k];
++ }
++ }
++ }
++
++ // Min TTUVBlank
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true;
++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true;
++ mode_lib->vba.MinTTUVBlank[k] = dml_max(
++ mode_lib->vba.DRAMClockChangeWatermark,
++ dml_max(
++ mode_lib->vba.StutterEnterPlusExitWatermark,
++ mode_lib->vba.UrgentWatermark));
++ } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false;
++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true;
++ mode_lib->vba.MinTTUVBlank[k] = dml_max(
++ mode_lib->vba.StutterEnterPlusExitWatermark,
++ mode_lib->vba.UrgentWatermark);
++ } else {
++ mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false;
++ mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false;
++ mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
++ }
++ if (!mode_lib->vba.DynamicMetadataEnable[k])
++ mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc
++ + mode_lib->vba.MinTTUVBlank[k];
++ }
++
++ // DCC Configuration
++ mode_lib->vba.ActiveDPPs = 0;
++ // NB P-State/DRAM Clock Change Support
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k];
++ }
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ double EffectiveLBLatencyHidingY;
++ double EffectiveLBLatencyHidingC;
++ double DPPOutputBufferLinesY;
++ double DPPOutputBufferLinesC;
++ double DPPOPPBufferingY;
++ double MaxDETBufferingTimeY;
++ double ActiveDRAMClockChangeLatencyMarginY;
++
++ mode_lib->vba.LBLatencyHidingSourceLinesY =
++ dml_min(
++ mode_lib->vba.MaxLineBufferLines,
++ (unsigned int) dml_floor(
++ (double) mode_lib->vba.LineBufferSize
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.SwathWidthY[k]
++ / dml_max(
++ mode_lib->vba.HRatio[k],
++ 1.0)),
++ 1)) - (mode_lib->vba.vtaps[k] - 1);
++
++ mode_lib->vba.LBLatencyHidingSourceLinesC =
++ dml_min(
++ mode_lib->vba.MaxLineBufferLines,
++ (unsigned int) dml_floor(
++ (double) mode_lib->vba.LineBufferSize
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.SwathWidthY[k]
++ / 2.0
++ / dml_max(
++ mode_lib->vba.HRatio[k]
++ / 2,
++ 1.0)),
++ 1))
++ - (mode_lib->vba.VTAPsChroma[k] - 1);
++
++ EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY
++ / mode_lib->vba.VRatio[k]
++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]);
++
++ EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
++ / (mode_lib->vba.VRatio[k] / 2)
++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]);
++
++ if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) {
++ DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels
++ / mode_lib->vba.SwathWidthY[k];
++ } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) {
++ DPPOutputBufferLinesY = 0.5;
++ } else {
++ DPPOutputBufferLinesY = 1;
++ }
++
++ if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) {
++ DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels
++ / (mode_lib->vba.SwathWidthY[k] / 2);
++ } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) {
++ DPPOutputBufferLinesC = 0.5;
++ } else {
++ DPPOutputBufferLinesC = 1;
++ }
++
++ DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines);
++ MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k]
++ + (mode_lib->vba.LinesInDETY[k]
++ - mode_lib->vba.LinesInDETYRoundedDownToSwath[k])
++ / mode_lib->vba.SwathHeightY[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++
++ ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY
++ + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark;
++
++ if (mode_lib->vba.ActiveDPPs > 1) {
++ ActiveDRAMClockChangeLatencyMarginY =
++ ActiveDRAMClockChangeLatencyMarginY
++ - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1))
++ * mode_lib->vba.SwathHeightY[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++ }
++
++ if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
++ double DPPOPPBufferingC = (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ * (DPPOutputBufferLinesC
++ + mode_lib->vba.OPPOutputBufferLines);
++ double MaxDETBufferingTimeC =
++ mode_lib->vba.FullDETBufferingTimeC[k]
++ + (mode_lib->vba.LinesInDETC[k]
++ - mode_lib->vba.LinesInDETCRoundedDownToSwath[k])
++ / mode_lib->vba.SwathHeightC[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++ double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC
++ + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC
++ - mode_lib->vba.DRAMClockChangeWatermark;
++
++ if (mode_lib->vba.ActiveDPPs > 1) {
++ ActiveDRAMClockChangeLatencyMarginC =
++ ActiveDRAMClockChangeLatencyMarginC
++ - (1
++ - 1
++ / (mode_lib->vba.ActiveDPPs
++ - 1))
++ * mode_lib->vba.SwathHeightC[k]
++ * (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]);
++ }
++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
++ ActiveDRAMClockChangeLatencyMarginY,
++ ActiveDRAMClockChangeLatencyMarginC);
++ } else {
++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] =
++ ActiveDRAMClockChangeLatencyMarginY;
++ }
++
++ if (mode_lib->vba.WritebackEnable[k]) {
++ double WritebackDRAMClockChangeLatencyMargin;
++
++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
++ WritebackDRAMClockChangeLatencyMargin =
++ (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize
++ + mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / (mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k])
++ * 4)
++ - mode_lib->vba.WritebackDRAMClockChangeWatermark;
++ } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
++ WritebackDRAMClockChangeLatencyMargin =
++ dml_min(
++ (double) mode_lib->vba.WritebackInterfaceLumaBufferSize
++ * 8.0 / 10,
++ 2.0
++ * mode_lib->vba.WritebackInterfaceChromaBufferSize
++ * 8 / 10)
++ / (mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]))
++ - mode_lib->vba.WritebackDRAMClockChangeWatermark;
++ } else {
++ WritebackDRAMClockChangeLatencyMargin =
++ dml_min(
++ (double) mode_lib->vba.WritebackInterfaceLumaBufferSize,
++ 2.0
++ * mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / (mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]))
++ - mode_lib->vba.WritebackDRAMClockChangeWatermark;
++ }
++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
++ WritebackDRAMClockChangeLatencyMargin);
++ }
++ }
++
++ mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
++ < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
++ mode_lib->vba.MinActiveDRAMClockChangeMargin =
++ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
++ }
++ }
++
++ mode_lib->vba.MinActiveDRAMClockChangeLatencySupported =
++ mode_lib->vba.MinActiveDRAMClockChangeMargin
++ + mode_lib->vba.DRAMClockChangeLatency;
++
++ if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
++ } else {
++ if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) {
++ mode_lib->vba.DRAMClockChangeSupport[0][0] =
++ dm_dram_clock_change_unsupported;
++ }
++ }
++ } else {
++ mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported;
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.soc.num_states; k++)
++ for (j = 0; j < 2; j++)
++ mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0];
++
++ //XFC Parameters:
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ double TWait;
++
++ mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
++ mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
++ mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
++ TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.SREnterPlusExitTime);
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.SwathWidthY[k],
++ dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1),
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TCalc,
++ TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] =
++ dml_floor(
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ mode_lib->vba.XFCTransferDelay[k] =
++ dml_ceil(
++ mode_lib->vba.XFCBusTransportTime
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ mode_lib->vba.XFCPrechargeDelay[k] =
++ dml_ceil(
++ (mode_lib->vba.XFCBusTransportTime
++ + mode_lib->vba.TInitXFill
++ + mode_lib->vba.TslvChk)
++ / (mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]),
++ 1);
++ mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
++ * mode_lib->vba.SrcActiveDrainRate;
++ mode_lib->vba.FinalFillMargin =
++ (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k]
++ + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k])
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]
++ * mode_lib->vba.SrcActiveDrainRate
++ + mode_lib->vba.XFCFillConstant;
++ mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ * mode_lib->vba.SrcActiveDrainRate
++ + mode_lib->vba.FinalFillMargin;
++ mode_lib->vba.RemainingFillLevel = dml_max(
++ 0.0,
++ mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
++ mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
++ / (mode_lib->vba.SrcActiveDrainRate
++ * mode_lib->vba.XFCFillBWOverhead / 100);
++ mode_lib->vba.XFCPrefetchMargin[k] =
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay
++ + mode_lib->vba.TFinalxFill
++ + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k]
++ + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k])
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k];
++ } else {
++ mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0;
++ mode_lib->vba.XFCSlaveVupdateWidth[k] = 0;
++ mode_lib->vba.XFCSlaveVReadyOffset[k] = 0;
++ mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0;
++ mode_lib->vba.XFCPrechargeDelay[k] = 0;
++ mode_lib->vba.XFCTransferDelay[k] = 0;
++ mode_lib->vba.XFCPrefetchMargin[k] = 0;
++ }
++ }
++ {
++ unsigned int VStartupMargin = 0;
++ bool FirstMainPlane = true;
++
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k])
++ * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k];
++
++ if (FirstMainPlane) {
++ VStartupMargin = Margin;
++ FirstMainPlane = false;
++ } else
++ VStartupMargin = dml_min(VStartupMargin, Margin);
++ }
++
++ if (mode_lib->vba.UseMaximumVStartup) {
++ if (mode_lib->vba.VTotal_Max[k] == mode_lib->vba.VTotal[k]) {
++ //only use max vstart if it is not drr or lateflip.
++ mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]];
++ }
++ }
++ }
++}
++}
++
++/*
++ * Pick the swath heights and the luma/chroma DET buffer split of every active
++ * plane.
++ *
++ * For each plane this derives the 256-byte request block dimensions from the
++ * source pixel format, surface tiling and scan direction, estimates the
++ * rounded-up maximum swath size in bytes, and stores the outcome in
++ * SwathHeightY/C[] and DETBufferSizeY/C[] of mode_lib->vba.
++ */
++static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
++{
++	struct vba_vars_st *v = &mode_lib->vba;
++	unsigned int plane, other;
++
++	for (plane = 0; plane < v->NumberOfActivePlanes; ++plane) {
++		const enum source_format_class Format = v->SourcePixelFormat[plane];
++		const bool Linear = v->SurfaceTiling[plane] == dm_sw_linear;
++		const bool Horizontal = v->SourceScan[plane] == dm_horz;
++		const bool STiled = v->SurfaceTiling[plane] == dm_sw_4kb_s
++				|| v->SurfaceTiling[plane] == dm_sw_4kb_s_x
++				|| v->SurfaceTiling[plane] == dm_sw_64kb_s
++				|| v->SurfaceTiling[plane] == dm_sw_64kb_s_t
++				|| v->SurfaceTiling[plane] == dm_sw_64kb_s_x
++				|| v->SurfaceTiling[plane] == dm_sw_var_s
++				|| v->SurfaceTiling[plane] == dm_sw_var_s_x;
++		bool Is444 = true;
++		bool OdmCombine;
++		bool FitsAtMaxHeight;
++		double BytesY, BytesC;
++		double BlockHeightY, BlockHeightC;
++		double BlockWidthY, BlockWidthC;
++		double MaxHeightY, MaxHeightC;
++		double MinHeightY, MinHeightC;
++		double Width;
++		double GranularityY, GranularityC;
++		double SwathBytesY, SwathBytesC;
++
++		/* Bytes per pixel held in the DET for luma and chroma. */
++		switch (Format) {
++		case dm_444_64:
++			BytesY = 8;
++			BytesC = 0;
++			break;
++		case dm_444_32:
++			BytesY = 4;
++			BytesC = 0;
++			break;
++		case dm_444_16:
++			BytesY = 2;
++			BytesC = 0;
++			break;
++		case dm_444_8:
++			BytesY = 1;
++			BytesC = 0;
++			break;
++		case dm_420_8:
++			BytesY = 1;
++			BytesC = 2;
++			Is444 = false;
++			break;
++		default:
++			BytesY = 4.0 / 3.0;
++			BytesC = 8.0 / 3.0;
++			Is444 = false;
++			break;
++		}
++
++		/* Height of a 256-byte request block, in lines. */
++		if (Is444) {
++			if (Linear)
++				BlockHeightY = 1;
++			else if (Format == dm_444_64)
++				BlockHeightY = 4;
++			else if (Format == dm_444_32 || Format == dm_444_16)
++				BlockHeightY = 8;
++			else
++				BlockHeightY = 16;
++			BlockHeightC = 0;
++		} else if (Linear) {
++			BlockHeightY = 1;
++			BlockHeightC = 1;
++		} else if (Format == dm_420_8) {
++			BlockHeightY = 16;
++			BlockHeightC = 8;
++		} else {
++			BlockHeightY = 8;
++			BlockHeightC = 8;
++		}
++
++		/* Matching block width; formats without a chroma plane get 0. */
++		BlockWidthY = 256 / dml_ceil(BytesY, 1) / BlockHeightY;
++		if (Is444)
++			BlockWidthC = 0;
++		else
++			BlockWidthC = 256 / dml_ceil(BytesC, 2) / BlockHeightC;
++
++		/* A non-horizontal scan swaps the roles of block height and width. */
++		MaxHeightY = Horizontal ? BlockHeightY : BlockWidthY;
++		MaxHeightC = Horizontal ? BlockHeightC : BlockWidthC;
++
++		/* Minimum swath height equals the maximum unless a case below halves it. */
++		MinHeightY = MaxHeightY;
++		MinHeightC = MaxHeightC;
++		if (Is444) {
++			const bool KeepFullHeight = Linear
++					|| (Format == dm_444_64 && STiled && Horizontal)
++					|| (Format == dm_444_8 && !Horizontal);
++
++			if (!KeepFullHeight)
++				MinHeightY = MaxHeightY / 2.0;
++		} else if (!Linear && Horizontal) {
++			if (Format == dm_420_8)
++				MinHeightY = MaxHeightY / 2.0;
++			else if (Format == dm_420_10)
++				MinHeightC = MaxHeightC / 2.0;
++		}
++
++		/* Swath width starts from the viewport extent along the scan direction. */
++		Width = Horizontal ? v->ViewportWidth[plane] : v->ViewportHeight[plane];
++
++		/* ODM combine applies if this plane, or the plane it blends with, uses it. */
++		OdmCombine = (v->ODMCombineEnabled[plane] == true);
++		for (other = 0; !OdmCombine && other < v->NumberOfActivePlanes; ++other) {
++			if (v->BlendingAndTiming[plane] == other
++					&& v->ODMCombineEnabled[other] == true)
++				OdmCombine = true;
++		}
++
++		if (OdmCombine)
++			Width = dml_min(Width, v->HActive[plane] / 2.0 * v->HRatio[plane]);
++		else if (v->DPPPerPlane[plane] == 0)
++			Width = 0;
++		else
++			Width = Width / v->DPPPerPlane[plane];
++
++		/* Rounded-up luma swath size at the maximum swath height. */
++		GranularityY = 256 / dml_ceil(BytesY, 1) / MaxHeightY;
++		SwathBytesY = (dml_ceil((double) (Width - 1), GranularityY) + GranularityY)
++				* BytesY * MaxHeightY;
++		if (Format == dm_420_10)
++			SwathBytesY = dml_ceil(SwathBytesY, 256) + 256;
++
++		/* Same for chroma, when the format has a chroma plane. */
++		if (MaxHeightC > 0) {
++			GranularityC = 256.0 / dml_ceil(BytesC, 2) / MaxHeightC;
++			SwathBytesC = (dml_ceil((double) (Width / 2.0 - 1), GranularityC)
++					+ GranularityC) * BytesC * MaxHeightC;
++			if (Format == dm_420_10)
++				SwathBytesC = dml_ceil(SwathBytesC, 256) + 256;
++		} else {
++			SwathBytesC = 0.0;
++		}
++
++		/* Use the maximum heights only if both swaths fit in half the DET. */
++		FitsAtMaxHeight = SwathBytesY + SwathBytesC
++				<= v->DETBufferSizeInKByte * 1024.0 / 2.0;
++		if (FitsAtMaxHeight) {
++			v->SwathHeightY[plane] = MaxHeightY;
++			v->SwathHeightC[plane] = MaxHeightC;
++		} else {
++			v->SwathHeightY[plane] = MinHeightY;
++			v->SwathHeightC[plane] = MinHeightC;
++		}
++
++		/* Split the DET: all luma, half/half, or two thirds luma. */
++		if (v->SwathHeightC[plane] == 0) {
++			v->DETBufferSizeY[plane] = v->DETBufferSizeInKByte * 1024;
++			v->DETBufferSizeC[plane] = 0;
++		} else if (v->SwathHeightY[plane] <= v->SwathHeightC[plane]) {
++			v->DETBufferSizeY[plane] = v->DETBufferSizeInKByte * 1024.0 / 2;
++			v->DETBufferSizeC[plane] = v->DETBufferSizeInKByte * 1024.0 / 2;
++		} else {
++			v->DETBufferSizeY[plane] = v->DETBufferSizeInKByte * 1024.0 * 2 / 3;
++			v->DETBufferSizeC[plane] = v->DETBufferSizeInKByte * 1024.0 / 3;
++		}
++	}
++}
++
++/*
++ * Return the wait time selected by PrefetchMode.
++ *
++ * Mode 0 returns the larger of (DRAM clock change latency + urgent latency)
++ * and max(self-refresh enter+exit time, urgent latency); mode 1 returns
++ * max(self-refresh enter+exit time, urgent latency); every other mode
++ * returns the urgent latency alone.
++ */
++static double CalculateTWait(
++		unsigned int PrefetchMode,
++		double DRAMClockChangeLatency,
++		double UrgentLatencyPixelDataOnly,
++		double SREnterPlusExitTime)
++{
++	switch (PrefetchMode) {
++	case 0:
++		return dml_max(
++				DRAMClockChangeLatency + UrgentLatencyPixelDataOnly,
++				dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly));
++	case 1:
++		return dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly);
++	default:
++		return UrgentLatencyPixelDataOnly;
++	}
++}
++
++/*
++ * Compute the XFC remote surface flip delay.
++ *
++ * Writes the source active drain rate, the initial fill time and the slave
++ * chunk time through SrcActiveDrainRate, TInitXFill and TslvChk, logs each
++ * intermediate value with dml_print(), and returns the summed delay.
++ * mode_lib is not used by this function.
++ */
++static double CalculateRemoteSurfaceFlipDelay(
++		struct display_mode_lib *mode_lib,
++		double VRatio,
++		double SwathWidth,
++		double Bpp,
++		double LineTime,
++		double XFCTSlvVupdateOffset,
++		double XFCTSlvVupdateWidth,
++		double XFCTSlvVreadyOffset,
++		double XFCXBUFLatencyTolerance,
++		double XFCFillBWOverhead,
++		double XFCSlvChunkSize,
++		double XFCBusTransportTime,
++		double TCalc,
++		double TWait,
++		double *SrcActiveDrainRate,
++		double *TInitXFill,
++		double *TslvChk)
++{
++	/* Fill bandwidth overhead is given in percent; turn it into a multiplier. */
++	const double FillScale = 1 + XFCFillBWOverhead / 100;
++	double SlaveSetupTime;
++	double FillRate;
++	double FlipDelay;
++
++	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
++	SlaveSetupTime = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
++	*TInitXFill = XFCXBUFLatencyTolerance / FillScale;
++	FillRate = *SrcActiveDrainRate * FillScale;
++	*TslvChk = XFCSlvChunkSize / FillRate;
++
++	dml_print(
++			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
++			*SrcActiveDrainRate);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", FillRate);
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);
++
++	// TODO: This doesn't seem to match programming guide
++	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait + *TslvChk
++			+ *TInitXFill;
++	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);
++	return FlipDelay;
++}
++
++/*
++ * Return the writeback delay for the given scaler configuration.
++ *
++ * The luma delay is the larger of a horizontal-tap term and a vertical-tap
++ * term. For every writeback format other than dm_444_32 the equivalent
++ * chroma terms are evaluated as well and the overall maximum is returned.
++ */
++static double CalculateWriteBackDelay(
++		enum source_format_class WritebackPixelFormat,
++		double WritebackHRatio,
++		double WritebackVRatio,
++		unsigned int WritebackLumaHTaps,
++		unsigned int WritebackLumaVTaps,
++		unsigned int WritebackChromaHTaps,
++		unsigned int WritebackChromaVTaps,
++		unsigned int WritebackDestinationWidth)
++{
++	const double LumaLinesPerOutput = dml_ceil(1.0 / WritebackVRatio, 1);
++	const double LumaHorizontal = dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio;
++	const double LumaVertical =
++			WritebackLumaVTaps * LumaLinesPerOutput
++					* dml_ceil(WritebackDestinationWidth / 4.0, 1)
++			+ LumaLinesPerOutput * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4);
++	double Delay = dml_max(LumaHorizontal, LumaVertical);
++
++	if (WritebackPixelFormat != dm_444_32) {
++		/* Chroma is evaluated at half the horizontal and vertical ratio. */
++		const double ChromaLinesPerOutput = dml_ceil(1 / (2 * WritebackVRatio), 1);
++		const double ChromaHorizontal =
++				dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * WritebackHRatio);
++		const double ChromaVertical =
++				WritebackChromaVTaps * ChromaLinesPerOutput
++						* dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1)
++				+ ChromaLinesPerOutput
++						* (dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4);
++
++		Delay = dml_max(Delay, dml_max(ChromaHorizontal, ChromaVertical));
++	}
++	return Delay;
++}
++
++/*
++ * Compute the meta-row and DPTE-row bandwidth of one plane.
++ *
++ * *meta_row_bw is 0 unless DCCEnable is set and *dpte_row_bw is 0 unless
++ * GPUVMEnable is set. For dm_420_8 / dm_420_10 sources a chroma term (at half
++ * the vertical ratio) is added to the luma term, and *qual_row_bw is the sum
++ * of both bandwidths; for every other format *qual_row_bw is 0.
++ */
++static void CalculateActiveRowBandwidth(
++		bool GPUVMEnable,
++		enum source_format_class SourcePixelFormat,
++		double VRatio,
++		bool DCCEnable,
++		double LineTime,
++		unsigned int MetaRowByteLuma,
++		unsigned int MetaRowByteChroma,
++		unsigned int meta_row_height_luma,
++		unsigned int meta_row_height_chroma,
++		unsigned int PixelPTEBytesPerRowLuma,
++		unsigned int PixelPTEBytesPerRowChroma,
++		unsigned int dpte_row_height_luma,
++		unsigned int dpte_row_height_chroma,
++		double *meta_row_bw,
++		double *dpte_row_bw,
++		double *qual_row_bw)
++{
++	const bool is_420 = (SourcePixelFormat == dm_420_8
++			|| SourcePixelFormat == dm_420_10);
++
++	if (!DCCEnable) {
++		*meta_row_bw = 0;
++	} else {
++		double bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
++
++		if (is_420)
++			bw = bw + VRatio / 2 * MetaRowByteChroma
++					/ (meta_row_height_chroma * LineTime);
++		*meta_row_bw = bw;
++	}
++
++	if (!GPUVMEnable) {
++		*dpte_row_bw = 0;
++	} else {
++		double bw = VRatio * PixelPTEBytesPerRowLuma
++				/ (dpte_row_height_luma * LineTime);
++
++		if (is_420)
++			bw = bw + VRatio / 2 * PixelPTEBytesPerRowChroma
++					/ (dpte_row_height_chroma * LineTime);
++		*dpte_row_bw = bw;
++	}
++
++	if (is_420)
++		*qual_row_bw = *meta_row_bw + *dpte_row_bw;
++	else
++		*qual_row_bw = 0;
++}
++
++/*
++ * Work out the immediate-flip schedule of one pipe.
++ *
++ * Outputs the number of destination lines needed to request VM data and row
++ * data during an immediate flip, the resulting flip bandwidth, and whether
++ * the pipe supports immediate flip. dm_420_8 / dm_420_10 sources take a
++ * shortcut: zero lines, final_flip_bw = qual_row_bw, and support reported as
++ * true. When GPUVM or DCC is enabled, mode_lib->vba.ImmediateFlipBW[0] is
++ * updated as a side effect.
++ */
++static void CalculateFlipSchedule(
++		struct display_mode_lib *mode_lib,
++		double UrgentExtraLatency,
++		double UrgentLatencyPixelDataOnly,
++		unsigned int GPUVMMaxPageTableLevels,
++		bool GPUVMEnable,
++		double BandwidthAvailableForImmediateFlip,
++		unsigned int TotImmediateFlipBytes,
++		enum source_format_class SourcePixelFormat,
++		unsigned int ImmediateFlipBytes,
++		double LineTime,
++		double VRatio,
++		double Tno_bw,
++		double PDEAndMetaPTEBytesFrame,
++		unsigned int MetaRowByte,
++		unsigned int PixelPTEBytesPerRow,
++		bool DCCEnable,
++		unsigned int dpte_row_height,
++		unsigned int meta_row_height,
++		double qual_row_bw,
++		double *DestinationLinesToRequestVMInImmediateFlip,
++		double *DestinationLinesToRequestRowInImmediateFlip,
++		double *final_flip_bw,
++		bool *ImmediateFlipSupportedForPipe)
++{
++	const unsigned int RowBytes = MetaRowByte + PixelPTEBytesPerRow;
++	double VMFetchTime = 0;
++	double RowFetchTime = 0;
++	double MinRowTime;
++
++	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
++		*DestinationLinesToRequestVMInImmediateFlip = 0.0;
++		*DestinationLinesToRequestRowInImmediateFlip = 0.0;
++		*final_flip_bw = qual_row_bw;
++		*ImmediateFlipSupportedForPipe = true;
++		return;
++	}
++
++	/* Time to fetch the page-table data; stays 0 without GPUVM. */
++	if (GPUVMEnable) {
++		mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip
++				* ImmediateFlipBytes / TotImmediateFlipBytes;
++		VMFetchTime = dml_max(
++				Tno_bw + PDEAndMetaPTEBytesFrame / mode_lib->vba.ImmediateFlipBW[0],
++				dml_max(
++						UrgentExtraLatency
++								+ UrgentLatencyPixelDataOnly
++										* (GPUVMMaxPageTableLevels - 1),
++						LineTime / 4.0));
++	}
++
++	/* Lines are counted in quarter-line steps. */
++	*DestinationLinesToRequestVMInImmediateFlip = dml_floor(
++			4.0 * (VMFetchTime / LineTime + 0.125),
++			1) / 4.0;
++
++	/* Time to fetch one meta/PTE row; stays 0 with neither GPUVM nor DCC. */
++	if (GPUVMEnable || DCCEnable) {
++		mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip
++				* ImmediateFlipBytes / TotImmediateFlipBytes;
++		RowFetchTime = dml_max(
++				RowBytes / mode_lib->vba.ImmediateFlipBW[0],
++				dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0));
++	}
++
++	*DestinationLinesToRequestRowInImmediateFlip = dml_floor(
++			4.0 * (RowFetchTime / LineTime + 0.125),
++			1) / 4.0;
++
++	if (GPUVMEnable) {
++		*final_flip_bw = dml_max(
++				PDEAndMetaPTEBytesFrame
++						/ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
++				RowBytes / (RowFetchTime * LineTime));
++	} else if (RowBytes > 0) {
++		*final_flip_bw = RowBytes / (RowFetchTime * LineTime);
++	} else {
++		*final_flip_bw = 0;
++	}
++
++	/* Shortest row time the flip fetches are compared against. */
++	if (GPUVMEnable && !DCCEnable)
++		MinRowTime = dpte_row_height * LineTime / VRatio;
++	else if (!GPUVMEnable && DCCEnable)
++		MinRowTime = meta_row_height * LineTime / VRatio;
++	else
++		MinRowTime = dml_min(dpte_row_height, meta_row_height) * LineTime / VRatio;
++
++	*ImmediateFlipSupportedForPipe =
++			!(*DestinationLinesToRequestVMInImmediateFlip >= 8
++					|| *DestinationLinesToRequestRowInImmediateFlip >= 16
++					|| VMFetchTime + 2 * RowFetchTime > MinRowTime);
++}
++
++/*
++ * Truncate a requested bits-per-pixel value to one the output can carry.
++ *
++ * @DecimalBPP:              requested bits per pixel
++ * @DSCEnabled:              only consulted for non-HDMI outputs
++ * @Output:                  encoder class; dm_hdmi selects the HDMI ladders
++ * @Format:                  output format class
++ * @DSCInputBitPerComponent: bits per component fed to DSC
++ *
++ * Without DSC the result is the largest entry of a fixed per-format ladder
++ * that does not exceed DecimalBPP. With DSC the value is rounded down to a
++ * multiple of 1/16 bpp and clamped to a per-format ceiling of
++ * (1.5, 2 or 3) * DSCInputBitPerComponent - 1/16.
++ *
++ * The ceiling used to be written as "... - 1 / 16", which is integer division
++ * and evaluates to 0, so the 1/16 bpp margin was never applied. It is now
++ * computed in floating point.
++ *
++ * NOTE(review): the return type is unsigned int, so fractional DSC results
++ * are truncated to whole bits on return; with the corrected ceiling the
++ * clamped value is therefore one less than before (e.g. 11 instead of 12 for
++ * 8 bits per component in 4:2:0). Confirm callers expect whole bits.
++ *
++ * Returns BPP_INVALID when DecimalBPP is below the smallest supported value.
++ */
++static unsigned int TruncToValidBPP(
++		double DecimalBPP,
++		bool DSCEnabled,
++		enum output_encoder_class Output,
++		enum output_format_class Format,
++		unsigned int DSCInputBitPerComponent)
++{
++	if (Output == dm_hdmi) {
++		if (Format == dm_420) {
++			if (DecimalBPP >= 18)
++				return 18;
++			else if (DecimalBPP >= 15)
++				return 15;
++			else if (DecimalBPP >= 12)
++				return 12;
++			else
++				return BPP_INVALID;
++		} else if (Format == dm_444) {
++			if (DecimalBPP >= 36)
++				return 36;
++			else if (DecimalBPP >= 30)
++				return 30;
++			else if (DecimalBPP >= 24)
++				return 24;
++			else if (DecimalBPP >= 18)
++				return 18;
++			else
++				return BPP_INVALID;
++		} else {
++			/* Remaining HDMI formats compare DecimalBPP / 1.5 against the ladder. */
++			if (DecimalBPP / 1.5 >= 24)
++				return 24;
++			else if (DecimalBPP / 1.5 >= 20)
++				return 20;
++			else if (DecimalBPP / 1.5 >= 16)
++				return 16;
++			else
++				return BPP_INVALID;
++		}
++	} else {
++		if (DSCEnabled) {
++			double MinDSCBPP;
++			double MaxDSCBPP;
++
++			/* 1.0 / 16 keeps the margin in floating point (1 / 16 is 0). */
++			if (Format == dm_420) {
++				MinDSCBPP = 6;
++				MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
++			} else if (Format == dm_n422) {
++				MinDSCBPP = 7;
++				MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16;
++			} else {
++				MinDSCBPP = 8;
++				MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
++			}
++
++			if (DecimalBPP < MinDSCBPP)
++				return BPP_INVALID;
++			else if (DecimalBPP >= MaxDSCBPP)
++				return (unsigned int) MaxDSCBPP;
++			else
++				return (unsigned int) (dml_floor(16 * DecimalBPP, 1) / 16);
++		} else if (Format == dm_420) {
++			if (DecimalBPP >= 18)
++				return 18;
++			else if (DecimalBPP >= 15)
++				return 15;
++			else if (DecimalBPP >= 12)
++				return 12;
++			else
++				return BPP_INVALID;
++		} else if (Format == dm_s422 || Format == dm_n422) {
++			if (DecimalBPP >= 24)
++				return 24;
++			else if (DecimalBPP >= 20)
++				return 20;
++			else if (DecimalBPP >= 16)
++				return 16;
++			else
++				return BPP_INVALID;
++		} else {
++			if (DecimalBPP >= 36)
++				return 36;
++			else if (DecimalBPP >= 30)
++				return 30;
++			else if (DecimalBPP >= 24)
++				return 24;
++			else if (DecimalBPP >= 18)
++				return 18;
++			else
++				return BPP_INVALID;
++		}
++	}
++}
++
++void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
++{
++ struct vba_vars_st *locals = &mode_lib->vba;
++
++ int i;
++ unsigned int j, k, m;
++
++ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
++
++ /*Scale Ratio, taps Support Check*/
++
++ mode_lib->vba.ScaleRatioAndTapsSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.ScalerEnabled[k] == false
++ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
++ || mode_lib->vba.HRatio[k] != 1.0
++ || mode_lib->vba.htaps[k] != 1.0
++ || mode_lib->vba.VRatio[k] != 1.0
++ || mode_lib->vba.vtaps[k] != 1.0)) {
++ mode_lib->vba.ScaleRatioAndTapsSupport = false;
++ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
++ || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
++ || (mode_lib->vba.htaps[k] > 1.0
++ && (mode_lib->vba.htaps[k] % 2) == 1)
++ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
++ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
++ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
++ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
++ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
++ && (mode_lib->vba.HRatio[k] / 2.0
++ > mode_lib->vba.HTAPsChroma[k]
++ || mode_lib->vba.VRatio[k] / 2.0
++ > mode_lib->vba.VTAPsChroma[k]))) {
++ mode_lib->vba.ScaleRatioAndTapsSupport = false;
++ }
++ }
++ /*Source Format, Pixel Format and Scan Support Check*/
++
++ mode_lib->vba.SourceFormatPixelAndScanSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
++ && mode_lib->vba.SourceScan[k] != dm_horz)
++ || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
++ || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
++ || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
++ && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_10))
++ || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_gfx7_2d_thin_lvp)
++ && !((mode_lib->vba.SourcePixelFormat[k]
++ == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_444_32)
++ && mode_lib->vba.SourceScan[k]
++ == dm_horz
++ && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
++ == true
++ && mode_lib->vba.DCCEnable[k]
++ == false))
++ || (mode_lib->vba.DCCEnable[k] == true
++ && (mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_linear
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_8
++ || mode_lib->vba.SourcePixelFormat[k]
++ == dm_420_10)))) {
++ mode_lib->vba.SourceFormatPixelAndScanSupport = false;
++ }
++ }
++ /*Bandwidth Support Check*/
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
++ locals->BytePerPixelInDETY[k] = 8.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
++ locals->BytePerPixelInDETY[k] = 4.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
++ locals->BytePerPixelInDETY[k] = 2.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
++ locals->BytePerPixelInDETY[k] = 1.0;
++ locals->BytePerPixelInDETC[k] = 0.0;
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
++ locals->BytePerPixelInDETY[k] = 1.0;
++ locals->BytePerPixelInDETC[k] = 2.0;
++ } else {
++ locals->BytePerPixelInDETY[k] = 4.0 / 3;
++ locals->BytePerPixelInDETC[k] = 8.0 / 3;
++ }
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
++ } else {
++ locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
++ locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
++ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
++ locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 4.0;
++ } else if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 3.0;
++ } else if (mode_lib->vba.WritebackEnable[k] == true) {
++ locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
++ * mode_lib->vba.WritebackDestinationHeight[k]
++ / (mode_lib->vba.WritebackSourceHeight[k]
++ * mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k]) * 1.5;
++ } else {
++ locals->WriteBandwidth[k] = 0.0;
++ }
++ }
++ mode_lib->vba.DCCEnabledInAnyPlane = false;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.DCCEnable[k] == true) {
++ mode_lib->vba.DCCEnabledInAnyPlane = true;
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->FabricAndDRAMBandwidthPerState[i] = dml_min(
++ mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
++ * mode_lib->vba.DRAMChannelWidth,
++ mode_lib->vba.FabricClockPerState[i]
++ * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000;
++ locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i],
++ locals->FabricAndDRAMBandwidthPerState[i] * 1000)
++ * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
++
++ locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState;
++
++ if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
++ locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
++ ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
++ / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
++ * locals->ReturnBusWidth / 4) + locals->UrgentLatency)));
++ }
++ locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] *
++ locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency
++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
++
++ if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
++ 4 * locals->ReturnBWToDCNPerState *
++ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
++ * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
++ dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency
++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2));
++ }
++
++ locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth *
++ locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000);
++
++ if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) {
++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
++ locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency /
++ ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
++ / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i]
++ * locals->ReturnBusWidth / 4) + locals->UrgentLatency)));
++ }
++ locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] *
++ locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency
++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024);
++
++ if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) {
++ locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i],
++ 4 * locals->ReturnBWToDCNPerState *
++ (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024
++ * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency /
++ dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency
++ + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2));
++ }
++ }
++ /*Writeback Latency support check*/
++
++ mode_lib->vba.WritebackLatencySupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
++ if (locals->WriteBandwidth[k]
++ > (mode_lib->vba.WritebackInterfaceLumaBufferSize
++ + mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / mode_lib->vba.WritebackLatency) {
++ mode_lib->vba.WritebackLatencySupport = false;
++ }
++ } else {
++ if (locals->WriteBandwidth[k]
++ > 1.5
++ * dml_min(
++ mode_lib->vba.WritebackInterfaceLumaBufferSize,
++ 2.0
++ * mode_lib->vba.WritebackInterfaceChromaBufferSize)
++ / mode_lib->vba.WritebackLatency) {
++ mode_lib->vba.WritebackLatencySupport = false;
++ }
++ }
++ }
++ }
++ /*Re-ordering Buffer Support Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
++ (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
++ + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i];
++ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i]
++ > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
++ locals->ROBSupport[i] = true;
++ } else {
++ locals->ROBSupport[i] = false;
++ }
++ }
++ /*Writeback Mode Support Check*/
++
++ mode_lib->vba.TotalNumberOfActiveWriteback = 0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
++ mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
++ mode_lib->vba.TotalNumberOfActiveWriteback =
++ mode_lib->vba.TotalNumberOfActiveWriteback
++ + mode_lib->vba.ActiveWritebacksPerPlane[k];
++ }
++ }
++ mode_lib->vba.WritebackModeSupport = true;
++ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
++ mode_lib->vba.WritebackModeSupport = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true
++ && mode_lib->vba.Writeback10bpc420Supported != true
++ && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
++ mode_lib->vba.WritebackModeSupport = false;
++ }
++ }
++ /*Writeback Scale Ratio and Taps Support Check*/
++
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
++ && (mode_lib->vba.WritebackHRatio[k] != 1.0
++ || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
++ || mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackMaxVSCLRatio
++ || mode_lib->vba.WritebackHRatio[k]
++ < mode_lib->vba.WritebackMinHSCLRatio
++ || mode_lib->vba.WritebackVRatio[k]
++ < mode_lib->vba.WritebackMinVSCLRatio
++ || mode_lib->vba.WritebackLumaHTaps[k]
++ > mode_lib->vba.WritebackMaxHSCLTaps
++ || mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackMaxVSCLTaps
++ || mode_lib->vba.WritebackHRatio[k]
++ > mode_lib->vba.WritebackLumaHTaps[k]
++ || mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackLumaVTaps[k]
++ || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
++ && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
++ == 1))
++ || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
++ && (mode_lib->vba.WritebackChromaHTaps[k]
++ > mode_lib->vba.WritebackMaxHSCLTaps
++ || mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackMaxVSCLTaps
++ || 2.0
++ * mode_lib->vba.WritebackHRatio[k]
++ > mode_lib->vba.WritebackChromaHTaps[k]
++ || 2.0
++ * mode_lib->vba.WritebackVRatio[k]
++ > mode_lib->vba.WritebackChromaVTaps[k]
++ || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
++ && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
++ mode_lib->vba.WritebackLumaVExtra =
++ dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
++ } else {
++ mode_lib->vba.WritebackLumaVExtra = -1;
++ }
++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > (mode_lib->vba.WritebackLineBufferLumaBufferSize
++ + mode_lib->vba.WritebackLineBufferChromaBufferSize)
++ / 3.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferLumaBufferSize
++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
++ && mode_lib->vba.WritebackLumaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferLumaBufferSize
++ * 8.0 / 10.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackLumaVExtra)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
++ mode_lib->vba.WritebackChromaVExtra = 0.0;
++ } else {
++ mode_lib->vba.WritebackChromaVExtra = -1;
++ }
++ if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
++ && mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferChromaBufferSize
++ * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackChromaVExtra)
++ || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
++ && mode_lib->vba.WritebackChromaVTaps[k]
++ > mode_lib->vba.WritebackLineBufferChromaBufferSize
++ * 8.0 / 10.0
++ / mode_lib->vba.WritebackDestinationWidth[k]
++ - mode_lib->vba.WritebackChromaVExtra)) {
++ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
++ }
++ }
++ }
++ /*Maximum DISPCLK/DPPCLK Support check*/
++
++ mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ mode_lib->vba.WritebackRequiredDISPCLK =
++ dml_max(
++ mode_lib->vba.WritebackRequiredDISPCLK,
++ CalculateWriteBackDISPCLK(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.WritebackChromaLineBufferWidth));
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.HRatio[k] > 1.0) {
++ locals->PSCL_FACTOR[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / dml_ceil(
++ mode_lib->vba.htaps[k]
++ / 6.0,
++ 1.0));
++ } else {
++ locals->PSCL_FACTOR[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ if (locals->BytePerPixelInDETC[k] == 0.0) {
++ locals->PSCL_FACTOR_CHROMA[k] = 0.0;
++ locals->MinDPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.PixelClock[k]
++ * dml_max3(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / locals->PSCL_FACTOR[k],
++ 1.0);
++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
++ && locals->MinDPPCLKUsingSingleDPP[k]
++ < 2.0 * mode_lib->vba.PixelClock[k]) {
++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0
++ * mode_lib->vba.PixelClock[k];
++ }
++ } else {
++ if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
++ locals->PSCL_FACTOR_CHROMA[k] =
++ dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput
++ * mode_lib->vba.HRatio[k]
++ / 2.0
++ / dml_ceil(
++ mode_lib->vba.HTAPsChroma[k]
++ / 6.0,
++ 1.0));
++ } else {
++ locals->PSCL_FACTOR_CHROMA[k] = dml_min(
++ mode_lib->vba.MaxDCHUBToPSCLThroughput,
++ mode_lib->vba.MaxPSCLToLBThroughput);
++ }
++ locals->MinDPPCLKUsingSingleDPP[k] =
++ mode_lib->vba.PixelClock[k]
++ * dml_max5(
++ mode_lib->vba.vtaps[k] / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / locals->PSCL_FACTOR[k],
++ mode_lib->vba.VTAPsChroma[k]
++ / 6.0
++ * dml_min(
++ 1.0,
++ mode_lib->vba.HRatio[k]
++ / 2.0),
++ mode_lib->vba.HRatio[k]
++ * mode_lib->vba.VRatio[k]
++ / 4.0
++ / locals->PSCL_FACTOR_CHROMA[k],
++ 1.0);
++ if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
++ || mode_lib->vba.HTAPsChroma[k] > 6.0
++ || mode_lib->vba.VTAPsChroma[k] > 6.0)
++ && locals->MinDPPCLKUsingSingleDPP[k]
++ < 2.0 * mode_lib->vba.PixelClock[k]) {
++ locals->MinDPPCLKUsingSingleDPP[k] = 2.0
++ * mode_lib->vba.PixelClock[k];
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ Calculate256BBlockSizes(
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
++ dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
++ &locals->Read256BlockHeightY[k],
++ &locals->Read256BlockHeightC[k],
++ &locals->Read256BlockWidthY[k],
++ &locals->Read256BlockWidthC[k]);
++ if (mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
++ locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
++ } else {
++ locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
++ locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
++ }
++ if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
++ || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
++ || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
++ || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
++ && (mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_4kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_t
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_64kb_s_x
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s
++ || mode_lib->vba.SurfaceTiling[k]
++ == dm_sw_var_s_x)
++ && mode_lib->vba.SourceScan[k] == dm_horz)) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ } else {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
++ / 2.0;
++ }
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else {
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
++ / 2.0;
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
++ && mode_lib->vba.SourceScan[k] == dm_horz) {
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
++ / 2.0;
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ } else {
++ locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
++ locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
++ }
++ }
++ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
++ mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
++ } else {
++ mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
++ }
++ mode_lib->vba.MaximumSwathWidthInDETBuffer =
++ dml_min(
++ mode_lib->vba.MaximumSwathWidthSupport,
++ mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
++ / (locals->BytePerPixelInDETY[k]
++ * locals->MinSwathHeightY[k]
++ + locals->BytePerPixelInDETC[k]
++ / 2.0
++ * locals->MinSwathHeightC[k]));
++ if (locals->BytePerPixelInDETC[k] == 0.0) {
++ mode_lib->vba.MaximumSwathWidthInLineBuffer =
++ mode_lib->vba.LineBufferSize
++ * dml_max(mode_lib->vba.HRatio[k], 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.vtaps[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k],
++ 1.0)
++ - 2,
++ 0.0));
++ } else {
++ mode_lib->vba.MaximumSwathWidthInLineBuffer =
++ dml_min(
++ mode_lib->vba.LineBufferSize
++ * dml_max(
++ mode_lib->vba.HRatio[k],
++ 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.vtaps[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k],
++ 1.0)
++ - 2,
++ 0.0)),
++ 2.0 * mode_lib->vba.LineBufferSize
++ * dml_max(
++ mode_lib->vba.HRatio[k]
++ / 2.0,
++ 1.0)
++ / mode_lib->vba.LBBitPerPixel[k]
++ / (mode_lib->vba.VTAPsChroma[k]
++ + dml_max(
++ dml_ceil(
++ mode_lib->vba.VRatio[k]
++ / 2.0,
++ 1.0)
++ - 2,
++ 0.0)));
++ }
++ locals->MaximumSwathWidth[k] = dml_min(
++ mode_lib->vba.MaximumSwathWidthInDETBuffer,
++ mode_lib->vba.MaximumSwathWidthInLineBuffer);
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.MaxDispclk[i],
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
++ mode_lib->vba.MaxDppclk[i],
++ mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
++ locals->RequiredDISPCLK[i][j] = 0.0;
++ locals->DISPCLK_DPPCLK_Support[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
++ mode_lib->vba.PixelClock[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
++ if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
++ && i == mode_lib->vba.soc.num_states)
++ mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++
++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
++ if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
++ && i == mode_lib->vba.soc.num_states)
++ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
++ locals->ODMCombineEnablePerState[i][k] = false;
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
++ } else {
++ locals->ODMCombineEnablePerState[i][k] = true;
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
++ }
++ if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
++ && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
++ && locals->ODMCombineEnablePerState[i][k] == false) {
++ locals->NoOfDPP[i][j][k] = 1;
++ locals->RequiredDPPCLK[i][j][k] =
++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ } else {
++ locals->NoOfDPP[i][j][k] = 2;
++ locals->RequiredDPPCLK[i][j][k] =
++ locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.PlaneRequiredDISPCLK);
++ if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
++ || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ }
++ locals->TotalNumberOfActiveDPP[i][j] = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ if (j == 1) {
++ while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
++ && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
++ double BWOfNonSplitPlaneOfMaximumBandwidth;
++ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
++
++ BWOfNonSplitPlaneOfMaximumBandwidth = 0;
++ NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
++ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
++ if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
++ BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
++ NumberOfNonSplitPlaneOfMaximumBandwidth = k;
++ }
++ }
++ locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
++ locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
++ locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
++ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
++ }
++ }
++ if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
++ locals->RequiredDISPCLK[i][j] = 0.0;
++ locals->DISPCLK_DPPCLK_Support[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->ODMCombineEnablePerState[i][k] = false;
++ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
++ locals->NoOfDPP[i][j][k] = 1;
++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ } else {
++ locals->NoOfDPP[i][j][k] = 2;
++ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
++ }
++ if (i != mode_lib->vba.soc.num_states) {
++ mode_lib->vba.PlaneRequiredDISPCLK =
++ mode_lib->vba.PixelClock[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
++ } else {
++ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
++ * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.PlaneRequiredDISPCLK);
++ if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
++ > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
++ || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ locals->TotalNumberOfActiveDPP[i][j] = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
++ locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ }
++ locals->RequiredDISPCLK[i][j] = dml_max(
++ locals->RequiredDISPCLK[i][j],
++ mode_lib->vba.WritebackRequiredDISPCLK);
++ if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
++ < mode_lib->vba.WritebackRequiredDISPCLK) {
++ locals->DISPCLK_DPPCLK_Support[i][j] = false;
++ }
++ }
++ }
++ /*Viewport Size Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->ViewportSizeSupport[i] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
++ > locals->MaximumSwathWidth[k]) {
++ locals->ViewportSizeSupport[i] = false;
++ }
++ } else {
++ if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
++ locals->ViewportSizeSupport[i] = false;
++ }
++ }
++ }
++ }
++ /*Total Available Pipes Support Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
++ locals->TotalAvailablePipesSupport[i][j] = true;
++ else
++ locals->TotalAvailablePipesSupport[i][j] = false;
++ }
++ }
++ /*Total Available OTG Support Check*/
++
++ mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
++ + 1.0;
++ }
++ }
++ if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
++ mode_lib->vba.NumberOfOTGSupport = true;
++ } else {
++ mode_lib->vba.NumberOfOTGSupport = false;
++ }
++ /*Display IO and DSC Support Check*/
++
++ mode_lib->vba.NonsupportedDSCInputBPC = false;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
++ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
++ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
++ mode_lib->vba.NonsupportedDSCInputBPC = true;
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->RequiresDSC[i][k] = 0;
++ locals->RequiresFEC[i][k] = 0;
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.Output[k] == dm_hdmi) {
++ locals->RequiresDSC[i][k] = 0;
++ locals->RequiresFEC[i][k] = 0;
++ locals->OutputBppPerState[i][k] = TruncToValidBPP(
++ dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ } else if (mode_lib->vba.Output[k] == dm_dp
++ || mode_lib->vba.Output[k] == dm_edp) {
++ if (mode_lib->vba.Output[k] == dm_edp) {
++ mode_lib->vba.EffectiveFECOverhead = 0.0;
++ } else {
++ mode_lib->vba.EffectiveFECOverhead =
++ mode_lib->vba.FECOverhead;
++ }
++ if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
++ }
++ if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
++ }
++ if (mode_lib->vba.Outbpp == BPP_INVALID
++ && mode_lib->vba.PHYCLKPerState[i]
++ >= 810.0) {
++ mode_lib->vba.Outbpp = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ false,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ mode_lib->vba.OutbppDSC = TruncToValidBPP(
++ (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
++ * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
++ true,
++ mode_lib->vba.Output[k],
++ mode_lib->vba.OutputFormat[k],
++ mode_lib->vba.DSCInputBitPerComponent[k]);
++ if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
++ locals->RequiresDSC[i][k] = true;
++ if (mode_lib->vba.Output[k] == dm_dp) {
++ locals->RequiresFEC[i][k] = true;
++ } else {
++ locals->RequiresFEC[i][k] = false;
++ }
++ mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
++ } else {
++ locals->RequiresDSC[i][k] = false;
++ locals->RequiresFEC[i][k] = false;
++ }
++ locals->OutputBppPerState[i][k] =
++ mode_lib->vba.Outbpp;
++ }
++ }
++ } else {
++ locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
++ }
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->DIOSupport[i] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->OutputBppPerState[i][k] == BPP_INVALID
++ || (mode_lib->vba.OutputFormat[k] == dm_420
++ && mode_lib->vba.Interlace[k] == true
++ && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) {
++ locals->DIOSupport[i] = false;
++ }
++ }
++ }
++ /* DSCCLK check: flag state i when ANY DSC-requiring timing exceeds the down-spread MaxDSCCLK[i]. */
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ /* Clear once per state, before the plane loop, so a failure on an earlier plane is not overwritten. */
++ locals->DSCCLKRequiredMoreThanSupported[i] = false;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if ((mode_lib->vba.Output[k] == dm_dp
++ || mode_lib->vba.Output[k] == dm_edp)) {
++ /* DSCFormatFactor is 2 for dm_420 / dm_n422 output, 1 for every other format. */
++ if (mode_lib->vba.OutputFormat[k] == dm_420
++ || mode_lib->vba.OutputFormat[k] == dm_n422) {
++ mode_lib->vba.DSCFormatFactor = 2;
++ } else {
++ mode_lib->vba.DSCFormatFactor = 1;
++ }
++ if (locals->RequiresDSC[i][k] == true) {
++ /* PixelClockBackEnd is divided by 6.0 with ODM combine and by 3.0 without it. */
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
++ locals->DSCCLKRequiredMoreThanSupported[i] = true;
++ }
++ } else {
++ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
++ > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
++ locals->DSCCLKRequiredMoreThanSupported[i] = true;
++ }
++ }
++ }
++ }
++ }
++ }
++ }
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ locals->NotEnoughDSCUnits[i] = false;
++ mode_lib->vba.TotalDSCUnitsRequired = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->RequiresDSC[i][k] == true) {
++ if (locals->ODMCombineEnablePerState[i][k] == true) {
++ mode_lib->vba.TotalDSCUnitsRequired =
++ mode_lib->vba.TotalDSCUnitsRequired + 2.0;
++ } else {
++ mode_lib->vba.TotalDSCUnitsRequired =
++ mode_lib->vba.TotalDSCUnitsRequired + 1.0;
++ }
++ }
++ }
++ if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
++ locals->NotEnoughDSCUnits[i] = true;
++ }
++ }
++ /*DSC Delay per state*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] != k) {
++ mode_lib->vba.slices = 0;
++ } else if (locals->RequiresDSC[i][k] == 0
++ || locals->RequiresDSC[i][k] == false) {
++ mode_lib->vba.slices = 0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
++ mode_lib->vba.slices = dml_ceil(
++ mode_lib->vba.PixelClockBackEnd[k] / 400.0,
++ 4.0);
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
++ mode_lib->vba.slices = 8.0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
++ mode_lib->vba.slices = 4.0;
++ } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
++ mode_lib->vba.slices = 2.0;
++ } else {
++ mode_lib->vba.slices = 1.0;
++ }
++ if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
++ || locals->OutputBppPerState[i][k] == BPP_INVALID) {
++ mode_lib->vba.bpp = 0.0;
++ } else {
++ mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
++ }
++ if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
++ if (locals->ODMCombineEnablePerState[i][k] == false) {
++ locals->DSCDelayPerState[i][k] =
++ dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ mode_lib->vba.bpp,
++ dml_ceil(
++ mode_lib->vba.HActive[k]
++ / mode_lib->vba.slices,
++ 1.0),
++ mode_lib->vba.slices,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(
++ mode_lib->vba.OutputFormat[k]);
++ } else {
++ locals->DSCDelayPerState[i][k] =
++ 2.0 * (dscceComputeDelay(
++ mode_lib->vba.DSCInputBitPerComponent[k],
++ mode_lib->vba.bpp,
++ dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
++ mode_lib->vba.slices / 2,
++ mode_lib->vba.OutputFormat[k])
++ + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
++ }
++ locals->DSCDelayPerState[i][k] =
++ locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
++ } else {
++ locals->DSCDelayPerState[i][k] = 0.0;
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
++ locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
++ }
++ }
++ }
++ }
++
++ //Prefetch Check
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->ODMCombineEnablePerState[i][k] == true)
++ locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k]));
++ else
++ locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
++ locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k];
++ locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY)
++ + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
++ if (locals->SourcePixelFormat[k] == dm_420_10) {
++ locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256;
++ }
++ if (locals->MaxSwathHeightC[k] > 0) {
++ locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k];
++
++ locals->RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC)
++ + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
++ }
++ if (locals->SourcePixelFormat[k] == dm_420_10) {
++ locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256;
++ } else {
++ locals->RoundedUpMaxSwathSizeBytesC = 0;
++ }
++
++ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) {
++ locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
++ locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
++ } else {
++ locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k];
++ locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k];
++ }
++
++ if (locals->BytePerPixelInDETC[k] == 0) {
++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
++ locals->LinesInDETChroma = 0;
++ } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) {
++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] /
++ locals->SwathWidthYPerState[i][j][k];
++ locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
++ } else {
++ locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
++ locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
++ }
++
++ locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines,
++ dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k]
++ / dml_max(locals->HRatio[k], 1)), 1)) - (locals->vtaps[k] - 1);
++
++ locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines,
++ dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k]
++ / (locals->SwathWidthYPerState[i][j][k] / 2
++ / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1);
++
++ locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min(
++ locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] *
++ locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i],
++ locals->EffectiveLBLatencyHidingSourceLinesLuma),
++ locals->SwathHeightYPerState[i][j][k]);
++
++ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
++ locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
++ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i],
++ locals->EffectiveLBLatencyHidingSourceLinesChroma),
++ locals->SwathHeightCPerState[i][j][k]);
++
++ if (locals->BytePerPixelInDETC[k] == 0) {
++ locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
++ / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
++ dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]);
++ } else {
++ locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
++ locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
++ / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
++ dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]),
++ locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) -
++ locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 *
++ dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]));
++ }
++ }
++ }
++ }
++
++ for (i = 0; i <= locals->soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ locals->UrgentLatencySupport[i][j] = true;
++ for (k = 0; k < locals->NumberOfActivePlanes; k++) {
++ if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency)
++ locals->UrgentLatencySupport[i][j] = false;
++ }
++ }
++ }
++
++
++ /*Prefetch Check*/
++ for (i = 0; i <= locals->soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
++ for (k = 0; k < locals->NumberOfActivePlanes; k++) {
++ if (locals->DCCEnable[k] == true) {
++ locals->TotalNumberOfDCCActiveDPP[i][j] =
++ locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
++ }
++ }
++ }
++ }
++
++ CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode);
++
++ locals->MaxTotalVActiveRDBandwidth = 0;
++ for (k = 0; k < locals->NumberOfActivePlanes; k++) {
++ locals->MaxTotalVActiveRDBandwidth = locals->MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
++ }
++
++ for (i = 0; i <= locals->soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ for (k = 0; k < locals->NumberOfActivePlanes; k++) {
++ locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
++ locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
++ locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k];
++ locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k];
++ locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k];
++ mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ mode_lib->vba.PixelClock[k] / 16.0);
++ if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) {
++ if (mode_lib->vba.VRatio[k] <= 1.0) {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETY[k],
++ 1.0)
++ / 64.0
++ * mode_lib->vba.HRatio[k]
++ * mode_lib->vba.PixelClock[k]
++ / mode_lib->vba.NoOfDPP[i][j][k]);
++ } else {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETY[k],
++ 1.0)
++ / 64.0
++ * mode_lib->vba.PSCL_FACTOR[k]
++ * mode_lib->vba.RequiredDPPCLK[i][j][k]);
++ }
++ } else {
++ if (mode_lib->vba.VRatio[k] <= 1.0) {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETY[k],
++ 1.0)
++ / 32.0
++ * mode_lib->vba.HRatio[k]
++ * mode_lib->vba.PixelClock[k]
++ / mode_lib->vba.NoOfDPP[i][j][k]);
++ } else {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETY[k],
++ 1.0)
++ / 32.0
++ * mode_lib->vba.PSCL_FACTOR[k]
++ * mode_lib->vba.RequiredDPPCLK[i][j][k]);
++ }
++ if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETC[k],
++ 2.0)
++ / 32.0
++ * mode_lib->vba.HRatio[k]
++ / 2.0
++ * mode_lib->vba.PixelClock[k]
++ / mode_lib->vba.NoOfDPP[i][j][k]);
++ } else {
++ mode_lib->vba.ProjectedDCFCLKDeepSleep =
++ dml_max(
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ 1.1
++ * dml_ceil(
++ mode_lib->vba.BytePerPixelInDETC[k],
++ 2.0)
++ / 32.0
++ * mode_lib->vba.PSCL_FACTOR_CHROMA[k]
++ * mode_lib->vba.RequiredDPPCLK[i][j][k]);
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.Read256BlockHeightY[k],
++ mode_lib->vba.Read256BlockWidthY[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k],
++ mode_lib->vba.ViewportHeight[k],
++ mode_lib->vba.SwathWidthYPerState[i][j][k],
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsLuma,
++ mode_lib->vba.PDEProcessingBufIn64KBReqs,
++ mode_lib->vba.PitchY[k],
++ mode_lib->vba.DCCMetaPitchY[k],
++ &mode_lib->vba.MacroTileWidthY[k],
++ &mode_lib->vba.MetaRowBytesY,
++ &mode_lib->vba.DPTEBytesPerRowY,
++ &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k],
++ &mode_lib->vba.dpte_row_height[k],
++ &mode_lib->vba.meta_row_height[k]);
++ mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.vtaps[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightYPerState[i][j][k],
++ mode_lib->vba.ViewportYStartY[k],
++ &mode_lib->vba.PrefillY[k],
++ &mode_lib->vba.MaxNumSwY[k]);
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
++ mode_lib,
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.Read256BlockHeightY[k],
++ mode_lib->vba.Read256BlockWidthY[k],
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.SurfaceTiling[k],
++ dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0),
++ mode_lib->vba.SourceScan[k],
++ mode_lib->vba.ViewportWidth[k] / 2.0,
++ mode_lib->vba.ViewportHeight[k] / 2.0,
++ mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.VMMPageSize,
++ mode_lib->vba.PTEBufferSizeInRequestsLuma,
++ mode_lib->vba.PDEProcessingBufIn64KBReqs,
++ mode_lib->vba.PitchC[k],
++ 0.0,
++ &mode_lib->vba.MacroTileWidthC[k],
++ &mode_lib->vba.MetaRowBytesC,
++ &mode_lib->vba.DPTEBytesPerRowC,
++ &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k],
++ &mode_lib->vba.dpte_row_height_chroma[k],
++ &mode_lib->vba.meta_row_height_chroma[k]);
++ mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines(
++ mode_lib,
++ mode_lib->vba.VRatio[k] / 2.0,
++ mode_lib->vba.VTAPsChroma[k],
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.SwathHeightCPerState[i][j][k],
++ mode_lib->vba.ViewportYStartC[k],
++ &mode_lib->vba.PrefillC[k],
++ &mode_lib->vba.MaxNumSwC[k]);
++ } else {
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
++ mode_lib->vba.MetaRowBytesC = 0.0;
++ mode_lib->vba.DPTEBytesPerRowC = 0.0;
++ locals->PrefetchLinesC[k] = 0.0;
++ locals->PTEBufferSizeNotExceededC[i][j][k] = true;
++ locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
++ }
++ locals->PDEAndMetaPTEBytesPerFrame[k] =
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
++ locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
++ locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
++
++ CalculateActiveRowBandwidth(
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.MetaRowBytesY,
++ mode_lib->vba.MetaRowBytesC,
++ mode_lib->vba.meta_row_height[k],
++ mode_lib->vba.meta_row_height_chroma[k],
++ mode_lib->vba.DPTEBytesPerRowY,
++ mode_lib->vba.DPTEBytesPerRowC,
++ mode_lib->vba.dpte_row_height[k],
++ mode_lib->vba.dpte_row_height_chroma[k],
++ &mode_lib->vba.meta_row_bw[k],
++ &mode_lib->vba.dpte_row_bw[k],
++ &mode_lib->vba.qual_row_bw[k]);
++ }
++ mode_lib->vba.ExtraLatency =
++ mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i]
++ + (mode_lib->vba.TotalNumberOfActiveDPP[i][j]
++ * mode_lib->vba.PixelChunkSizeInKByte
++ + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
++ * mode_lib->vba.MetaChunkSize)
++ * 1024.0
++ / mode_lib->vba.ReturnBWPerState[i];
++ if (mode_lib->vba.GPUVMEnable == true) {
++ mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency
++ + mode_lib->vba.TotalNumberOfActiveDPP[i][j]
++ * mode_lib->vba.PTEGroupSize
++ / mode_lib->vba.ReturnBWPerState[i];
++ }
++ mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep;
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ if (mode_lib->vba.WritebackEnable[k] == true) {
++ locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
++ + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[k],
++ mode_lib->vba.WritebackHRatio[k],
++ mode_lib->vba.WritebackVRatio[k],
++ mode_lib->vba.WritebackLumaHTaps[k],
++ mode_lib->vba.WritebackLumaVTaps[k],
++ mode_lib->vba.WritebackChromaHTaps[k],
++ mode_lib->vba.WritebackChromaVTaps[k],
++ mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
++ } else {
++ locals->WritebackDelay[i][k] = 0.0;
++ }
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ if (mode_lib->vba.BlendingAndTiming[m] == k
++ && mode_lib->vba.WritebackEnable[m]
++ == true) {
++ locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
++ mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
++ mode_lib->vba.WritebackPixelFormat[m],
++ mode_lib->vba.WritebackHRatio[m],
++ mode_lib->vba.WritebackVRatio[m],
++ mode_lib->vba.WritebackLumaHTaps[m],
++ mode_lib->vba.WritebackLumaVTaps[m],
++ mode_lib->vba.WritebackChromaHTaps[m],
++ mode_lib->vba.WritebackChromaVTaps[m],
++ mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
++ }
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == m) {
++ locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
++ }
++ }
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (m = 0; m < locals->NumberOfCursors[k]; m++)
++ locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m]
++ / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k];
++ }
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
++ - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
++ }
++
++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
++ do {
++ mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
++ mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
++
++ mode_lib->vba.TWait = CalculateTWait(
++ mode_lib->vba.PrefetchMode[i][j],
++ mode_lib->vba.DRAMClockChangeLatency,
++ mode_lib->vba.UrgentLatency,
++ mode_lib->vba.SREnterPlusExitTime);
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++
++ if (mode_lib->vba.XFCEnabled[k] == true) {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay =
++ CalculateRemoteSurfaceFlipDelay(
++ mode_lib,
++ mode_lib->vba.VRatio[k],
++ locals->SwathWidthYPerState[i][j][k],
++ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
++ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.XFCTSlvVupdateOffset,
++ mode_lib->vba.XFCTSlvVupdateWidth,
++ mode_lib->vba.XFCTSlvVreadyOffset,
++ mode_lib->vba.XFCXBUFLatencyTolerance,
++ mode_lib->vba.XFCFillBWOverhead,
++ mode_lib->vba.XFCSlvChunkSize,
++ mode_lib->vba.XFCBusTransportTime,
++ mode_lib->vba.TimeCalc,
++ mode_lib->vba.TWait,
++ &mode_lib->vba.SrcActiveDrainRate,
++ &mode_lib->vba.TInitXFill,
++ &mode_lib->vba.TslvChk);
++ } else {
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
++ }
++
++ CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth,
++ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k],
++ mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k],
++ mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal,
++ mode_lib->vba.SwathWidthYPerState[i][j][k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k],
++ mode_lib->vba.SwathWidthYSingleDPP[k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.SwathHeightYThisState[k], mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.Interlace[k], mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]);
++
++ mode_lib->vba.IsErrorResult[i][j][k] =
++ CalculatePrefetchSchedule(
++ mode_lib,
++ mode_lib->vba.RequiredDPPCLK[i][j][k],
++ mode_lib->vba.RequiredDISPCLK[i][j],
++ mode_lib->vba.PixelClock[k],
++ mode_lib->vba.ProjectedDCFCLKDeepSleep,
++ mode_lib->vba.NoOfDPP[i][j][k],
++ mode_lib->vba.NumberOfCursors[k],
++ mode_lib->vba.VTotal[k]
++ - mode_lib->vba.VActive[k],
++ mode_lib->vba.HTotal[k],
++ mode_lib->vba.MaxInterDCNTileRepeaters,
++ mode_lib->vba.MaximumVStartup[k],
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.DynamicMetadataEnable[k],
++ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
++ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.ExtraLatency,
++ mode_lib->vba.TimeCalc,
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
++ mode_lib->vba.MetaRowBytes[k],
++ mode_lib->vba.DPTEBytesPerRow[k],
++ mode_lib->vba.PrefetchLinesY[k],
++ mode_lib->vba.SwathWidthYPerState[i][j][k],
++ mode_lib->vba.BytePerPixelInDETY[k],
++ mode_lib->vba.PrefillY[k],
++ mode_lib->vba.MaxNumSwY[k],
++ mode_lib->vba.PrefetchLinesC[k],
++ mode_lib->vba.BytePerPixelInDETC[k],
++ mode_lib->vba.PrefillC[k],
++ mode_lib->vba.MaxNumSwC[k],
++ mode_lib->vba.SwathHeightYPerState[i][j][k],
++ mode_lib->vba.SwathHeightCPerState[i][j][k],
++ mode_lib->vba.TWait,
++ mode_lib->vba.XFCEnabled[k],
++ mode_lib->vba.XFCRemoteSurfaceFlipDelay,
++ mode_lib->vba.Interlace[k],
++ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
++ mode_lib->vba.DSTXAfterScaler[k],
++ mode_lib->vba.DSTYAfterScaler[k],
++ &mode_lib->vba.LineTimesForPrefetch[k],
++ &mode_lib->vba.PrefetchBW[k],
++ &mode_lib->vba.LinesForMetaPTE[k],
++ &mode_lib->vba.LinesForMetaAndDPTERow[k],
++ &mode_lib->vba.VRatioPreY[i][j][k],
++ &mode_lib->vba.VRatioPreC[i][j][k],
++ &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k],
++ &mode_lib->vba.Tno_bw[k],
++ &mode_lib->vba.VUpdateOffsetPix[k],
++ &mode_lib->vba.VUpdateWidthPix[k],
++ &mode_lib->vba.VReadyOffsetPix[k]);
++ }
++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
++ mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
++ locals->prefetch_vm_bw_valid = true;
++ locals->prefetch_row_bw_valid = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0)
++ locals->prefetch_vm_bw[k] = 0;
++ else if (locals->LinesForMetaPTE[k] > 0)
++ locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k]
++ / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]);
++ else {
++ locals->prefetch_vm_bw[k] = 0;
++ locals->prefetch_vm_bw_valid = false;
++ }
++ if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0)
++ locals->prefetch_row_bw[k] = 0;
++ else if (locals->LinesForMetaAndDPTERow[k] > 0)
++ locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k])
++ / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]);
++ else {
++ locals->prefetch_row_bw[k] = 0;
++ locals->prefetch_row_bw_valid = false;
++ }
++
++ mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
++ + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k];
++ mode_lib->vba.MaximumReadBandwidthWithPrefetch =
++ mode_lib->vba.MaximumReadBandwidthWithPrefetch
++ + mode_lib->vba.cursor_bw[k]
++ + dml_max3(
++ mode_lib->vba.prefetch_vm_bw[k],
++ mode_lib->vba.prefetch_row_bw[k],
++ dml_max(mode_lib->vba.ReadBandwidth[k],
++ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])
++ + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]);
++ }
++ locals->BandwidthWithoutPrefetchSupported[i] = true;
++ if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) {
++ locals->BandwidthWithoutPrefetchSupported[i] = false;
++ }
++
++ locals->PrefetchSupported[i][j] = true;
++ if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) {
++ locals->PrefetchSupported[i][j] = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->LineTimesForPrefetch[k] < 2.0
++ || locals->LinesForMetaPTE[k] >= 8.0
++ || locals->LinesForMetaAndDPTERow[k] >= 16.0
++ || mode_lib->vba.IsErrorResult[i][j][k] == true) {
++ locals->PrefetchSupported[i][j] = false;
++ }
++ }
++ locals->VRatioInPrefetchSupported[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->VRatioPreY[i][j][k] > 4.0
++ || locals->VRatioPreC[i][j][k] > 4.0
++ || mode_lib->vba.IsErrorResult[i][j][k] == true) {
++ locals->VRatioInPrefetchSupported[i][j] = false;
++ }
++ }
++ } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
++ && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode);
++
++ if (mode_lib->vba.PrefetchSupported[i][j] == true
++ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip =
++ mode_lib->vba.ReturnBWPerState[i];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.BandwidthAvailableForImmediateFlip =
++ mode_lib->vba.BandwidthAvailableForImmediateFlip
++ - mode_lib->vba.cursor_bw[k]
++ - dml_max(
++ mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k],
++ mode_lib->vba.PrefetchBW[k]);
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.ImmediateFlipBytes[k] = 0.0;
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ mode_lib->vba.ImmediateFlipBytes[k] =
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k]
++ + mode_lib->vba.MetaRowBytes[k]
++ + mode_lib->vba.DPTEBytesPerRow[k];
++ }
++ }
++ mode_lib->vba.TotImmediateFlipBytes = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
++ && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
++ mode_lib->vba.TotImmediateFlipBytes =
++ mode_lib->vba.TotImmediateFlipBytes
++ + mode_lib->vba.ImmediateFlipBytes[k];
++ }
++ }
++
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ CalculateFlipSchedule(
++ mode_lib,
++ mode_lib->vba.ExtraLatency,
++ mode_lib->vba.UrgentLatencyPixelDataOnly,
++ mode_lib->vba.GPUVMMaxPageTableLevels,
++ mode_lib->vba.GPUVMEnable,
++ mode_lib->vba.BandwidthAvailableForImmediateFlip,
++ mode_lib->vba.TotImmediateFlipBytes,
++ mode_lib->vba.SourcePixelFormat[k],
++ mode_lib->vba.ImmediateFlipBytes[k],
++ mode_lib->vba.HTotal[k]
++ / mode_lib->vba.PixelClock[k],
++ mode_lib->vba.VRatio[k],
++ mode_lib->vba.Tno_bw[k],
++ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k],
++ mode_lib->vba.MetaRowBytes[k],
++ mode_lib->vba.DPTEBytesPerRow[k],
++ mode_lib->vba.DCCEnable[k],
++ mode_lib->vba.dpte_row_height[k],
++ mode_lib->vba.meta_row_height[k],
++ mode_lib->vba.qual_row_bw[k],
++ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
++ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
++ &mode_lib->vba.final_flip_bw[k],
++ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
++ }
++ mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.total_dcn_read_bw_with_flip =
++ mode_lib->vba.total_dcn_read_bw_with_flip
++ + mode_lib->vba.cursor_bw[k]
++ + dml_max3(
++ mode_lib->vba.prefetch_vm_bw[k],
++ mode_lib->vba.prefetch_row_bw[k],
++ mode_lib->vba.final_flip_bw[k]
++ + dml_max(
++ mode_lib->vba.ReadBandwidth[k],
++ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]));
++ }
++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true;
++ if (mode_lib->vba.total_dcn_read_bw_with_flip
++ > mode_lib->vba.ReturnBWPerState[i]) {
++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
++ }
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) {
++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
++ }
++ }
++ } else {
++ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
++ }
++ }
++ }
++
++ /*Vertical Active BW support*/
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth *
++ mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) *
++ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100;
++ if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i])
++ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true;
++ else
++ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false;
++ }
++
++ /*PTE Buffer Size Check*/
++
++ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
++ for (j = 0; j < 2; j++) {
++ locals->PTEBufferSizeNotExceeded[i][j] = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
++ || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
++ locals->PTEBufferSizeNotExceeded[i][j] = false;
++ }
++ }
++ }
++ }
++ /*Cursor Support Check*/
++ mode_lib->vba.CursorSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ for (j = 0; j < 2; j++) {
++ if (mode_lib->vba.CursorWidth[k][j] > 0.0) {
++ if (dml_floor(
++ dml_floor(
++ mode_lib->vba.CursorBufferSize
++ - mode_lib->vba.CursorChunkSize,
++ mode_lib->vba.CursorChunkSize) * 1024.0
++ / (mode_lib->vba.CursorWidth[k][j]
++ * mode_lib->vba.CursorBPP[k][j]
++ / 8.0),
++ 1.0)
++ * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
++ / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly
++ || (mode_lib->vba.CursorBPP[k][j] == 64.0
++ && mode_lib->vba.Cursor64BppSupport == false)) {
++ mode_lib->vba.CursorSupport = false;
++ }
++ }
++ }
++ }
++ /*Valid Pitch Check*/
++
++ mode_lib->vba.PitchSupport = true;
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ locals->AlignedYPitch[k] = dml_ceil(
++ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
++ locals->MacroTileWidthY[k]);
++ if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ if (mode_lib->vba.DCCEnable[k] == true) {
++ locals->AlignedDCCMetaPitch[k] = dml_ceil(
++ dml_max(
++ mode_lib->vba.DCCMetaPitchY[k],
++ mode_lib->vba.ViewportWidth[k]),
++ 64.0 * locals->Read256BlockWidthY[k]);
++ } else {
++ locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
++ }
++ if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
++ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
++ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
++ locals->AlignedCPitch[k] = dml_ceil(
++ dml_max(
++ mode_lib->vba.PitchC[k],
++ mode_lib->vba.ViewportWidth[k] / 2.0),
++ locals->MacroTileWidthC[k]);
++ } else {
++ locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
++ }
++ if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
++ mode_lib->vba.PitchSupport = false;
++ }
++ }
++ /*Mode Support, Voltage State and SOC Configuration*/
++
++ for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
++ for (j = 0; j < 2; j++) {
++ enum dm_validation_status status = DML_VALIDATION_OK;
++
++ if (mode_lib->vba.ScaleRatioAndTapsSupport != true) {
++ status = DML_FAIL_SCALE_RATIO_TAP;
++ } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
++ status = DML_FAIL_SOURCE_PIXEL_FORMAT;
++ } else if (locals->ViewportSizeSupport[i] != true) {
++ status = DML_FAIL_VIEWPORT_SIZE;
++ } else if (locals->DIOSupport[i] != true) {
++ status = DML_FAIL_DIO_SUPPORT;
++ } else if (locals->NotEnoughDSCUnits[i] != false) {
++ status = DML_FAIL_NOT_ENOUGH_DSC;
++ } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) {
++ status = DML_FAIL_DSC_CLK_REQUIRED;
++ } else if (locals->UrgentLatencySupport[i][j] != true) {
++ status = DML_FAIL_URGENT_LATENCY;
++ } else if (locals->ROBSupport[i] != true) {
++ status = DML_FAIL_REORDERING_BUFFER;
++ } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
++ status = DML_FAIL_DISPCLK_DPPCLK;
++ } else if (locals->TotalAvailablePipesSupport[i][j] != true) {
++ status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
++ } else if (mode_lib->vba.NumberOfOTGSupport != true) {
++ status = DML_FAIL_NUM_OTG;
++ } else if (mode_lib->vba.WritebackModeSupport != true) {
++ status = DML_FAIL_WRITEBACK_MODE;
++ } else if (mode_lib->vba.WritebackLatencySupport != true) {
++ status = DML_FAIL_WRITEBACK_LATENCY;
++ } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) {
++ status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
++ } else if (mode_lib->vba.CursorSupport != true) {
++ status = DML_FAIL_CURSOR_SUPPORT;
++ } else if (mode_lib->vba.PitchSupport != true) {
++ status = DML_FAIL_PITCH_SUPPORT;
++ } else if (locals->PrefetchSupported[i][j] != true) {
++ status = DML_FAIL_PREFETCH_SUPPORT;
++ } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) {
++ status = DML_FAIL_TOTAL_V_ACTIVE_BW;
++ } else if (locals->VRatioInPrefetchSupported[i][j] != true) {
++ status = DML_FAIL_V_RATIO_PREFETCH;
++ } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) {
++ status = DML_FAIL_PTE_BUFFER_SIZE;
++ } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) {
++ status = DML_FAIL_DSC_INPUT_BPC;
++ }
++
++ if (status == DML_VALIDATION_OK) {
++ locals->ModeSupport[i][j] = true;
++ } else {
++ locals->ModeSupport[i][j] = false;
++ }
++ locals->ValidationStatus[i] = status;
++ }
++ }
++ {
++ unsigned int MaximumMPCCombine = 0;
++ mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
++ for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
++ if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
++ mode_lib->vba.VoltageLevel = i;
++ if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
++ || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) {
++ MaximumMPCCombine = 1;
++ } else {
++ MaximumMPCCombine = 0;
++ }
++ break;
++ }
++ }
++ mode_lib->vba.ImmediateFlipSupport =
++ locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
++ locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
++ }
++ mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
++ mode_lib->vba.maxMpcComb = MaximumMPCCombine;
++ }
++ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel];
++ mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel];
++ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
++ if (mode_lib->vba.BlendingAndTiming[k] == k) {
++ mode_lib->vba.ODMCombineEnabled[k] =
++ locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
++ } else {
++ mode_lib->vba.ODMCombineEnabled[k] = 0;
++ }
++ mode_lib->vba.DSCEnabled[k] =
++ locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
++ mode_lib->vba.OutputBpp[k] =
++ locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
++ }
++}
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h
+new file mode 100644
+index 000000000000..a989d3ca1e99
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h
+@@ -0,0 +1,32 @@
++/*
++ * Copyright 2018 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifndef _DCN20V2_DISPLAY_MODE_VBA_H_
++#define _DCN20V2_DISPLAY_MODE_VBA_H_
++
++void dml20v2_recalculate(struct display_mode_lib *mode_lib);
++void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+new file mode 100644
+index 000000000000..ed8bf5f723c9
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+@@ -0,0 +1,1701 @@
++/*
++ * Copyright 2018 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#include "../display_mode_lib.h"
++#include "../display_mode_vba.h"
++#include "display_rq_dlg_calc_20v2.h"
++
++// Function: dml20v2_rq_dlg_get_rq_params
++// Calculate requestor related parameters that are register definition agnostic
++// (i.e. this layer does try to separate real values from register definition)
++// Input:
++// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.)
++// Output:
++// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.)
++//
++static void dml20v2_rq_dlg_get_rq_params(
++ struct display_mode_lib *mode_lib,
++ display_rq_params_st * rq_param,
++ const display_pipe_source_params_st pipe_src_param);
++
++// Function: dml20v2_rq_dlg_get_dlg_params
++// Calculate deadline related parameters
++//
++static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ display_dlg_regs_st *disp_dlg_regs,
++ display_ttu_regs_st *disp_ttu_regs,
++ const display_rq_dlg_params_st rq_dlg_param,
++ const display_dlg_sys_params_st dlg_sys_param,
++ const bool cstate_en,
++ const bool pstate_en);
++/*
++ * NOTE:
++ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
++ *
++ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
++ * ways. Unless there is something clearly wrong with it the code should
++ * remain as-is as it provides us with a guarantee from HW that it is correct.
++ */
++
++static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
++ double *refcyc_per_req_delivery_pre_cur,
++ double *refcyc_per_req_delivery_cur,
++ double refclk_freq_in_mhz,
++ double ref_freq_to_pix_freq,
++ double hscale_pixel_rate_l,
++ double hscl_ratio,
++ double vratio_pre_l,
++ double vratio_l,
++ unsigned int cur_width,
++ enum cursor_bpp cur_bpp);
++
++#include "../dml_inline_defs.h"
++
++static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) // Returns the bytes per element of the luma (is_chroma == false) or chroma plane for source_format.
++{
++ unsigned int ret_val = 0; // stays 0 for every format/plane combination not matched below
++
++ if (source_format == dm_444_16) { // 444 16/32/64: only the luma query yields a size, chroma stays 0
++ if (!is_chroma)
++ ret_val = 2;
++ } else if (source_format == dm_444_32) {
++ if (!is_chroma)
++ ret_val = 4;
++ } else if (source_format == dm_444_64) {
++ if (!is_chroma)
++ ret_val = 8;
++ } else if (source_format == dm_420_8) { // 420 formats: the chroma element is twice the luma element size
++ if (is_chroma)
++ ret_val = 2;
++ else
++ ret_val = 1;
++ } else if (source_format == dm_420_10) {
++ if (is_chroma)
++ ret_val = 4;
++ else
++ ret_val = 2;
++ } else if (source_format == dm_444_8) {
++ ret_val = 1; // same value whether or not is_chroma is set
++ }
++ return ret_val;
++}
++
++static bool is_dual_plane(enum source_format_class source_format) // Returns true only for dm_420_8 and dm_420_10, false for every other format.
++{
++ bool ret_val = 0;
++
++ if ((source_format == dm_420_8) || (source_format == dm_420_10))
++ ret_val = 1;
++
++ return ret_val;
++}
++
++static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, // Returns the computed refcyc_per_delivery value; the formula used depends on vratio and odm_combine.
++ double refclk_freq_in_mhz,
++ double pclk_freq_in_mhz,
++ bool odm_combine,
++ unsigned int recout_width,
++ unsigned int hactive,
++ double vratio,
++ double hscale_pixel_rate,
++ unsigned int delivery_width,
++ unsigned int req_per_swath_ub) // used as a divisor in every branch; no zero check is made here
++{
++ double refcyc_per_delivery = 0.0;
++
++ if (vratio <= 1.0) { // vratio <= 1: refclk * width / pclk / req_per_swath_ub
++ if (odm_combine) // with ODM combine the width is capped at hactive / 2
++ refcyc_per_delivery = (double) refclk_freq_in_mhz
++ * dml_min((double) recout_width, (double) hactive / 2.0)
++ / pclk_freq_in_mhz / (double) req_per_swath_ub;
++ else
++ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width
++ / pclk_freq_in_mhz / (double) req_per_swath_ub;
++ } else { // vratio > 1: refclk * delivery_width / hscale_pixel_rate / req_per_swath_ub
++ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width
++ / (double) hscale_pixel_rate / (double) req_per_swath_ub;
++ }
++
++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
++ dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
++ dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
++ dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);
++
++ return refcyc_per_delivery;
++
++}
++
++static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) // Maps a macro tile size enum to a block size in bytes.
++{
++ if (tile_size == dm_256k_tile)
++ return (256 * 1024);
++ else if (tile_size == dm_64k_tile)
++ return (64 * 1024);
++ else
++ return (4 * 1024); // every other enum value is treated as a 4 KB block
++}
++
++static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, // Converts the byte sizes in rq_sizing into log2-based fields of rq_regs.
++ display_data_rq_regs_st *rq_regs, // out: all six sizing fields are written
++ const display_data_rq_sizing_params_st rq_sizing) // in: passed by value, only read
++{
++ dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
++ print__data_rq_sizing_params_st(mode_lib, rq_sizing);
++
++ rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; // log2 relative to 2^10 bytes
++
++ if (rq_sizing.min_chunk_bytes == 0) // 0 bytes maps to field value 0 and skips dml_log2
++ rq_regs->min_chunk_size = 0;
++ else
++ rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; // 256 bytes -> 1, so 0 stays distinct
++
++ rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10;
++ if (rq_sizing.min_meta_chunk_bytes == 0)
++ rq_regs->min_meta_chunk_size = 0;
++ else
++ rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; // 64 bytes -> 1
++
++ rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; // log2 relative to 64 bytes
++ rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6;
++}
++
++static void extract_rq_regs(struct display_mode_lib *mode_lib, // Fills rq_regs (sizing fields, swath heights, expansion modes, plane1 base address) from rq_param.
++ display_rq_regs_st *rq_regs,
++ const display_rq_params_st rq_param)
++{
++ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
++ unsigned int detile_buf_plane1_addr = 0; // stays 0 unless rq_param.yuv420 is set
++
++ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l);
++
++ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height),
++ 1) - 3;
++
++ if (rq_param.yuv420) { // chroma sizing registers are only filled for yuv420
++ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c);
++ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height),
++ 1) - 3;
++ }
++
++ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); // swath heights are stored as log2
++ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); // written even when yuv420 is not set
++
++ // FIXME: take the max between luma, chroma chunk size?
++ // okay for now, as we are setting chunk_bytes to 8kb anyways
++ if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb
++ rq_regs->drq_expansion_mode = 0;
++ } else {
++ rq_regs->drq_expansion_mode = 2;
++ }
++ rq_regs->prq_expansion_mode = 1;
++ rq_regs->mrq_expansion_mode = 1;
++ rq_regs->crq_expansion_mode = 1;
++
++ if (rq_param.yuv420) { // split point is chosen from the luma/chroma stored swath byte ratio
++ if ((double) rq_param.misc.rq_l.stored_swath_bytes
++ / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) {
++ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma
++ } else {
++ detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
++ 256,
++ 0) / 64.0; // 2/3 to chroma
++ }
++ }
++ rq_regs->plane1_base_address = detile_buf_plane1_addr; // byte offset divided by 64 in both branches above
++}
++
++static void handle_det_buf_split(struct display_mode_lib *mode_lib, // Picks full or 128b requests from the detile buffer size, then stores swath bytes and swath heights in rq_param.
++ display_rq_params_st *rq_param, // in/out: reads misc.*.full_swath_bytes and blk256 dims; writes misc.*.stored_swath_bytes and dlg.*.swath_height
++ const display_pipe_source_params_st pipe_src_param) // only sw_mode and source_scan are read
++{
++ unsigned int total_swath_bytes = 0;
++ unsigned int swath_bytes_l = 0;
++ unsigned int swath_bytes_c = 0;
++ unsigned int full_swath_bytes_packed_l = 0;
++ unsigned int full_swath_bytes_packed_c = 0;
++ bool req128_l = 0;
++ bool req128_c = 0; // never set to 1 anywhere in this function
++ bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
++ bool surf_vert = (pipe_src_param.source_scan == dm_vert);
++ unsigned int log2_swath_height_l = 0;
++ unsigned int log2_swath_height_c = 0;
++ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
++
++ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
++ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
++
++ if (rq_param->yuv420_10bpc) { // 10bpc 4:2:0: packed size is 2/3 of the full swath bytes, rounded to 256 plus another 256
++ full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3,
++ 256,
++ 1) + 256;
++ full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3,
++ 256,
++ 1) + 256;
++ }
++
++ if (rq_param->yuv420) {
++ total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; // two swaths of each plane
++
++ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
++ req128_l = 0;
++ req128_c = 0;
++ swath_bytes_l = full_swath_bytes_packed_l;
++ swath_bytes_c = full_swath_bytes_packed_c;
++ } else { //128b request (for luma only for yuv420 8bpc)
++ req128_l = 1;
++ req128_c = 0;
++ swath_bytes_l = full_swath_bytes_packed_l / 2;
++ swath_bytes_c = full_swath_bytes_packed_c;
++ }
++ // Note: assumption, the config that pass in will fit into
++ // the detiled buffer.
++ } else {
++ total_swath_bytes = 2 * full_swath_bytes_packed_l;
++
++ if (total_swath_bytes <= detile_buf_size_in_bytes)
++ req128_l = 0;
++ else
++ req128_l = 1;
++
++ swath_bytes_l = total_swath_bytes; // NOTE(review): stores the doubled total, whereas the yuv420 branch stores per-swath bytes - confirm intended
++ swath_bytes_c = 0;
++ }
++ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
++ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
++
++ if (surf_linear) { // linear surfaces: swath height of 1 (log2 = 0)
++ log2_swath_height_l = 0;
++ log2_swath_height_c = 0;
++ } else if (!surf_vert) { // not vertical scan: height comes from blk256_height, halved when 128b requests are used
++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l;
++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c;
++ } else { // vertical scan: blk256_width takes the place of blk256_height
++ log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l;
++ log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c;
++ }
++ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
++ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
++
++ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
++ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
++ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n",
++ __func__,
++ full_swath_bytes_packed_l);
++ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n",
++ __func__,
++ full_swath_bytes_packed_c);
++}
++
++static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, // Computes swath, meta and dpte request/row parameters for one plane (luma or chroma, chosen by is_chroma).
++ display_data_rq_dlg_params_st *rq_dlg_param, // out: swath, meta and dpte per-row/per-frame values
++ display_data_rq_misc_params_st *rq_misc_param, // out: full_swath_bytes, blk256_height, blk256_width
++ display_data_rq_sizing_params_st *rq_sizing_param, // in/out: meta_chunk_bytes and min_meta_chunk_bytes are read, dpte_group_bytes is written
++ unsigned int vp_width,
++ unsigned int vp_height,
++ unsigned int data_pitch,
++ unsigned int meta_pitch,
++ unsigned int source_format,
++ unsigned int tiling,
++ unsigned int macro_tile_size,
++ unsigned int source_scan,
++ unsigned int is_chroma)
++{
++ bool surf_linear = (tiling == dm_sw_linear);
++ bool surf_vert = (source_scan == dm_vert);
++
++ unsigned int bytes_per_element;
++ unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format),
++ false);
++ unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format),
++ true);
++
++ unsigned int blk256_width = 0;
++ unsigned int blk256_height = 0;
++
++ unsigned int blk256_width_y = 0;
++ unsigned int blk256_height_y = 0;
++ unsigned int blk256_width_c = 0;
++ unsigned int blk256_height_c = 0;
++ unsigned int log2_bytes_per_element;
++ unsigned int log2_blk256_width;
++ unsigned int log2_blk256_height;
++ unsigned int blk_bytes;
++ unsigned int log2_blk_bytes;
++ unsigned int log2_blk_height;
++ unsigned int log2_blk_width;
++ unsigned int log2_meta_req_bytes;
++ unsigned int log2_meta_req_height;
++ unsigned int log2_meta_req_width;
++ unsigned int meta_req_width;
++ unsigned int meta_req_height;
++ unsigned int log2_meta_row_height;
++ unsigned int meta_row_width_ub;
++ unsigned int log2_meta_chunk_bytes;
++ unsigned int log2_meta_chunk_height;
++
++ //full sized meta chunk width in unit of data elements
++ unsigned int log2_meta_chunk_width;
++ unsigned int log2_min_meta_chunk_bytes;
++ unsigned int min_meta_chunk_width;
++ unsigned int meta_chunk_width;
++ unsigned int meta_chunk_per_row_int;
++ unsigned int meta_row_remainder;
++ unsigned int meta_chunk_threshold;
++ unsigned int meta_blk_bytes;
++ unsigned int meta_blk_height;
++ unsigned int meta_blk_width;
++ unsigned int meta_surface_bytes;
++ unsigned int vmpg_bytes;
++ unsigned int meta_pte_req_per_frame_ub;
++ unsigned int meta_pte_bytes_per_frame_ub;
++ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
++ const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; // the luma field is used for both planes here
++ const unsigned int pde_proc_buffer_size_64k_reqs =
++ mode_lib->ip.pde_proc_buffer_size_64k_reqs;
++
++ unsigned int log2_vmpg_height = 0;
++ unsigned int log2_vmpg_width = 0;
++ unsigned int log2_dpte_req_height_ptes = 0;
++ unsigned int log2_dpte_req_height = 0;
++ unsigned int log2_dpte_req_width = 0;
++ unsigned int log2_dpte_row_height_linear = 0;
++ unsigned int log2_dpte_row_height = 0;
++ unsigned int log2_dpte_group_width = 0;
++ unsigned int dpte_row_width_ub = 0;
++ unsigned int dpte_req_height = 0;
++ unsigned int dpte_req_width = 0;
++ unsigned int dpte_group_width = 0;
++ unsigned int log2_dpte_group_bytes = 0;
++ unsigned int log2_dpte_group_length = 0;
++ unsigned int pde_buf_entries;
++ bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10);
++
++ Calculate256BBlockSizes((enum source_format_class)(source_format),
++ (enum dm_swizzle_mode)(tiling),
++ bytes_per_element_y,
++ bytes_per_element_c,
++ &blk256_height_y,
++ &blk256_height_c,
++ &blk256_width_y,
++ &blk256_width_c);
++
++ if (!is_chroma) { // select the luma or chroma variant of the block dimensions and element size
++ blk256_width = blk256_width_y;
++ blk256_height = blk256_height_y;
++ bytes_per_element = bytes_per_element_y;
++ } else {
++ blk256_width = blk256_width_c;
++ blk256_height = blk256_height_c;
++ bytes_per_element = bytes_per_element_c;
++ }
++
++ log2_bytes_per_element = dml_log2(bytes_per_element); // NOTE(review): bytes_per_element is 0 for combinations get_bytes_per_element() does not match; presumably callers never pass those - confirm
++
++ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
++ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
++ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
++ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
++
++ log2_blk256_width = dml_log2((double) blk256_width);
++ log2_blk256_height = dml_log2((double) blk256_height);
++ blk_bytes = surf_linear ?
++ 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
++ log2_blk_bytes = dml_log2((double) blk_bytes);
++ log2_blk_height = 0;
++ log2_blk_width = 0;
++
++ // remember log rule
++ // "+" in log is multiply
++ // "-" in log is divide
++ // "/2" is like square root
++ // blk is vertical biased
++ if (tiling != dm_sw_linear)
++ log2_blk_height = log2_blk256_height
++ + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
++ else
++ log2_blk_height = 0; // blk height of 1
++
++ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
++
++ if (!surf_vert) { // swath width upper bound follows vp_width, or vp_height for vertical scan
++ rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
++ + blk256_width;
++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
++ } else {
++ rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1)
++ + blk256_height;
++ rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
++ }
++
++ if (!surf_vert)
++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
++ * bytes_per_element;
++ else
++ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
++ * bytes_per_element;
++
++ rq_misc_param->blk256_height = blk256_height;
++ rq_misc_param->blk256_width = blk256_width;
++
++ // -------
++ // meta
++ // -------
++ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
++
++ // each 64b meta request for dcn is 8x8 meta elements and
++ // a meta element covers one 256b block of the data surface.
++ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
++ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
++ - log2_meta_req_height;
++ meta_req_width = 1 << log2_meta_req_width;
++ meta_req_height = 1 << log2_meta_req_height;
++ log2_meta_row_height = 0;
++ meta_row_width_ub = 0;
++
++ // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
++ // calculate upper bound of the meta_row_width
++ if (!surf_vert) {
++ log2_meta_row_height = log2_meta_req_height;
++ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
++ + meta_req_width;
++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
++ } else {
++ log2_meta_row_height = log2_meta_req_width;
++ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
++ + meta_req_height;
++ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
++ }
++ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; // 64 bytes per meta request
++
++ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
++
++ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
++ log2_meta_chunk_height = log2_meta_row_height;
++
++ //full sized meta chunk width in unit of data elements
++ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
++ - log2_meta_chunk_height;
++ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
++ min_meta_chunk_width = 1
++ << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
++ - log2_meta_chunk_height);
++ meta_chunk_width = 1 << log2_meta_chunk_width;
++ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
++ meta_row_remainder = meta_row_width_ub % meta_chunk_width;
++ meta_chunk_threshold = 0;
++ meta_blk_bytes = 4096;
++ meta_blk_height = blk256_height * 64;
++ meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; // only consumed by the debug print below; NOTE(review): integer division by bytes_per_element, which can be 0 for unmatched formats
++ meta_surface_bytes = meta_pitch
++ * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height)
++ * bytes_per_element / 256;
++ vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
++ meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, // NOTE(review): unsigned subtraction wraps if meta_surface_bytes < vmpg_bytes - confirm inputs rule this out
++ 8 * vmpg_bytes,
++ 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
++ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
++ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
++
++ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
++ dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width);
++ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
++ dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n",
++ __func__,
++ meta_pte_req_per_frame_ub);
++ dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n",
++ __func__,
++ meta_pte_bytes_per_frame_ub);
++
++ if (!surf_vert)
++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
++ else
++ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
++
++ if (meta_row_remainder <= meta_chunk_threshold) // remainder within the threshold costs one extra chunk, otherwise two
++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
++ else
++ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
++
++ // ------
++ // dpte
++ // ------
++ if (surf_linear) {
++ log2_vmpg_height = 0; // one line high
++ } else {
++ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
++ }
++ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
++
++ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
++ if (surf_linear) { //one 64B PTE request returns 8 PTEs
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_vmpg_width + 3;
++ log2_dpte_req_height = 0;
++ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
++ //one 64B req gives 8x1 PTEs for 4KB tile
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_blk_width + 3;
++ log2_dpte_req_height = log2_blk_height + 0;
++ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
++ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
++ log2_dpte_req_height_ptes = 4;
++ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
++ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
++ } else { //64KB page size and must 64KB tile block
++ //one 64B req gives 8x1 PTEs for 64KB tile
++ log2_dpte_req_height_ptes = 0;
++ log2_dpte_req_width = log2_blk_width + 3;
++ log2_dpte_req_height = log2_blk_height + 0;
++ }
++
++ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
++ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
++ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
++ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
++ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
++ dpte_req_height = 1 << log2_dpte_req_height;
++ dpte_req_width = 1 << log2_dpte_req_width;
++
++ // calculate pitch dpte row buffer can hold
++ // round the result down to a power of two.
++ pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; // halved for yuv420 formats
++ if (surf_linear) {
++ unsigned int dpte_row_height;
++
++ log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries // NOTE(review): divides by bytes_per_element and data_pitch with no zero check here
++ / bytes_per_element,
++ dpte_buf_in_pte_reqs
++ * dpte_req_width)
++ / data_pitch),
++ 1);
++
++ ASSERT(log2_dpte_row_height_linear >= 3);
++
++ if (log2_dpte_row_height_linear > 7) // clamp to at most 2^7 = 128 rows
++ log2_dpte_row_height_linear = 7;
++
++ log2_dpte_row_height = log2_dpte_row_height_linear;
++ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
++ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
++ dpte_row_height = 1 << log2_dpte_row_height;
++ dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1,
++ dpte_req_width,
++ 1) + dpte_req_width;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
++ } else {
++ // the upper bound of the dpte_row_width without dependency on viewport position follows.
++ // for tiled mode, row height is the same as req height and row store up to vp size upper bound
++ if (!surf_vert) {
++ log2_dpte_row_height = log2_dpte_req_height;
++ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
++ + dpte_req_width;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
++ } else {
++ log2_dpte_row_height =
++ (log2_blk_width < log2_dpte_req_width) ?
++ log2_blk_width : log2_dpte_req_width;
++ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
++ + dpte_req_height;
++ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
++ }
++ }
++ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
++ else
++ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
++
++ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
++
++ // the dpte_group_bytes is reduced for the specific case of vertical
++ // access of a tile surface that has dpte request of 8x1 ptes.
++ if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group; NOTE(review): bitwise & on 0/1 operands, same result as && but without short-circuit
++ rq_sizing_param->dpte_group_bytes = 512;
++ else
++ //full size
++ rq_sizing_param->dpte_group_bytes = 2048;
++
++ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
++ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
++ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
++
++ // full sized data pte group width in elements
++ if (!surf_vert)
++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
++ else
++ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
++
++ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
++ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
++ log2_dpte_group_width = log2_dpte_group_width - 1;
++
++ dpte_group_width = 1 << log2_dpte_group_width;
++
++ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
++ // the upper bound for the dpte groups per row is as follows.
++ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width,
++ 1);
++}
++
++static void get_surf_rq_param(struct display_mode_lib *mode_lib,
++ display_data_rq_sizing_params_st *rq_sizing_param,
++ display_data_rq_dlg_params_st *rq_dlg_param,
++ display_data_rq_misc_params_st *rq_misc_param,
++ const display_pipe_source_params_st pipe_src_param,
++ bool is_chroma)
++{ // Fills the sizing/dlg/misc request params for one plane of pipe_src_param: the chroma plane when is_chroma is set, luma otherwise.
++ bool mode_422 = 0; // hard-coded off here, so ppe below always evaluates to 1
++ unsigned int vp_width = 0;
++ unsigned int vp_height = 0;
++ unsigned int data_pitch = 0;
++ unsigned int meta_pitch = 0;
++ unsigned int ppe = mode_422 ? 2 : 1; // divisor applied to the viewport width below
++
++ // FIXME: check whether ppe applies to both luma and chroma in the 422 case
++ if (is_chroma) { // pick the "_c" viewport/pitch fields for chroma
++ vp_width = pipe_src_param.viewport_width_c / ppe;
++ vp_height = pipe_src_param.viewport_height_c;
++ data_pitch = pipe_src_param.data_pitch_c;
++ meta_pitch = pipe_src_param.meta_pitch_c;
++ } else {
++ vp_width = pipe_src_param.viewport_width / ppe;
++ vp_height = pipe_src_param.viewport_height;
++ data_pitch = pipe_src_param.data_pitch;
++ meta_pitch = pipe_src_param.meta_pitch;
++ }
++
++ rq_sizing_param->chunk_bytes = 8192; // fixed value, so the 64KB comparison below always takes the else branch
++
++ if (rq_sizing_param->chunk_bytes == 64 * 1024)
++ rq_sizing_param->min_chunk_bytes = 0;
++ else
++ rq_sizing_param->min_chunk_bytes = 1024;
++
++ rq_sizing_param->meta_chunk_bytes = 2048;
++ rq_sizing_param->min_meta_chunk_bytes = 256;
++
++ rq_sizing_param->mpte_group_bytes = 2048;
++
++ get_meta_and_pte_attr(mode_lib, // receives the per-plane viewport and pitch values selected above
++ rq_dlg_param,
++ rq_misc_param,
++ rq_sizing_param,
++ vp_width,
++ vp_height,
++ data_pitch,
++ meta_pitch,
++ pipe_src_param.source_format,
++ pipe_src_param.sw_mode,
++ pipe_src_param.macro_tile_size,
++ pipe_src_param.source_scan,
++ is_chroma);
++}
++
++static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
++ display_rq_params_st *rq_param,
++ const display_pipe_source_params_st pipe_src_param)
++{ // Fills *rq_param for one pipe source: luma always, chroma only for dual-plane formats, then calls handle_det_buf_split().
++ // record whether the source format is 4:2:0 (8 or 10 bit), then get param for luma surface
++ rq_param->yuv420 = pipe_src_param.source_format == dm_420_8
++ || pipe_src_param.source_format == dm_420_10;
++ rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10;
++
++ get_surf_rq_param(mode_lib,
++ &(rq_param->sizing.rq_l),
++ &(rq_param->dlg.rq_l),
++ &(rq_param->misc.rq_l),
++ pipe_src_param,
++ 0); // is_chroma = false
++
++ if (is_dual_plane((enum source_format_class)(pipe_src_param.source_format))) {
++ // get param for chroma surface
++ get_surf_rq_param(mode_lib,
++ &(rq_param->sizing.rq_c),
++ &(rq_param->dlg.rq_c),
++ &(rq_param->misc.rq_c),
++ pipe_src_param,
++ 1); // is_chroma = true
++ }
++
++ // calculate how to split the det buffer space between luma and chroma
++ handle_det_buf_split(mode_lib, rq_param, pipe_src_param);
++ print__rq_params_st(mode_lib, *rq_param); // struct is passed by value to the print helper
++}
++
++void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
++ display_rq_regs_st *rq_regs,
++ const display_pipe_params_st pipe_param)
++{ // Clears *rq_regs, derives the request params from pipe_param.src, then hands both to extract_rq_regs().
++ display_rq_params_st rq_param = {0};
++
++ memset(rq_regs, 0, sizeof(*rq_regs)); // start from all-zero registers; any previous caller contents are discarded
++ dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src);
++ extract_rq_regs(mode_lib, rq_regs, rq_param);
++
++ print__rq_regs_st(mode_lib, *rq_regs);
++}
++
++// Note: currently taken in as is.
++// TODO: decouple this code from the hw register implementation and factor out the code that is repeated for luma and chroma.
++static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ display_dlg_regs_st *disp_dlg_regs,
++ display_ttu_regs_st *disp_ttu_regs,
++ const display_rq_dlg_params_st rq_dlg_param,
++ const display_dlg_sys_params_st dlg_sys_param,
++ const bool cstate_en,
++ const bool pstate_en)
++{ // Computes the DLG and TTU register values for pipe pipe_idx; both output structs are zeroed before being filled.
++ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
++ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
++ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
++ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
++ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
++ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
++
++ // -------------------------
++ // Section 1.15.2.1: OTG dependent Params
++ // -------------------------
++ // Timing
++ unsigned int htotal = dst->htotal;
++// unsigned int hblank_start = dst.hblank_start; // TODO: Remove
++ unsigned int hblank_end = dst->hblank_end;
++ unsigned int vblank_start = dst->vblank_start;
++ unsigned int vblank_end = dst->vblank_end;
++ unsigned int min_vblank = mode_lib->ip.min_vblank_lines;
++
++ double dppclk_freq_in_mhz = clks->dppclk_mhz;
++ double dispclk_freq_in_mhz = clks->dispclk_mhz;
++ double refclk_freq_in_mhz = clks->refclk_mhz;
++ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
++ bool interlaced = dst->interlaced;
++
++ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; // ratio of refclk to pixel clock; used below to convert pixel-clock counts into refclk counts
++
++ double min_dcfclk_mhz;
++ double t_calc_us;
++ double min_ttu_vblank;
++
++ double min_dst_y_ttu_vblank;
++ unsigned int dlg_vblank_start;
++ bool dual_plane;
++ bool mode_422;
++ unsigned int access_dir;
++ unsigned int vp_height_l;
++ unsigned int vp_width_l;
++ unsigned int vp_height_c;
++ unsigned int vp_width_c;
++
++ // Scaling
++ unsigned int htaps_l;
++ unsigned int htaps_c;
++ double hratio_l;
++ double hratio_c;
++ double vratio_l;
++ double vratio_c;
++ bool scl_enable;
++
++ double line_time_in_us;
++ // double vinit_l;
++ // double vinit_c;
++ // double vinit_bot_l;
++ // double vinit_bot_c;
++
++ // unsigned int swath_height_l;
++ unsigned int swath_width_ub_l;
++ // unsigned int dpte_bytes_per_row_ub_l;
++ unsigned int dpte_groups_per_row_ub_l;
++ // unsigned int meta_pte_bytes_per_frame_ub_l;
++ // unsigned int meta_bytes_per_row_ub_l;
++
++ // unsigned int swath_height_c;
++ unsigned int swath_width_ub_c;
++ // unsigned int dpte_bytes_per_row_ub_c;
++ unsigned int dpte_groups_per_row_ub_c;
++
++ unsigned int meta_chunks_per_row_ub_l;
++ unsigned int meta_chunks_per_row_ub_c;
++ unsigned int vupdate_offset;
++ unsigned int vupdate_width;
++ unsigned int vready_offset;
++
++ unsigned int dppclk_delay_subtotal;
++ unsigned int dispclk_delay_subtotal;
++ unsigned int pixel_rate_delay_subtotal;
++
++ unsigned int vstartup_start;
++ unsigned int dst_x_after_scaler;
++ unsigned int dst_y_after_scaler;
++ double line_wait;
++ double dst_y_prefetch;
++ double dst_y_per_vm_vblank;
++ double dst_y_per_row_vblank;
++ double dst_y_per_vm_flip;
++ double dst_y_per_row_flip;
++ double min_dst_y_per_vm_vblank;
++ double min_dst_y_per_row_vblank;
++ double lsw;
++ double vratio_pre_l;
++ double vratio_pre_c;
++ unsigned int req_per_swath_ub_l;
++ unsigned int req_per_swath_ub_c;
++ unsigned int meta_row_height_l;
++ unsigned int meta_row_height_c;
++ unsigned int swath_width_pixels_ub_l;
++ unsigned int swath_width_pixels_ub_c;
++ unsigned int scaler_rec_in_width_l;
++ unsigned int scaler_rec_in_width_c;
++ unsigned int dpte_row_height_l;
++ unsigned int dpte_row_height_c;
++ double hscale_pixel_rate_l;
++ double hscale_pixel_rate_c;
++ double min_hratio_fact_l;
++ double min_hratio_fact_c;
++ double refcyc_per_line_delivery_pre_l;
++ double refcyc_per_line_delivery_pre_c;
++ double refcyc_per_line_delivery_l;
++ double refcyc_per_line_delivery_c;
++
++ double refcyc_per_req_delivery_pre_l;
++ double refcyc_per_req_delivery_pre_c;
++ double refcyc_per_req_delivery_l;
++ double refcyc_per_req_delivery_c;
++
++ unsigned int full_recout_width;
++ double xfc_transfer_delay;
++ double xfc_precharge_delay;
++ double xfc_remote_surface_flip_latency;
++ double xfc_dst_y_delta_drq_limit;
++ double xfc_prefetch_margin;
++ double refcyc_per_req_delivery_pre_cur0;
++ double refcyc_per_req_delivery_cur0;
++ double refcyc_per_req_delivery_pre_cur1;
++ double refcyc_per_req_delivery_cur1;
++
++ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); // any field not assigned below stays 0
++ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
++
++ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
++ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
++
++ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
++ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
++ ASSERT(ref_freq_to_pix_freq < 4.0);
++
++ disp_dlg_regs->ref_freq_to_pix_freq =
++ (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
++ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal
++ * dml_pow(2, 8));
++ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
++ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end
++ * (double) ref_freq_to_pix_freq);
++ ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13));
++
++ min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; // only printed below, not used in any calculation in this function
++ t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); // only printed below
++ min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; // time * pixel rate / pixels-per-line, i.e. expressed in lines
++ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
++
++ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start
++ + min_dst_y_ttu_vblank) * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18));
++
++ dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n",
++ __func__,
++ min_dcfclk_mhz);
++ dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n",
++ __func__,
++ min_ttu_vblank);
++ dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n",
++ __func__,
++ min_dst_y_ttu_vblank);
++ dml_print("DML_DLG: %s: t_calc_us = %3.2f\n",
++ __func__,
++ t_calc_us);
++ dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n",
++ __func__,
++ disp_dlg_regs->min_dst_y_next_start);
++ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n",
++ __func__,
++ ref_freq_to_pix_freq);
++
++ // -------------------------
++ // Section 1.15.2.2: Prefetch, Active and TTU
++ // -------------------------
++ // Prefetch Calc
++ // Source
++// dcc_en = src.dcc;
++ dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
++ mode_422 = 0; // FIXME
++ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
++// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
++// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
++ vp_height_l = src->viewport_height;
++ vp_width_l = src->viewport_width;
++ vp_height_c = src->viewport_height_c;
++ vp_width_c = src->viewport_width_c;
++
++ // Scaling
++ htaps_l = taps->htaps;
++ htaps_c = taps->htaps_c;
++ hratio_l = scl->hscl_ratio;
++ hratio_c = scl->hscl_ratio_c;
++ vratio_l = scl->vscl_ratio;
++ vratio_c = scl->vscl_ratio_c;
++ scl_enable = scl->scl_enable;
++
++ line_time_in_us = (htotal / pclk_freq_in_mhz); // pixels per line divided by pixels per microsecond
++// vinit_l = scl.vinit;
++// vinit_c = scl.vinit_c;
++// vinit_bot_l = scl.vinit_bot;
++// vinit_bot_c = scl.vinit_bot_c;
++
++// unsigned int swath_height_l = rq_dlg_param.rq_l.swath_height;
++ swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub;
++// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub;
++ dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub;
++// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub;
++// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub;
++
++// unsigned int swath_height_c = rq_dlg_param.rq_c.swath_height;
++ swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub;
++ // dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub;
++ dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub;
++
++ meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub;
++ meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub;
++ vupdate_offset = dst->vupdate_offset;
++ vupdate_width = dst->vupdate_width;
++ vready_offset = dst->vready_offset;
++
++ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
++ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
++
++ if (scl_enable)
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
++ else
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
++
++ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter
++ + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
++
++ if (dout->dsc_enable) {
++ double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dispclk_delay_subtotal += dsc_delay; // double added into an unsigned int accumulator, so the sum is truncated
++ }
++
++ pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz // rescale both delay subtotals by pclk/dppclk and pclk/dispclk
++ + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz;
++
++ vstartup_start = dst->vstartup_start;
++ if (interlaced) { // same comparison as the progressive branch, with vstartup_start and vblank_end halved
++ if (vstartup_start / 2.0
++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
++ <= vblank_end / 2.0)
++ disp_dlg_regs->vready_after_vcount0 = 1;
++ else
++ disp_dlg_regs->vready_after_vcount0 = 0;
++ } else {
++ if (vstartup_start
++ - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
++ <= vblank_end)
++ disp_dlg_regs->vready_after_vcount0 = 1;
++ else
++ disp_dlg_regs->vready_after_vcount0 = 0;
++ }
++
++ // TODO: Where is this coming from?
++ if (interlaced)
++ vstartup_start = vstartup_start / 2;
++
++ // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp?
++ if (vstartup_start >= min_vblank) {
++ dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n",
++ __func__,
++ vblank_start,
++ vblank_end);
++ dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
++ __func__,
++ vstartup_start,
++ min_vblank);
++ min_vblank = vstartup_start + 1; // raise min_vblank so that it exceeds vstartup_start
++ dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", // same message again, now showing the adjusted min_vblank
++ __func__,
++ vstartup_start,
++ min_vblank);
++ }
++
++ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
++ dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n",
++ __func__,
++ pixel_rate_delay_subtotal);
++ dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n",
++ __func__,
++ dst_x_after_scaler);
++ dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n",
++ __func__,
++ dst_y_after_scaler);
++
++ // Lwait: largest of the urgent, self-refresh (if cstate_en) and dram-clock-change (if pstate_en) latencies
++ line_wait = mode_lib->soc.urgent_latency_us;
++ if (cstate_en)
++ line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait);
++ if (pstate_en)
++ line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us
++ + mode_lib->soc.urgent_latency_us,
++ line_wait);
++ line_wait = line_wait / line_time_in_us; // us -> lines; NOTE(review): line_wait is not read again in this function
++
++ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
++
++ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ min_dst_y_per_vm_vblank = 8.0; // NOTE(review): despite the "min_" name, these two are used as upper bounds in the ASSERTs below
++ min_dst_y_per_row_vblank = 16.0;
++
++ // magic! special case for very short lines (htotal <= 75); the origin of these values is not documented here
++ if (htotal <= 75) {
++ min_vblank = 300; // NOTE(review): min_vblank is not read after this point in the function
++ min_dst_y_per_vm_vblank = 100.0;
++ min_dst_y_per_row_vblank = 100.0;
++ }
++
++ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
++ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
++
++ ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank);
++ ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank);
++
++ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
++ lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); // prefetch lines left after the vm and row portions; only printed
++
++ dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw);
++
++ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++
++ dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l);
++ dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c);
++
++ // Active
++ req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub;
++ req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub;
++ meta_row_height_l = rq_dlg_param.rq_l.meta_row_height;
++ meta_row_height_c = rq_dlg_param.rq_c.meta_row_height;
++ swath_width_pixels_ub_l = 0;
++ swath_width_pixels_ub_c = 0;
++ scaler_rec_in_width_l = 0;
++ scaler_rec_in_width_c = 0;
++ dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
++ dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
++
++ if (mode_422) { // mode_422 is forced to 0 above, so only the else branch runs
++ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
++ swath_width_pixels_ub_c = swath_width_ub_c * 2;
++ } else {
++ swath_width_pixels_ub_l = swath_width_ub_l * 1;
++ swath_width_pixels_ub_c = swath_width_ub_c * 1;
++ }
++
++ hscale_pixel_rate_l = 0.;
++ hscale_pixel_rate_c = 0.;
++ min_hratio_fact_l = 1.0;
++ min_hratio_fact_c = 1.0;
++
++ if (htaps_l <= 1) // luma factor: 2.0 for <= 1 tap, min(2 * hratio, 4) for <= 6 taps, min(hratio, 4) otherwise
++ min_hratio_fact_l = 2.0;
++ else if (htaps_l <= 6) {
++ if ((hratio_l * 2.0) > 4.0)
++ min_hratio_fact_l = 4.0;
++ else
++ min_hratio_fact_l = hratio_l * 2.0;
++ } else {
++ if (hratio_l > 4.0)
++ min_hratio_fact_l = 4.0;
++ else
++ min_hratio_fact_l = hratio_l;
++ }
++
++ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
++
++ if (htaps_c <= 1) // chroma factor: same rule as luma, using htaps_c / hratio_c
++ min_hratio_fact_c = 2.0;
++ else if (htaps_c <= 6) {
++ if ((hratio_c * 2.0) > 4.0)
++ min_hratio_fact_c = 4.0;
++ else
++ min_hratio_fact_c = hratio_c * 2.0;
++ } else {
++ if (hratio_c > 4.0)
++ min_hratio_fact_c = 4.0;
++ else
++ min_hratio_fact_c = hratio_c;
++ }
++
++ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
++
++ refcyc_per_line_delivery_pre_l = 0.;
++ refcyc_per_line_delivery_pre_c = 0.;
++ refcyc_per_line_delivery_l = 0.;
++ refcyc_per_line_delivery_c = 0.;
++
++ refcyc_per_req_delivery_pre_l = 0.;
++ refcyc_per_req_delivery_pre_c = 0.;
++ refcyc_per_req_delivery_l = 0.;
++ refcyc_per_req_delivery_c = 0.;
++
++ full_recout_width = 0;
++ // In ODM
++ if (src->is_hsplit) {
++ // This "hack" is only allowed (and valid) for MPC combine. In ODM
++ // combine, you MUST specify the full_recout_width...according to Oswin
++ if (dst->full_recout_width == 0 && !dst->odm_combine) {
++ dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n",
++ __func__);
++ full_recout_width = dst->recout_width * 2; // assume half split for dcn1
++ } else
++ full_recout_width = dst->full_recout_width;
++ } else
++ full_recout_width = dst->recout_width;
++
++ // As of DCN2, mpc_combine and odm_combine are mutually exclusive
++ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_l,
++ hscale_pixel_rate_l,
++ swath_width_pixels_ub_l,
++ 1); // per line
++
++ refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_l,
++ hscale_pixel_rate_l,
++ swath_width_pixels_ub_l,
++ 1); // per line
++
++ dml_print("DML_DLG: %s: full_recout_width = %d\n",
++ __func__,
++ full_recout_width);
++ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n",
++ __func__,
++ hscale_pixel_rate_l);
++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_pre_l);
++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_l);
++
++ if (dual_plane) { // chroma line delivery is only computed for dual-plane formats; otherwise it stays 0
++ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_c,
++ hscale_pixel_rate_c,
++ swath_width_pixels_ub_c,
++ 1); // per line
++
++ refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_c,
++ hscale_pixel_rate_c,
++ swath_width_pixels_ub_c,
++ 1); // per line
++
++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_pre_c);
++ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
++ __func__,
++ refcyc_per_line_delivery_c);
++ }
++
++ // TTU - Luma / Chroma
++ if (access_dir) { // vertical access
++ scaler_rec_in_width_l = vp_height_l;
++ scaler_rec_in_width_c = vp_height_c;
++ } else {
++ scaler_rec_in_width_l = vp_width_l;
++ scaler_rec_in_width_c = vp_width_c;
++ }
++
++ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_l,
++ hscale_pixel_rate_l,
++ scaler_rec_in_width_l,
++ req_per_swath_ub_l); // per req
++ refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_l,
++ hscale_pixel_rate_l,
++ scaler_rec_in_width_l,
++ req_per_swath_ub_l); // per req
++
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_pre_l);
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_l);
++
++ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
++ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
++
++ if (dual_plane) {
++ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_pre_c,
++ hscale_pixel_rate_c,
++ scaler_rec_in_width_c,
++ req_per_swath_ub_c); // per req
++ refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib,
++ refclk_freq_in_mhz,
++ pclk_freq_in_mhz,
++ dst->odm_combine,
++ full_recout_width,
++ dst->hactive,
++ vratio_c,
++ hscale_pixel_rate_c,
++ scaler_rec_in_width_c,
++ req_per_swath_ub_c); // per req
++
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_pre_c);
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n",
++ __func__,
++ refcyc_per_req_delivery_c);
++
++ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
++ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
++ }
++
++ // XFC
++ xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
++ xfc_precharge_delay = get_xfc_precharge_delay(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++ xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; // the drq limit simply reuses the remote surface flip latency value
++ xfc_prefetch_margin = get_xfc_prefetch_margin(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx);
++
++ // TTU - Cursor
++ refcyc_per_req_delivery_pre_cur0 = 0.0;
++ refcyc_per_req_delivery_cur0 = 0.0;
++ if (src->num_cursors > 0) { // cursor 0 values stay 0.0 when the pipe has no cursor
++ calculate_ttu_cursor(mode_lib,
++ &refcyc_per_req_delivery_pre_cur0,
++ &refcyc_per_req_delivery_cur0,
++ refclk_freq_in_mhz,
++ ref_freq_to_pix_freq,
++ hscale_pixel_rate_l,
++ scl->hscl_ratio,
++ vratio_pre_l,
++ vratio_l,
++ src->cur0_src_width,
++ (enum cursor_bpp)(src->cur0_bpp));
++ }
++
++ refcyc_per_req_delivery_pre_cur1 = 0.0;
++ refcyc_per_req_delivery_cur1 = 0.0;
++ if (src->num_cursors > 1) { // cursor 1 values stay 0.0 unless there are at least two cursors
++ calculate_ttu_cursor(mode_lib,
++ &refcyc_per_req_delivery_pre_cur1,
++ &refcyc_per_req_delivery_cur1,
++ refclk_freq_in_mhz,
++ ref_freq_to_pix_freq,
++ hscale_pixel_rate_l,
++ scl->hscl_ratio,
++ vratio_pre_l,
++ vratio_l,
++ src->cur1_src_width,
++ (enum cursor_bpp)(src->cur1_bpp));
++ }
++
++ // TTU - Misc
++ // all hard-coded
++
++ // Assignment to register structures
++ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
++ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
++ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13));
++ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
++ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
++
++ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
++ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
++
++ disp_dlg_regs->refcyc_per_pte_group_vblank_l =
++ (unsigned int) (dst_y_per_row_vblank * (double) htotal
++ * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13));
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_c);
++ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
++ < (unsigned int) dml_pow(2, 13));
++ }
++
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
++ (unsigned int) (dst_y_per_row_vblank * (double) htotal
++ * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
++ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13));
++
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
++ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
++
++ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal // NOTE(review): cast happens before the divide, so this is an integer division (the vblank variants above divide in double)
++ * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
++ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
++ * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip
++ * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
++ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip
++ * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
++ }
++
++ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l
++ / (double) vratio_l * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17));
++
++ if (dual_plane) {
++ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c
++ / (double) vratio_c * dml_pow(2, 2));
++ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { // chroma overflow only warns; the luma equivalent above ASSERTs
++ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
++ __func__,
++ disp_dlg_regs->dst_y_per_pte_row_nom_c,
++ (unsigned int) dml_pow(2, 17) - 1);
++ }
++ }
++
++ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l
++ / (double) vratio_l * dml_pow(2, 2));
++ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17));
++
++ disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
++
++ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l
++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_l);
++ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; // the *_nom refcyc values are clamped to the limit rather than asserted
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l
++ / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
++ / (double) meta_chunks_per_row_ub_l);
++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
++
++ if (dual_plane) {
++ disp_dlg_regs->refcyc_per_pte_group_nom_c =
++ (unsigned int) ((double) dpte_row_height_c / (double) vratio_c
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) dpte_groups_per_row_ub_c);
++ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
++
++ // TODO: Is this the right calculation? Does htotal need to be halved?
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c =
++ (unsigned int) ((double) meta_row_height_c / (double) vratio_c
++ * (double) htotal * ref_freq_to_pix_freq
++ / (double) meta_chunks_per_row_ub_c);
++ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
++ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
++ }
++
++ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l,
++ 1);
++ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l,
++ 1);
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13));
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13));
++
++ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c,
++ 1);
++ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c,
++ 1);
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13));
++ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13));
++
++ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; // cursor adjust/offset fields are fixed constants
++ disp_dlg_regs->dst_y_offset_cur0 = 0;
++ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
++ disp_dlg_regs->dst_y_offset_cur1 = 0;
++
++ disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay;
++ disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay;
++ disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency;
++ disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(xfc_prefetch_margin * refclk_freq_in_mhz,
++ 1);
++
++ // slave has to have this value also set to off
++ if (src->xfc_enable && !src->xfc_slave)
++ disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1);
++ else
++ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
++
++ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l // every per-request delivery value below is multiplied by dml_pow(2, 10) before the cast
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
++ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0
++ * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 =
++ (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
++ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1
++ * dml_pow(2, 10));
++ disp_ttu_regs->qos_level_low_wm = 0;
++ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
++ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
++ * ref_freq_to_pix_freq);
++ /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/
++
++ disp_ttu_regs->qos_level_flip = 14; // remaining QoS fields are fixed constants; no *_cur1 QoS fields are assigned here, so they keep the memset value 0
++ disp_ttu_regs->qos_level_fixed_l = 8;
++ disp_ttu_regs->qos_level_fixed_c = 8;
++ disp_ttu_regs->qos_level_fixed_cur0 = 8;
++ disp_ttu_regs->qos_ramp_disable_l = 0;
++ disp_ttu_regs->qos_ramp_disable_c = 0;
++ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
++
++ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; // value from get_min_ttu_vblank() scaled by the refclk frequency
++ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
++
++ print__ttu_regs_st(mode_lib, *disp_ttu_regs);
++ print__dlg_regs_st(mode_lib, *disp_dlg_regs);
++}
++
++void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
++ display_dlg_regs_st *dlg_regs,
++ display_ttu_regs_st *ttu_regs,
++ display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ const bool cstate_en,
++ const bool pstate_en,
++ const bool vm_en,
++ const bool ignore_viewport_pos,
++ const bool immediate_flip_support)
++{
++ display_rq_params_st rq_param = {0};
++ display_dlg_sys_params_st dlg_sys_param = {0};
++
++ // Get watermark and Tex.
++ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
++ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
++ e2e_pipe_param,
++ num_pipes);
++ dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
++ / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
++
++ print__dlg_sys_params_st(mode_lib, dlg_sys_param);
++
++ // system parameter calculation done
++
++ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
++ dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe.src);
++ dml20v2_rq_dlg_get_dlg_params(mode_lib,
++ e2e_pipe_param,
++ num_pipes,
++ pipe_idx,
++ dlg_regs,
++ ttu_regs,
++ rq_param.dlg,
++ dlg_sys_param,
++ cstate_en,
++ pstate_en);
++ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
++}
++
++static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
++ double *refcyc_per_req_delivery_pre_cur,
++ double *refcyc_per_req_delivery_cur,
++ double refclk_freq_in_mhz,
++ double ref_freq_to_pix_freq,
++ double hscale_pixel_rate_l,
++ double hscl_ratio,
++ double vratio_pre_l,
++ double vratio_l,
++ unsigned int cur_width,
++ enum cursor_bpp cur_bpp)
++{
++ unsigned int cur_src_width = cur_width;
++ unsigned int cur_req_size = 0;
++ unsigned int cur_req_width = 0;
++ double cur_width_ub = 0.0;
++ double cur_req_per_width = 0.0;
++ double hactive_cur = 0.0;
++
++ ASSERT(cur_src_width <= 256);
++
++ *refcyc_per_req_delivery_pre_cur = 0.0;
++ *refcyc_per_req_delivery_cur = 0.0;
++ if (cur_src_width > 0) {
++ unsigned int cur_bit_per_pixel = 0;
++
++ if (cur_bpp == dm_cur_2bit) {
++ cur_req_size = 64; // byte
++ cur_bit_per_pixel = 2;
++ } else { // 32bit
++ cur_bit_per_pixel = 32;
++ if (cur_src_width >= 1 && cur_src_width <= 16)
++ cur_req_size = 64;
++ else if (cur_src_width >= 17 && cur_src_width <= 31)
++ cur_req_size = 128;
++ else
++ cur_req_size = 256;
++ }
++
++ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
++ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
++ * (double) cur_req_width;
++ cur_req_per_width = cur_width_ub / (double) cur_req_width;
++ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
++
++ if (vratio_pre_l <= 1.0) {
++ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
++ / (double) cur_req_per_width;
++ } else {
++ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz
++ * (double) cur_src_width / hscale_pixel_rate_l
++ / (double) cur_req_per_width;
++ }
++
++ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
++
++ if (vratio_l <= 1.0) {
++ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq
++ / (double) cur_req_per_width;
++ } else {
++ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz
++ * (double) cur_src_width / hscale_pixel_rate_l
++ / (double) cur_req_per_width;
++ }
++
++ dml_print("DML_DLG: %s: cur_req_width = %d\n",
++ __func__,
++ cur_req_width);
++ dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n",
++ __func__,
++ cur_width_ub);
++ dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n",
++ __func__,
++ cur_req_per_width);
++ dml_print("DML_DLG: %s: hactive_cur = %3.2f\n",
++ __func__,
++ hactive_cur);
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n",
++ __func__,
++ *refcyc_per_req_delivery_pre_cur);
++ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n",
++ __func__,
++ *refcyc_per_req_delivery_cur);
++
++ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
++ }
++}
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+new file mode 100644
+index 000000000000..0378406bf7e7
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+@@ -0,0 +1,74 @@
++/*
++ * Copyright 2018 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__
++#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__
++
++#include "../dml_common_defs.h"
++#include "../display_rq_dlg_helpers.h"
++
++struct display_mode_lib;
++
++
++// Function: dml20v2_rq_dlg_get_rq_reg
++// Main entry point for test to get the register values out of this DML class.
++// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
++// and then populate the rq_regs struct
++// Input:
++// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.)
++// Output:
++// rq_regs - struct that holds all the RQ registers field value.
++// See also: <display_rq_regs_st>
++void dml20v2_rq_dlg_get_rq_reg(
++ struct display_mode_lib *mode_lib,
++ display_rq_regs_st *rq_regs,
++ const display_pipe_params_st pipe_param);
++
++
++// Function: dml20v2_rq_dlg_get_dlg_reg
++// Calculate and return DLG and TTU register struct given the system setting
++// Output:
++// dlg_regs - output DLG register struct
++// ttu_regs - output DLG TTU register struct
++// Input:
++// e2e_pipe_param - "compacted" array of e2e pipe param struct
++// num_pipes - num of active "pipe" or "route"
++// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
++// cstate_en - 0: when calculating min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
++// Added for legacy or unrealistic timing tests.
++void dml20v2_rq_dlg_get_dlg_reg(
++ struct display_mode_lib *mode_lib,
++ display_dlg_regs_st *dlg_regs,
++ display_ttu_regs_st *ttu_regs,
++ display_e2e_pipe_params_st *e2e_pipe_param,
++ const unsigned int num_pipes,
++ const unsigned int pipe_idx,
++ const bool cstate_en,
++ const bool pstate_en,
++ const bool vm_en,
++ const bool ignore_viewport_pos,
++ const bool immediate_flip_support);
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+index 91810c7d5cf5..96dfcd8c36bc 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+@@ -28,6 +28,8 @@
+ #if defined(CONFIG_DRM_AMD_DC_DCN2_0)
+ #include "dcn20/display_mode_vba_20.h"
+ #include "dcn20/display_rq_dlg_calc_20.h"
++#include "dcn20/display_mode_vba_20v2.h"
++#include "dcn20/display_rq_dlg_calc_20v2.h"
+ #endif
+
+ #if defined(CONFIG_DRM_AMD_DC_DCN2_0)
+@@ -37,6 +39,13 @@ const struct dml_funcs dml20_funcs = {
+ .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg
+ };
++
++const struct dml_funcs dml20v2_funcs = {
++ .validate = dml20v2_ModeSupportAndSystemConfigurationFull,
++ .recalculate = dml20v2_recalculate,
++ .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg,
++ .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg
++};
+ #endif
+
+ void dml_init_instance(struct display_mode_lib *lib,
+@@ -52,6 +61,9 @@ void dml_init_instance(struct display_mode_lib *lib,
+ case DML_PROJECT_NAVI10:
+ lib->funcs = dml20_funcs;
+ break;
++ case DML_PROJECT_NAVI10v2:
++ lib->funcs = dml20v2_funcs;
++ break;
+ #endif
+ default:
+ break;
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+index 5bf13d67f289..870716e3c132 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+@@ -36,6 +36,7 @@ enum dml_project {
+ DML_PROJECT_RAVEN1,
+ #ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ DML_PROJECT_NAVI10,
++ DML_PROJECT_NAVI10v2,
+ #endif
+ };
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+index 5678472546ab..ab34fd26702f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+@@ -100,6 +100,7 @@ struct _vcs_dpi_soc_bounding_box_st {
+ unsigned int vmm_page_size_bytes;
+ unsigned int hostvm_min_page_size_bytes;
+ double dram_clock_change_latency_us;
++ double dummy_pstate_latency_us;
+ double writeback_dram_clock_change_latency_us;
+ unsigned int return_bus_width_bytes;
+ unsigned int voltage_override;
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+index 4d2a1262d9db..88e63f16f7fc 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+@@ -568,6 +568,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
+ if (src->is_hsplit) {
+ for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) {
+ display_pipe_source_params_st *src_k = &pipes[k].pipe.src;
++ display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest;
+
+ if (src_k->is_hsplit && !visited[k]
+ && src->hsplit_grp == src_k->hsplit_grp) {
+@@ -575,12 +576,15 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
+ mode_lib->vba.NumberOfActivePlanes;
+ mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++;
+ if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes]
+- == dm_horz)
++ == dm_horz) {
+ mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=
+ src_k->viewport_width;
+- else
++ mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] +=
++ dst_k->recout_width;
++ } else {
+ mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] +=
+ src_k->viewport_height;
++ }
+
+ visited[k] = true;
+ }
+--
+2.17.1
+